Hi,

if I understand correctly, your patch addresses 2 separate issues:

1) The current reclaiming code uses an approximation to avoid using
   div64_u64(), which might introduce too much overhead (at least, as
   far as I remember :). Your patch changes it to use the exact,
   non-approximated equation.

2) Currently, the reclaimable CPU time is divided as if all the
   SCHED_DEADLINE tasks (and not only the SCHED_FLAG_RECLAIM tasks)
   could use it; your patch changes the code to distribute the
   reclaimable CPU time only to tasks having the SCHED_FLAG_RECLAIM
   flag set.

Is this understanding correct?

If using div64_u64() does not introduce too much overhead, then I
agree with the first change. The second change also looks good to me.

I have no comments on the code, but there is one thing in the
comments that looks misleading to me (or I am misunderstanding the
code or the comments):

On Mon, 8 May 2023 12:08:28 -0400
Vineeth Pillai <vineeth@xxxxxxxxxxxxxxx> wrote:

[...]
> + * "dq = -(Ureclaim / Umax_reclaim) * dt"
> + * Where
> + * Ureclaim:	Active Bandwidth of SCHED_FLAG_RECLAIM tasks for this rq.
> + * Umax_reclaim:	Maximum reclaimable bandwidth for this rq.
> + *
> + * We can calculate Umax_reclaim as:
> + * Umax_reclaim:	this_bw + Uinact + Ureclaim

I think this looks like a typo (summing this_bw to Uinact looks
wrong). Should "this_bw" be Uextra?

> + * Where:
> + * this_bw:	Reserved bandwidth for this runqueue.
> + * Ureclaim:	Active Bandwidth of SCHED_FLAG_RECLAIM tasks for this rq.
> + * Uinact:	Inactive utilization (this_bw - running_bw)
> + * Uextra:	Extra bandwidth(Usually Umax - this_bw)
> + * Umax:	Max usable bandwidth. Currently
> + *		= sched_rt_runtime_us / sched_rt_period_us
> + *
> + * We use the above formula to scale the runtime down
> + *
> + * dq = -(Ureclaim / Umax_reclaim) * dt
> + *    = -(Ureclaim / (Ureclaim + Uextra + Uinact)) * dt

I think this should be the correct equation. BTW, since you are
summing Uextra and Uinact, maybe you could just use
"Umax - running_bw"?
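Just to show what I mean, something like this (totally untested
sketch; it assumes extra_bw is kept equal to max_bw - this_bw, as the
"Usually" in your comment suggests, and that running_bw never exceeds
max_bw, otherwise the subtraction below would underflow):

	static u64 grub_reclaim(u64 delta, struct rq *rq,
				struct sched_dl_entity *dl_se)
	{
		/*
		 * Uextra + Uinact = (Umax - this_bw) + (this_bw - running_bw)
		 *                 = Umax - running_bw
		 */
		u64 reclaimable_bw = rq->dl.max_bw - rq->dl.running_bw;

		return div64_u64(delta * rq->dl.reclaim_bw,
				 rq->dl.reclaim_bw + reclaimable_bw);
	}

With this, the clamping against max_bw would not be needed anymore,
because max_bw - running_bw can never be larger than max_bw.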

			Luca

>   */
>  static u64 grub_reclaim(u64 delta, struct rq *rq, struct sched_dl_entity *dl_se)
>  {
> +	u64 scaled_delta;
>  	u64 u_inact = rq->dl.this_bw - rq->dl.running_bw; /* Utot - Uact */
> -	u64 u_act;
> -	u64 u_act_min = (dl_se->dl_bw * rq->dl.bw_ratio) >> RATIO_SHIFT;
> +	u64 reclaimable_bw = rq->dl.extra_bw + u_inact;
>
> -	/*
> -	 * Instead of computing max{u * bw_ratio, (1 - u_inact - u_extra)},
> -	 * we compare u_inact + rq->dl.extra_bw with
> -	 * 1 - (u * rq->dl.bw_ratio >> RATIO_SHIFT), because
> -	 * u_inact + rq->dl.extra_bw can be larger than
> -	 * 1 * (so, 1 - u_inact - rq->dl.extra_bw would be negative
> -	 * leading to wrong results)
> -	 */
> -	if (u_inact + rq->dl.extra_bw > BW_UNIT - u_act_min)
> -		u_act = u_act_min;
> -	else
> -		u_act = BW_UNIT - u_inact - rq->dl.extra_bw;
> +	if (reclaimable_bw > rq->dl.max_bw)
> +		reclaimable_bw = rq->dl.max_bw;
>
> -	return (delta * u_act) >> BW_SHIFT;
> +	scaled_delta = div64_u64(delta * rq->dl.reclaim_bw,
> +				 (rq->dl.reclaim_bw + reclaimable_bw));
> +	return scaled_delta;
>  }
>
>  /*
> @@ -2783,12 +2797,9 @@ int sched_dl_global_validate(void)
>  static void init_dl_rq_bw_ratio(struct dl_rq *dl_rq)
>  {
>  	if (global_rt_runtime() == RUNTIME_INF) {
> -		dl_rq->bw_ratio = 1 << RATIO_SHIFT;
> -		dl_rq->extra_bw = 1 << BW_SHIFT;
> +		dl_rq->max_bw = dl_rq->extra_bw = 1 << BW_SHIFT;
>  	} else {
> -		dl_rq->bw_ratio = to_ratio(global_rt_runtime(),
> -			  global_rt_period()) >> (BW_SHIFT - RATIO_SHIFT);
> -		dl_rq->extra_bw = to_ratio(global_rt_period(),
> -			  global_rt_runtime());
> +		dl_rq->max_bw = dl_rq->extra_bw =
> +			to_ratio(global_rt_period(), global_rt_runtime());
>  	}
>  }
> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index 3e8df6d31c1e..13d85af0f42b 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -257,6 +257,11 @@ static inline bool dl_entity_is_special(const struct sched_dl_entity *dl_se)
>  #endif
>  }
>
> +static inline bool dl_entity_is_reclaim(const struct sched_dl_entity *dl_se)
> +{
> +	return dl_se->flags & SCHED_FLAG_RECLAIM;
> +}
> +
>  /*
>   * Tells if entity @a should preempt entity @b.
>   */
> @@ -754,10 +759,20 @@ struct dl_rq {
>  	u64			extra_bw;
>
>  	/*
> -	 * Inverse of the fraction of CPU utilization that can be reclaimed
> -	 * by the GRUB algorithm.
> +	 * Maximum available bandwidth for this runqueue. This is used to
> +	 * calculate reclaimable bandwidth for SCHED_FLAG_RECLAIM tasks.
> +	 * By restricting maximum usable bandwidth, we aim to give other
> +	 * tasks on lower classes a chance to run, when competing with
> +	 * SCHED_FLAG_RECLAIM tasks.
>  	 */
> -	u64			bw_ratio;
> +	u64			max_bw;
> +
> +	/*
> +	 * Active bandwidth of SCHED_FLAG_RECLAIM tasks on this rq.
> +	 * This will be a subset of running_bw.
> +	 */
> +	u64			reclaim_bw;
> +
>  };
>
>  #ifdef CONFIG_FAIR_GROUP_SCHED