On Thu, Nov 14, 2024 at 02:28:10PM +0000 Juri Lelli wrote: > For hotplug operations, DEADLINE needs to check that there is still enough > bandwidth left after removing the CPU that is going offline. We however > fail to do so currently. > > Restore the correct behavior by restructuring dl_bw_manage() a bit, so > that overflow conditions (not enough bandwidth left) are properly > checked. Also account for dl_server bandwidth, i.e. discount such > bandwidth in the calculation since NORMAL tasks will be anyway moved > away from the CPU as a result of the hotplug operation. > > Signed-off-by: Juri Lelli <juri.lelli@xxxxxxxxxx> > Nice, thanks! Reviewed-by: Phil Auld <pauld@xxxxxxxxxx> > --- > v1->v2: special case when total_bw = 0 (discounting dl_servers) > --- > kernel/sched/core.c | 2 +- > kernel/sched/deadline.c | 48 +++++++++++++++++++++++++++++++++-------- > kernel/sched/sched.h | 2 +- > 3 files changed, 41 insertions(+), 11 deletions(-) > > diff --git a/kernel/sched/core.c b/kernel/sched/core.c > index 43e453ab7e20..d1049e784510 100644 > --- a/kernel/sched/core.c > +++ b/kernel/sched/core.c > @@ -8057,7 +8057,7 @@ static void cpuset_cpu_active(void) > static int cpuset_cpu_inactive(unsigned int cpu) > { > if (!cpuhp_tasks_frozen) { > - int ret = dl_bw_check_overflow(cpu); > + int ret = dl_bw_deactivate(cpu); > > if (ret) > return ret; > diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c > index a9cdbf058871..267ea8bacaf6 100644 > --- a/kernel/sched/deadline.c > +++ b/kernel/sched/deadline.c > @@ -3470,29 +3470,31 @@ int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur, > } > > enum dl_bw_request { > - dl_bw_req_check_overflow = 0, > + dl_bw_req_deactivate = 0, > dl_bw_req_alloc, > dl_bw_req_free > }; > > static int dl_bw_manage(enum dl_bw_request req, int cpu, u64 dl_bw) > { > - unsigned long flags; > + unsigned long flags, cap; > struct dl_bw *dl_b; > bool overflow = 0; > + u64 fair_server_bw = 0; > > rcu_read_lock_sched(); > dl_b = dl_bw_of(cpu); > raw_spin_lock_irqsave(&dl_b->lock, flags); > > - if (req == dl_bw_req_free) { > + cap = dl_bw_capacity(cpu); > + switch (req) { > + case dl_bw_req_free: > __dl_sub(dl_b, dl_bw, dl_bw_cpus(cpu)); > - } else { > - unsigned long cap = dl_bw_capacity(cpu); > - > + break; > + case dl_bw_req_alloc: > overflow = __dl_overflow(dl_b, cap, 0, dl_bw); > > - if (req == dl_bw_req_alloc && !overflow) { > + if (!overflow) { > /* > * We reserve space in the destination > * root_domain, as we can't fail after this point. > @@ -3501,6 +3503,34 @@ static int dl_bw_manage(enum dl_bw_request req, int cpu, u64 dl_bw) > */ > __dl_add(dl_b, dl_bw, dl_bw_cpus(cpu)); > } > + break; > + case dl_bw_req_deactivate: > + /* > + * cpu is going offline and NORMAL tasks will be moved away > + * from it. We can thus discount dl_server bandwidth > + * contribution as it won't need to be servicing tasks after > + * the cpu is off. > + */ > + if (cpu_rq(cpu)->fair_server.dl_server) > + fair_server_bw = cpu_rq(cpu)->fair_server.dl_bw; > + > + /* > + * Not much to check if no DEADLINE bandwidth is present. > + * dl_servers we can discount, as tasks will be moved out the > + * offlined CPUs anyway. > + */ > + if (dl_b->total_bw - fair_server_bw > 0) { > + /* > + * Leaving at least one CPU for DEADLINE tasks seems a > + * wise thing to do. > + */ > + if (dl_bw_cpus(cpu)) > + overflow = __dl_overflow(dl_b, cap, fair_server_bw, 0); > + else > + overflow = 1; > + } > + > + break; > } > > raw_spin_unlock_irqrestore(&dl_b->lock, flags); > @@ -3509,9 +3539,9 @@ static int dl_bw_manage(enum dl_bw_request req, int cpu, u64 dl_bw) > return overflow ? -EBUSY : 0; > } > > -int dl_bw_check_overflow(int cpu) > +int dl_bw_deactivate(int cpu) > { > - return dl_bw_manage(dl_bw_req_check_overflow, cpu, 0); > + return dl_bw_manage(dl_bw_req_deactivate, cpu, 0); > } > > int dl_bw_alloc(int cpu, u64 dl_bw) > diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h > index b1c3588a8f00..1fee840f1bab 100644 > --- a/kernel/sched/sched.h > +++ b/kernel/sched/sched.h > @@ -362,7 +362,7 @@ extern void __getparam_dl(struct task_struct *p, struct sched_attr *attr); > extern bool __checkparam_dl(const struct sched_attr *attr); > extern bool dl_param_changed(struct task_struct *p, const struct sched_attr *attr); > extern int dl_cpuset_cpumask_can_shrink(const struct cpumask *cur, const struct cpumask *trial); > -extern int dl_bw_check_overflow(int cpu); > +extern int dl_bw_deactivate(int cpu); > extern s64 dl_scaled_delta_exec(struct rq *rq, struct sched_dl_entity *dl_se, s64 delta_exec); > /* > * SCHED_DEADLINE supports servers (nested scheduling) with the following > -- > 2.47.0 > --