On 03/29/23 14:55, Juri Lelli wrote:
> Qais reported that iterating over all tasks when rebuilding root domains
> for finding out which ones are DEADLINE and need their bandwidth
> correctly restored on such root domains can be a costly operation (10+
> ms delays on suspend-resume).
>
> To fix the problem keep track of the number of DEADLINE tasks belonging
> to each cpuset and then use this information (followup patch) to only
> perform the above iteration if DEADLINE tasks are actually present in
> the cpuset for which a corresponding root domain is being rebuilt.
>
> Reported-by: Qais Yousef <qyousef@xxxxxxxxxxx>
> Link: https://lore.kernel.org/lkml/20230206221428.2125324-1-qyousef@xxxxxxxxxxx/
> Signed-off-by: Juri Lelli <juri.lelli@xxxxxxxxxx>
> ---

Reviewed-by: Qais Yousef <qyousef@xxxxxxxxxxx>
Tested-by: Qais Yousef <qyousef@xxxxxxxxxxx>

Thanks!

--
Qais Yousef

>  include/linux/cpuset.h  |  4 ++++
>  kernel/cgroup/cgroup.c  |  4 ++++
>  kernel/cgroup/cpuset.c  | 25 +++++++++++++++++++++++++
>  kernel/sched/deadline.c | 14 ++++++++++++++
>  4 files changed, 47 insertions(+)
>
> diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
> index 355f796c5f07..0348dba5680e 100644
> --- a/include/linux/cpuset.h
> +++ b/include/linux/cpuset.h
> @@ -71,6 +71,8 @@ extern void cpuset_init_smp(void);
>  extern void cpuset_force_rebuild(void);
>  extern void cpuset_update_active_cpus(void);
>  extern void cpuset_wait_for_hotplug(void);
> +extern void inc_dl_tasks_cs(struct task_struct *task);
> +extern void dec_dl_tasks_cs(struct task_struct *task);
>  extern void cpuset_lock(void);
>  extern void cpuset_unlock(void);
>  extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask);
> @@ -196,6 +198,8 @@ static inline void cpuset_update_active_cpus(void)
>
>  static inline void cpuset_wait_for_hotplug(void) { }
>
> +static inline void inc_dl_tasks_cs(struct task_struct *task) { }
> +static inline void dec_dl_tasks_cs(struct task_struct *task) { }
>  static inline void cpuset_lock(void) { }
>  static inline void cpuset_unlock(void) { }
>
> diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
> index 935e8121b21e..ff27b2d2bf0b 100644
> --- a/kernel/cgroup/cgroup.c
> +++ b/kernel/cgroup/cgroup.c
> @@ -57,6 +57,7 @@
>  #include <linux/file.h>
>  #include <linux/fs_parser.h>
>  #include <linux/sched/cputime.h>
> +#include <linux/sched/deadline.h>
>  #include <linux/psi.h>
>  #include <net/sock.h>
>
> @@ -6673,6 +6674,9 @@ void cgroup_exit(struct task_struct *tsk)
>  	list_add_tail(&tsk->cg_list, &cset->dying_tasks);
>  	cset->nr_tasks--;
>
> +	if (dl_task(tsk))
> +		dec_dl_tasks_cs(tsk);
> +
>  	WARN_ON_ONCE(cgroup_task_frozen(tsk));
>  	if (unlikely(!(tsk->flags & PF_KTHREAD) &&
>  		     test_bit(CGRP_FREEZE, &task_dfl_cgroup(tsk)->flags)))
> diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
> index fbc10b494292..eb0854ef9757 100644
> --- a/kernel/cgroup/cpuset.c
> +++ b/kernel/cgroup/cpuset.c
> @@ -193,6 +193,12 @@ struct cpuset {
>  	int use_parent_ecpus;
>  	int child_ecpus_count;
>
> +	/*
> +	 * number of SCHED_DEADLINE tasks attached to this cpuset, so that we
> +	 * know when to rebuild associated root domain bandwidth information.
> +	 */
> +	int nr_deadline_tasks;
> +
>  	/* Invalid partition error code, not lock protected */
>  	enum prs_errcode prs_err;
>
> @@ -245,6 +251,20 @@ static inline struct cpuset *parent_cs(struct cpuset *cs)
>  	return css_cs(cs->css.parent);
>  }
>
> +void inc_dl_tasks_cs(struct task_struct *p)
> +{
> +	struct cpuset *cs = task_cs(p);
> +
> +	cs->nr_deadline_tasks++;
> +}
> +
> +void dec_dl_tasks_cs(struct task_struct *p)
> +{
> +	struct cpuset *cs = task_cs(p);
> +
> +	cs->nr_deadline_tasks--;
> +}
> +
>  /* bits in struct cpuset flags field */
>  typedef enum {
>  	CS_ONLINE,
> @@ -2477,6 +2497,11 @@ static int cpuset_can_attach(struct cgroup_taskset *tset)
>  		ret = security_task_setscheduler(task);
>  		if (ret)
>  			goto out_unlock;
> +
> +		if (dl_task(task)) {
> +			cs->nr_deadline_tasks++;
> +			cpuset_attach_old_cs->nr_deadline_tasks--;
> +		}
>  	}
>
>  	/*
> diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
> index 4cc7e1ca066d..8f92f0f87383 100644
> --- a/kernel/sched/deadline.c
> +++ b/kernel/sched/deadline.c
> @@ -16,6 +16,8 @@
>   *  Fabio Checconi <fchecconi@xxxxxxxxx>
>   */
>
> +#include <linux/cpuset.h>
> +
>  /*
>   * Default limits for DL period; on the top end we guard against small util
>   * tasks still getting ridiculously long effective runtimes, on the bottom end we
> @@ -2595,6 +2597,12 @@ static void switched_from_dl(struct rq *rq, struct task_struct *p)
>  	if (task_on_rq_queued(p) && p->dl.dl_runtime)
>  		task_non_contending(p);
>
> +	/*
> +	 * In case a task is setscheduled out from SCHED_DEADLINE we need to
> +	 * keep track of that on its cpuset (for correct bandwidth tracking).
> +	 */
> +	dec_dl_tasks_cs(p);
> +
>  	if (!task_on_rq_queued(p)) {
>  		/*
>  		 * Inactive timer is armed. However, p is leaving DEADLINE and
> @@ -2635,6 +2643,12 @@ static void switched_to_dl(struct rq *rq, struct task_struct *p)
>  	if (hrtimer_try_to_cancel(&p->dl.inactive_timer) == 1)
>  		put_task_struct(p);
>
> +	/*
> +	 * In case a task is setscheduled to SCHED_DEADLINE we need to keep
> +	 * track of that on its cpuset (for correct bandwidth tracking).
> +	 */
> +	inc_dl_tasks_cs(p);
> +
>  	/* If p is not queued we will update its parameters at next wakeup. */
>  	if (!task_on_rq_queued(p)) {
>  		add_rq_bw(&p->dl, &rq->dl);
> --
> 2.39.2
>
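Side note for anyone tracking the series: the followup patch mentioned in the
changelog is what actually consumes the new counter, short-circuiting the
per-task scan when a cpuset has no DEADLINE tasks. A rough sketch of what that
consumer could look like in kernel/cgroup/cpuset.c (the function name is
assumed here, not taken from this patch; css_task_iter_start()/
css_task_iter_next()/css_task_iter_end() and dl_add_task_root_domain() are
existing upstream helpers):

static void dl_update_tasks_root_domain(struct cpuset *cs)
{
	struct css_task_iter it;
	struct task_struct *task;

	/*
	 * Fast path enabled by this patch: no DEADLINE task belongs to
	 * this cpuset, so there is no bandwidth to restore and the
	 * whole task iteration can be skipped.
	 */
	if (cs->nr_deadline_tasks == 0)
		return;

	css_task_iter_start(&cs->css, 0, &it);

	/* Re-account each task's DL bandwidth on the rebuilt root domain. */
	while ((task = css_task_iter_next(&it)))
		dl_add_task_root_domain(task);

	css_task_iter_end(&it);
}

This is only illustrative of how nr_deadline_tasks gates the iteration; the
actual followup patch in the series is authoritative.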