Enable passing idle flags (%SCX_PICK_IDLE_*) to scx_select_cpu_dfl(),
to enforce strict selection criteria, such as selecting an idle CPU
strictly within @prev_cpu's node or choosing only a fully idle SMT
core.

This functionality will be exposed through a dedicated kfunc in a
separate patch.

Signed-off-by: Andrea Righi <arighi@xxxxxxxxxx>
---
 kernel/sched/ext.c      |  2 +-
 kernel/sched/ext_idle.c | 41 ++++++++++++++++++++++++++++++-----------
 kernel/sched/ext_idle.h |  2 +-
 3 files changed, 32 insertions(+), 13 deletions(-)

diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index debcd1cf2de9b..5cd878bbd0e39 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -3396,7 +3396,7 @@ static int select_task_rq_scx(struct task_struct *p, int prev_cpu, int wake_flag
 		bool found;
 		s32 cpu;
 
-		cpu = scx_select_cpu_dfl(p, prev_cpu, wake_flags, &found);
+		cpu = scx_select_cpu_dfl(p, prev_cpu, wake_flags, 0, &found);
 		p->scx.selected_cpu = cpu;
 		if (found) {
 			p->scx.slice = SCX_SLICE_DFL;
diff --git a/kernel/sched/ext_idle.c b/kernel/sched/ext_idle.c
index 15e9d1c8b2815..16981456ec1ed 100644
--- a/kernel/sched/ext_idle.c
+++ b/kernel/sched/ext_idle.c
@@ -418,7 +418,7 @@ void scx_idle_update_selcpu_topology(struct sched_ext_ops *ops)
  * NOTE: tasks that can only run on 1 CPU are excluded by this logic, because
  * we never call ops.select_cpu() for them, see select_task_rq().
  */
-s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, bool *found)
+s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, u64 flags, bool *found)
 {
 	const struct cpumask *llc_cpus = NULL;
 	const struct cpumask *numa_cpus = NULL;
@@ -455,12 +455,13 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, bool
 	 * If WAKE_SYNC, try to migrate the wakee to the waker's CPU.
 	 */
 	if (wake_flags & SCX_WAKE_SYNC) {
-		cpu = smp_processor_id();
+		int waker_node;
 
 		/*
 		 * If the waker's CPU is cache affine and prev_cpu is idle,
 		 * then avoid a migration.
 		 */
+		cpu = smp_processor_id();
 		if (cpus_share_cache(cpu, prev_cpu) &&
 		    scx_idle_test_and_clear_cpu(prev_cpu)) {
 			cpu = prev_cpu;
@@ -480,9 +481,11 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, bool
 		 * piled up on it even if there is an idle core elsewhere on
 		 * the system.
 		 */
+		waker_node = cpu_to_node(cpu);
 		if (!(current->flags & PF_EXITING) &&
 		    cpu_rq(cpu)->scx.local_dsq.nr == 0 &&
-		    !cpumask_empty(idle_cpumask(cpu_to_node(cpu))->cpu)) {
+		    (!(flags & SCX_PICK_IDLE_IN_NODE) || (waker_node == node)) &&
+		    !cpumask_empty(idle_cpumask(waker_node)->cpu)) {
 			if (cpumask_test_cpu(cpu, p->cpus_ptr))
 				goto cpu_found;
 		}
@@ -521,15 +524,25 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, bool
 		}
 
 		/*
-		 * Search for any full idle core usable by the task.
+		 * Search for any full-idle core usable by the task.
 		 *
-		 * If NUMA aware idle selection is enabled, the search will
+		 * If the node-aware idle CPU selection policy is enabled
+		 * (%SCX_OPS_BUILTIN_IDLE_PER_NODE), the search will always
 		 * begin in prev_cpu's node and proceed to other nodes in
 		 * order of increasing distance.
 		 */
-		cpu = scx_pick_idle_cpu(p->cpus_ptr, node, SCX_PICK_IDLE_CORE);
+		cpu = scx_pick_idle_cpu(p->cpus_ptr, node, flags | SCX_PICK_IDLE_CORE);
 		if (cpu >= 0)
 			goto cpu_found;
+
+		/*
+		 * Give up if we're strictly looking for a full-idle SMT
+		 * core.
+		 */
+		if (flags & SCX_PICK_IDLE_CORE) {
+			cpu = prev_cpu;
+			goto out_unlock;
+		}
 	}
 
 	/*
@@ -560,18 +573,24 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, bool
 
 	/*
 	 * Search for any idle CPU usable by the task.
+	 *
+	 * If the node-aware idle CPU selection policy is enabled
+	 * (%SCX_OPS_BUILTIN_IDLE_PER_NODE), the search will always begin
+	 * in prev_cpu's node and proceed to other nodes in order of
+	 * increasing distance.
 	 */
-	cpu = scx_pick_idle_cpu(p->cpus_ptr, node, 0);
+	cpu = scx_pick_idle_cpu(p->cpus_ptr, node, flags);
 	if (cpu >= 0)
 		goto cpu_found;
 
-	rcu_read_unlock();
-	return prev_cpu;
+	cpu = prev_cpu;
+	goto out_unlock;
 
 cpu_found:
+	*found = true;
+out_unlock:
 	rcu_read_unlock();
-	*found = true;
 	return cpu;
 }
 
@@ -810,7 +829,7 @@ __bpf_kfunc s32 scx_bpf_select_cpu_dfl(struct task_struct *p, s32 prev_cpu,
 		goto prev_cpu;
 
 #ifdef CONFIG_SMP
-	return scx_select_cpu_dfl(p, prev_cpu, wake_flags, is_idle);
+	return scx_select_cpu_dfl(p, prev_cpu, wake_flags, 0, is_idle);
 #endif
 
 prev_cpu:
diff --git a/kernel/sched/ext_idle.h b/kernel/sched/ext_idle.h
index 68c4307ce4f6f..5c1db6b315f7a 100644
--- a/kernel/sched/ext_idle.h
+++ b/kernel/sched/ext_idle.h
@@ -27,7 +27,7 @@ static inline s32 scx_pick_idle_cpu(const struct cpumask *cpus_allowed, int node
 }
 #endif /* CONFIG_SMP */
 
-s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, bool *found);
+s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, u64 flags, bool *found);
 void scx_idle_enable(struct sched_ext_ops *ops);
 void scx_idle_disable(void);
 int scx_idle_init(void);
-- 
2.48.1
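
For reference, a minimal usage sketch (illustrative only, not part of
this patch): with the new @flags argument, an in-kernel caller can
restrict the search to a fully idle SMT core within @prev_cpu's node
and detect the strict-mode fallback via *found. The surrounding
context (@p, @prev_cpu, @wake_flags) is assumed to come from the
caller, as in select_task_rq_scx():

	bool is_idle = false;
	s32 cpu;

	/*
	 * Strict selection: only accept a fully idle SMT core within
	 * @prev_cpu's node. If no such core is available,
	 * scx_select_cpu_dfl() returns @prev_cpu and leaves is_idle
	 * false, so the caller can tell the fallback apart from a
	 * successful idle pick.
	 */
	cpu = scx_select_cpu_dfl(p, prev_cpu, wake_flags,
				 SCX_PICK_IDLE_CORE | SCX_PICK_IDLE_IN_NODE,
				 &is_idle);
	if (is_idle)
		p->scx.slice = SCX_SLICE_DFL;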