scx_next_task_picked() is used by sched_ext to notify the BPF scheduler when a CPU is taken away by a task dispatched from a higher-priority sched_class so that the BPF scheduler can, e.g., punt the task(s) that were running on or waiting for that CPU to other CPUs. Replace the sched_ext-specific hook scx_next_task_picked() with a new sched_class operation switch_class(). The changes are straightforward and the code looks better afterwards. However, when !CONFIG_SCHED_CLASS_EXT, this just ends up adding an unused hook which is unlikely to be useful to other sched_classes. We can #ifdef the op with CONFIG_SCHED_CLASS_EXT but then I'm not sure the code necessarily looks better afterwards. Please let me know the preference. If adding #ifdefs is preferable, that's okay too. Signed-off-by: Tejun Heo <tj@xxxxxxxxxx> Suggested-by: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx> Cc: Ingo Molnar <mingo@xxxxxxxxxx> Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx> Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx> --- kernel/sched/core.c | 5 ++++- kernel/sched/ext.c | 20 ++++++++++---------- kernel/sched/ext.h | 4 ---- kernel/sched/sched.h | 2 ++ 4 files changed, 16 insertions(+), 15 deletions(-) --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5907,7 +5907,10 @@ restart: for_each_active_class(class) { p = class->pick_next_task(rq); if (p) { - scx_next_task_picked(rq, p, class); + const struct sched_class *prev_class = prev->sched_class; + + if (class != prev_class && prev_class->switch_class) + prev_class->switch_class(rq, p); return p; } } --- a/kernel/sched/ext.c +++ b/kernel/sched/ext.c @@ -2749,10 +2749,9 @@ preempt_reason_from_class(const struct s return SCX_CPU_PREEMPT_UNKNOWN; } -void scx_next_task_picked(struct rq *rq, struct task_struct *p, - const struct sched_class *active) +static void switch_class_scx(struct rq *rq, struct task_struct *next) { - lockdep_assert_rq_held(rq); + const struct sched_class *next_class = next->sched_class; if (!scx_enabled()) return; @@ -2769,12 
+2768,11 @@ void scx_next_task_picked(struct rq *rq, /* * The callback is conceptually meant to convey that the CPU is no - * longer under the control of SCX. Therefore, don't invoke the - * callback if the CPU is is staying on SCX, or going idle (in which - * case the SCX scheduler has actively decided not to schedule any - * tasks on the CPU). + * longer under the control of SCX. Therefore, don't invoke the callback + * if the next class is below SCX (in which case the BPF scheduler has + * actively decided not to schedule any tasks on the CPU). */ - if (likely(active >= &ext_sched_class)) + if (sched_class_above(&ext_sched_class, next_class)) return; /* @@ -2789,8 +2787,8 @@ void scx_next_task_picked(struct rq *rq, if (!rq->scx.cpu_released) { if (SCX_HAS_OP(cpu_release)) { struct scx_cpu_release_args args = { - .reason = preempt_reason_from_class(active), - .task = p, + .reason = preempt_reason_from_class(next_class), + .task = next, }; SCX_CALL_OP(SCX_KF_CPU_RELEASE, @@ -3496,6 +3494,8 @@ DEFINE_SCHED_CLASS(ext) = { .put_prev_task = put_prev_task_scx, .set_next_task = set_next_task_scx, + .switch_class = switch_class_scx, + #ifdef CONFIG_SMP .balance = balance_scx, .select_task_rq = select_task_rq_scx, --- a/kernel/sched/ext.h +++ b/kernel/sched/ext.h @@ -33,8 +33,6 @@ static inline bool task_on_scx(const str return scx_enabled() && p->sched_class == &ext_sched_class; } -void scx_next_task_picked(struct rq *rq, struct task_struct *p, - const struct sched_class *active); void scx_tick(struct rq *rq); void init_scx_entity(struct sched_ext_entity *scx); void scx_pre_fork(struct task_struct *p); @@ -82,8 +80,6 @@ bool scx_prio_less(const struct task_str #define scx_enabled() false #define scx_switched_all() false -static inline void scx_next_task_picked(struct rq *rq, struct task_struct *p, - const struct sched_class *active) {} static inline void scx_tick(struct rq *rq) {} static inline void scx_pre_fork(struct task_struct *p) {} static inline int scx_fork(struct 
task_struct *p) { return 0; } --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -2355,6 +2355,8 @@ struct sched_class { void (*put_prev_task)(struct rq *rq, struct task_struct *p); void (*set_next_task)(struct rq *rq, struct task_struct *p, bool first); + void (*switch_class)(struct rq *rq, struct task_struct *next); + #ifdef CONFIG_SMP int (*balance)(struct rq *rq, struct task_struct *prev, struct rq_flags *rf); int (*select_task_rq)(struct task_struct *p, int task_cpu, int flags);