The no-preemption model allows tasks to run to completion in kernel
context. For voluntary preemption, allow preemption by higher scheduling
classes.

To do this, resched_curr() now takes a parameter that specifies whether
the resched is on behalf of a scheduling class above that of the
runqueue's current task, and reschedules eagerly if so.

Also define the scheduler feature PREEMPT_PRIORITY, which can be used to
toggle the voluntary preemption model at runtime.

TODO: Both RT and deadline work, but I'm almost certainly not doing all
the right things for either.

Signed-off-by: Ankur Arora <ankur.a.arora@xxxxxxxxxx>
---
 kernel/Kconfig.preempt    | 19 ++++++-------------
 kernel/sched/core.c       | 28 +++++++++++++++++-----------
 kernel/sched/core_sched.c |  2 +-
 kernel/sched/deadline.c   | 22 +++++++++++-----------
 kernel/sched/fair.c       | 18 +++++++++---------
 kernel/sched/features.h   |  5 +++++
 kernel/sched/idle.c       |  2 +-
 kernel/sched/rt.c         | 26 +++++++++++++-------------
 kernel/sched/sched.h      |  2 +-
 9 files changed, 64 insertions(+), 60 deletions(-)

diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt
index 074fe5e253b5..e16114b679e3 100644
--- a/kernel/Kconfig.preempt
+++ b/kernel/Kconfig.preempt
@@ -20,23 +20,16 @@ config PREEMPT_NONE
 	  at runtime.
 
 config PREEMPT_VOLUNTARY
-	bool "Voluntary Kernel Preemption (Desktop)"
+	bool "Voluntary Kernel Preemption"
 	depends on !ARCH_NO_PREEMPT
 	select PREEMPTION
 	help
-	  This option reduces the latency of the kernel by adding more
-	  "explicit preemption points" to the kernel code. These new
-	  preemption points have been selected to reduce the maximum
-	  latency of rescheduling, providing faster application reactions,
-	  at the cost of slightly lower throughput.
+	  This option reduces the latency of the kernel by allowing
+	  processes in higher scheduling policy classes to preempt ones
+	  lower down.
 
-	  This allows reaction to interactive events by allowing a
-	  low priority process to voluntarily preempt itself even if it
-	  is in kernel mode executing a system call. This allows
-	  applications to run more 'smoothly' even when the system is
-	  under load.
-
-	  Select this if you are building a kernel for a desktop system.
+	  Higher-priority processes in the same scheduling policy class
+	  do not preempt others in the same class.
 
 config PREEMPT
 	bool "Preemptible Kernel (Low-Latency Desktop)"
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 2a50a64255c6..3fa78e8afb7d 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -256,7 +256,7 @@ void sched_core_dequeue(struct rq *rq, struct task_struct *p, int flags)
 	 */
 	if (!(flags & DEQUEUE_SAVE) && rq->nr_running == 1 &&
 	    rq->core->core_forceidle_count && rq->curr == rq->idle)
-		resched_curr(rq);
+		resched_curr(rq, false);
 }
 
 static int sched_task_is_throttled(struct task_struct *p, int cpu)
@@ -1074,9 +1074,12 @@ void __resched_curr(struct rq *rq, resched_t rs)
  *
  * - in userspace: run to completion semantics are only for kernel tasks
  *
- * Otherwise (regardless of priority), run to completion.
+ * - running under voluntary preemption (sched_feat(PREEMPT_PRIORITY))
+ *   and a task from a sched_class above wants the CPU
+ *
+ * Otherwise, run to completion.
  */
-void resched_curr(struct rq *rq)
+void resched_curr(struct rq *rq, bool above)
 {
 	resched_t rs = RESCHED_lazy;
 	int context;
@@ -1112,6 +1115,9 @@ void resched_curr(struct rq *rq)
 		goto resched;
 	}
 
+	if (sched_feat(PREEMPT_PRIORITY) && above)
+		rs = RESCHED_eager;
+
 resched:
 	__resched_curr(rq, rs);
 }
@@ -1123,7 +1129,7 @@ void resched_cpu(int cpu)
 
 	raw_spin_rq_lock_irqsave(rq, flags);
 	if (cpu_online(cpu) || cpu == smp_processor_id())
-		resched_curr(rq);
+		resched_curr(rq, true);
 	raw_spin_rq_unlock_irqrestore(rq, flags);
 }
 
@@ -2277,7 +2283,7 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
 	if (p->sched_class == rq->curr->sched_class)
 		rq->curr->sched_class->check_preempt_curr(rq, p, flags);
 	else if (sched_class_above(p->sched_class, rq->curr->sched_class))
-		resched_curr(rq);
+		resched_curr(rq, true);
 
 	/*
 	 * A queue event has occurred, and we're going to schedule. In
@@ -2764,7 +2770,7 @@ int push_cpu_stop(void *arg)
 		deactivate_task(rq, p, 0);
 		set_task_cpu(p, lowest_rq->cpu);
 		activate_task(lowest_rq, p, 0);
-		resched_curr(lowest_rq);
+		resched_curr(lowest_rq, true);
 	}
 
 	double_unlock_balance(rq, lowest_rq);
@@ -3999,7 +4005,7 @@ void wake_up_if_idle(int cpu)
 	if (is_idle_task(rcu_dereference(rq->curr))) {
 		guard(rq_lock_irqsave)(rq);
 		if (is_idle_task(rq->curr))
-			resched_curr(rq);
+			resched_curr(rq, true);
 	}
 }
 
@@ -6333,7 +6339,7 @@ pick_next_task(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
 			continue;
 		}
 
-		resched_curr(rq_i);
+		resched_curr(rq_i, false);
 	}
 
 out_set_next:
@@ -6388,7 +6394,7 @@ static bool try_steal_cookie(int this, int that)
 		set_task_cpu(p, this);
 		activate_task(dst, p, 0);
 
-		resched_curr(dst);
+		resched_curr(dst, false);
 
 		success = true;
 		break;
@@ -8743,7 +8749,7 @@ int __sched yield_to(struct task_struct *p, bool preempt)
 		 * fairness.
 		 */
 		if (preempt && rq != p_rq)
-			resched_curr(p_rq);
+			resched_curr(p_rq, true);
 	}
 
 out_unlock:
@@ -10300,7 +10306,7 @@ void sched_move_task(struct task_struct *tsk)
 		 * throttled one but it's still the running task. Trigger a
 		 * resched to make sure that task can still run.
 		 */
-		resched_curr(rq);
+		resched_curr(rq, true);
 	}
 
 unlock:
diff --git a/kernel/sched/core_sched.c b/kernel/sched/core_sched.c
index a57fd8f27498..32f234f2a210 100644
--- a/kernel/sched/core_sched.c
+++ b/kernel/sched/core_sched.c
@@ -89,7 +89,7 @@ static unsigned long sched_core_update_cookie(struct task_struct *p,
 	 * next scheduling edge, rather than always forcing a reschedule here.
 	 */
 	if (task_on_cpu(rq, p))
-		resched_curr(rq);
+		resched_curr(rq, false);
 
 	task_rq_unlock(rq, p, &rf);
 
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index e6815c3bd2f0..ecb47b5e9588 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1177,7 +1177,7 @@ static enum hrtimer_restart dl_task_timer(struct hrtimer *timer)
 	if (dl_task(rq->curr))
 		check_preempt_curr_dl(rq, p, 0);
 	else
-		resched_curr(rq);
+		resched_curr(rq, false);
 
 #ifdef CONFIG_SMP
 	/*
@@ -1367,7 +1367,7 @@ static void update_curr_dl(struct rq *rq)
 			enqueue_task_dl(rq, curr, ENQUEUE_REPLENISH);
 
 		if (!is_leftmost(curr, &rq->dl))
-			resched_curr(rq);
+			resched_curr(rq, false);
 	}
 
 	/*
@@ -1914,7 +1914,7 @@ static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p)
 	    cpudl_find(&rq->rd->cpudl, p, NULL))
 		return;
 
-	resched_curr(rq);
+	resched_curr(rq, false);
 }
 
 static int balance_dl(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
@@ -1943,7 +1943,7 @@ static void check_preempt_curr_dl(struct rq *rq, struct task_struct *p,
 				  int flags)
 {
 	if (dl_entity_preempt(&p->dl, &rq->curr->dl)) {
-		resched_curr(rq);
+		resched_curr(rq, false);
 		return;
 	}
 
@@ -2307,7 +2307,7 @@ static int push_dl_task(struct rq *rq)
 	if (dl_task(rq->curr) &&
 	    dl_time_before(next_task->dl.deadline, rq->curr->dl.deadline) &&
 	    rq->curr->nr_cpus_allowed > 1) {
-		resched_curr(rq);
+		resched_curr(rq, false);
 		return 0;
 	}
 
@@ -2353,7 +2353,7 @@ static int push_dl_task(struct rq *rq)
 	activate_task(later_rq, next_task, 0);
 	ret = 1;
 
-	resched_curr(later_rq);
+	resched_curr(later_rq, false);
 
 	double_unlock_balance(rq, later_rq);
 
@@ -2457,7 +2457,7 @@ static void pull_dl_task(struct rq *this_rq)
 	}
 
 	if (resched)
-		resched_curr(this_rq);
+		resched_curr(this_rq, false);
 }
 
 /*
@@ -2654,7 +2654,7 @@ static void switched_to_dl(struct rq *rq, struct task_struct *p)
 		if (dl_task(rq->curr))
 			check_preempt_curr_dl(rq, p, 0);
 		else
-			resched_curr(rq);
+			resched_curr(rq, false);
 	} else {
 		update_dl_rq_load_avg(rq_clock_pelt(rq), rq, 0);
 	}
@@ -2687,7 +2687,7 @@ static void prio_changed_dl(struct rq *rq, struct task_struct *p,
 		 * runqueue.
 		 */
 		if (dl_time_before(rq->dl.earliest_dl.curr, p->dl.deadline))
-			resched_curr(rq);
+			resched_curr(rq, false);
 	} else {
 		/*
 		 * Current may not be deadline in case p was throttled but we
@@ -2697,14 +2697,14 @@ static void prio_changed_dl(struct rq *rq, struct task_struct *p,
 		 */
 		if (!dl_task(rq->curr) ||
 		    dl_time_before(p->dl.deadline, rq->curr->dl.deadline))
-			resched_curr(rq);
+			resched_curr(rq, false);
 	}
 #else
 	/*
 	 * We don't know if p has a earlier or later deadline, so let's blindly
 	 * set a (maybe not needed) rescheduling point.
 	 */
-	resched_curr(rq);
+	resched_curr(rq, false);
 #endif
 }
 
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index fe7e5e9b2207..448fe36e7bbb 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1046,7 +1046,7 @@ static void update_deadline(struct cfs_rq *cfs_rq,
 	if (tick && test_tsk_thread_flag(rq->curr, TIF_NEED_RESCHED_LAZY))
 		__resched_curr(rq, RESCHED_eager);
 	else
-		resched_curr(rq);
+		resched_curr(rq, false);
 
 	clear_buddies(cfs_rq, se);
 }
@@ -5337,7 +5337,7 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
 	 * validating it and just reschedule.
 	 */
 	if (queued) {
-		resched_curr(rq_of(cfs_rq));
+		resched_curr(rq_of(cfs_rq), false);
 		return;
 	}
 	/*
@@ -5483,7 +5483,7 @@ static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec)
 	 * hierarchy can be throttled
 	 */
 	if (!assign_cfs_rq_runtime(cfs_rq) && likely(cfs_rq->curr))
-		resched_curr(rq_of(cfs_rq));
+		resched_curr(rq_of(cfs_rq), false);
 }
 
 static __always_inline
@@ -5743,7 +5743,7 @@ void unthrottle_cfs_rq(struct cfs_rq *cfs_rq)
 
 	/* Determine whether we need to wake up potentially idle CPU: */
 	if (rq->curr == rq->idle && rq->cfs.nr_running)
-		resched_curr(rq);
+		resched_curr(rq, false);
 }
 
 #ifdef CONFIG_SMP
@@ -6448,7 +6448,7 @@ static void hrtick_start_fair(struct rq *rq, struct task_struct *p)
 
 		if (delta < 0) {
 			if (task_current(rq, p))
-				resched_curr(rq);
+				resched_curr(rq, false);
 			return;
 		}
 		hrtick_start(rq, delta);
@@ -8143,7 +8143,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
 	return;
 
 preempt:
-	resched_curr(rq);
+	resched_curr(rq, false);
 }
 
 #ifdef CONFIG_SMP
@@ -12294,7 +12294,7 @@ static inline void task_tick_core(struct rq *rq, struct task_struct *curr)
 	 */
 	if (rq->core->core_forceidle_count && rq->cfs.nr_running == 1 &&
 	    __entity_slice_used(&curr->se, MIN_NR_TASKS_DURING_FORCEIDLE))
-		resched_curr(rq);
+		resched_curr(rq, false);
 }
 
 /*
@@ -12459,7 +12459,7 @@ prio_changed_fair(struct rq *rq, struct task_struct *p, int oldprio)
 	 */
 	if (task_current(rq, p)) {
 		if (p->prio > oldprio)
-			resched_curr(rq);
+			resched_curr(rq, false);
 	} else
 		check_preempt_curr(rq, p, 0);
 }
@@ -12561,7 +12561,7 @@ static void switched_to_fair(struct rq *rq, struct task_struct *p)
 		 * if we can still preempt the current task.
 		 */
 		if (task_current(rq, p))
-			resched_curr(rq);
+			resched_curr(rq, false);
 		else
 			check_preempt_curr(rq, p, 0);
 	}
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index 9b4c2967b2b7..9bf30732b03f 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -92,6 +92,11 @@ SCHED_FEAT(HZ_BW, true)
 
 #if defined(CONFIG_PREEMPT)
 SCHED_FEAT(FORCE_PREEMPT, true)
+SCHED_FEAT(PREEMPT_PRIORITY, true)
+#elif defined(CONFIG_PREEMPT_VOLUNTARY)
+SCHED_FEAT(FORCE_PREEMPT, false)
+SCHED_FEAT(PREEMPT_PRIORITY, true)
 #else
 SCHED_FEAT(FORCE_PREEMPT, false)
+SCHED_FEAT(PREEMPT_PRIORITY, false)
 #endif
diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c
index eacd204e2879..3ef039869be9 100644
--- a/kernel/sched/idle.c
+++ b/kernel/sched/idle.c
@@ -403,7 +403,7 @@ balance_idle(struct rq *rq, struct task_struct *prev, struct rq_flags *rf)
  */
 static void check_preempt_curr_idle(struct rq *rq, struct task_struct *p, int flags)
 {
-	resched_curr(rq);
+	resched_curr(rq, true);
 }
 
 static void put_prev_task_idle(struct rq *rq, struct task_struct *prev)
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index 5fdb93f1b87e..8d87e42d30d8 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -589,7 +589,7 @@ static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
 			enqueue_rt_entity(rt_se, 0);
 
 		if (rt_rq->highest_prio.curr < curr->prio)
-			resched_curr(rq);
+			resched_curr(rq, false);
 	}
 }
 
@@ -682,7 +682,7 @@ static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
 		return;
 
 	enqueue_top_rt_rq(rt_rq);
-	resched_curr(rq);
+	resched_curr(rq, false);
 }
 
 static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
@@ -1076,7 +1076,7 @@ static void update_curr_rt(struct rq *rq)
 			rt_rq->rt_time += delta_exec;
 			exceeded = sched_rt_runtime_exceeded(rt_rq);
 			if (exceeded)
-				resched_curr(rq);
+				resched_curr(rq, false);
 			raw_spin_unlock(&rt_rq->rt_runtime_lock);
 			if (exceeded)
 				do_start_rt_bandwidth(sched_rt_bandwidth(rt_rq));
@@ -1691,7 +1691,7 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
 	 * to try and push the current task away:
 	 */
 	requeue_task_rt(rq, p, 1);
-	resched_curr(rq);
+	resched_curr(rq, false);
 }
 
 static int balance_rt(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
@@ -1718,7 +1718,7 @@ static int balance_rt(struct rq *rq, struct task_struct *p, struct rq_flags *rf)
 static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int flags)
 {
 	if (p->prio < rq->curr->prio) {
-		resched_curr(rq);
+		resched_curr(rq, false);
 		return;
 	}
 
@@ -2074,7 +2074,7 @@ static int push_rt_task(struct rq *rq, bool pull)
 	 * just reschedule current.
 	 */
 	if (unlikely(next_task->prio < rq->curr->prio)) {
-		resched_curr(rq);
+		resched_curr(rq, false);
 		return 0;
 	}
 
@@ -2162,7 +2162,7 @@ static int push_rt_task(struct rq *rq, bool pull)
 	deactivate_task(rq, next_task, 0);
 	set_task_cpu(next_task, lowest_rq->cpu);
 	activate_task(lowest_rq, next_task, 0);
-	resched_curr(lowest_rq);
+	resched_curr(lowest_rq, false);
 	ret = 1;
 
 	double_unlock_balance(rq, lowest_rq);
@@ -2456,7 +2456,7 @@ static void pull_rt_task(struct rq *this_rq)
 	}
 
 	if (resched)
-		resched_curr(this_rq);
+		resched_curr(this_rq, false);
 }
 
 /*
@@ -2555,7 +2555,7 @@ static void switched_to_rt(struct rq *rq, struct task_struct *p)
 			rt_queue_push_tasks(rq);
 #endif /* CONFIG_SMP */
 		if (p->prio < rq->curr->prio && cpu_online(cpu_of(rq)))
-			resched_curr(rq);
+			resched_curr(rq, false);
 	}
 }
 
@@ -2583,11 +2583,11 @@ prio_changed_rt(struct rq *rq, struct task_struct *p, int oldprio)
 		 * then reschedule.
 		 */
 		if (p->prio > rq->rt.highest_prio.curr)
-			resched_curr(rq);
+			resched_curr(rq, false);
 #else
 		/* For UP simply resched on drop of prio */
 		if (oldprio < p->prio)
-			resched_curr(rq);
+			resched_curr(rq, false);
 #endif /* CONFIG_SMP */
 	} else {
 		/*
@@ -2596,7 +2596,7 @@ prio_changed_rt(struct rq *rq, struct task_struct *p, int oldprio)
 		 * then reschedule.
 		 */
 		if (p->prio < rq->curr->prio)
-			resched_curr(rq);
+			resched_curr(rq, false);
 	}
 }
 
@@ -2668,7 +2668,7 @@ static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued)
 			if (test_tsk_thread_flag(rq->curr, TIF_NEED_RESCHED_LAZY))
 				__resched_curr(rq, RESCHED_eager);
 			else
-				resched_curr(rq);
+				resched_curr(rq, false);
 			return;
 		}
 	}
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index e29a8897f573..9a745dd7482f 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2435,7 +2435,7 @@ extern void init_sched_fair_class(void);
 extern void reweight_task(struct task_struct *p, int prio);
 
 extern void __resched_curr(struct rq *rq, resched_t rs);
-extern void resched_curr(struct rq *rq);
+extern void resched_curr(struct rq *rq, bool above);
 extern void resched_cpu(int cpu);
 
 extern struct rt_bandwidth def_rt_bandwidth;
-- 
2.31.1
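
[Editor's note, not part of the patch: to make the intended policy concrete,
here is a minimal userspace sketch of the decision the modified
resched_curr() makes. It is not kernel code: resched_policy(), force_eager
and the feat_* flags below are hypothetical stand-ins for
sched_feat(FORCE_PREEMPT), sched_feat(PREEMPT_PRIORITY) and the idle/
userspace special cases handled earlier in resched_curr() by this series.]

/*
 * Illustrative sketch only: model of the lazy-vs-eager resched choice.
 */
#include <stdbool.h>
#include <stdio.h>

enum resched { RESCHED_lazy, RESCHED_eager };

static bool feat_force_preempt;		/* stand-in for FORCE_PREEMPT */
static bool feat_preempt_priority;	/* stand-in for PREEMPT_PRIORITY */

/* Decide how to mark the current task for rescheduling. */
static enum resched resched_policy(bool force_eager, bool above)
{
	enum resched rs = RESCHED_lazy;

	/* Full preemption, or a context that demands it: always eager. */
	if (feat_force_preempt || force_eager)
		return RESCHED_eager;

	/* Voluntary preemption: only a class above reschedules eagerly. */
	if (feat_preempt_priority && above)
		rs = RESCHED_eager;

	return rs;
}

int main(void)
{
	/* CONFIG_PREEMPT_VOLUNTARY: FORCE_PREEMPT off, PREEMPT_PRIORITY on. */
	feat_force_preempt = false;
	feat_preempt_priority = true;

	/* Same-class preemption stays lazy: current runs to completion. */
	printf("same class : %s\n",
	       resched_policy(false, false) == RESCHED_eager ? "eager" : "lazy");

	/* An RT/deadline task above a CFS task reschedules it eagerly. */
	printf("class above: %s\n",
	       resched_policy(false, true) == RESCHED_eager ? "eager" : "lazy");

	return 0;
}

[The runtime toggle mentioned in the commit message would then amount to
flipping the PREEMPT_PRIORITY scheduler feature, typically via the sched
features debugfs interface on SCHED_DEBUG kernels.]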