The kernel can have long running tasks which don't pass through
preemption points for prolonged periods and so will never see the
scheduler's polite TIF_NEED_RESCHED_LAZY.

Force a reschedule at the next tick by upgrading to TIF_NEED_RESCHED,
which will get folded into the preempt_count and cause a reschedule at
the next safe preemption point.

TODO: deadline scheduler.

Originally-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Signed-off-by: Ankur Arora <ankur.a.arora@xxxxxxxxxx>
---
 kernel/sched/fair.c  | 32 +++++++++++++++++++++++---------
 kernel/sched/rt.c    |  7 ++++++-
 kernel/sched/sched.h |  1 +
 3 files changed, 30 insertions(+), 10 deletions(-)

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 4d86c618ffa2..fe7e5e9b2207 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1016,8 +1016,11 @@ static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se);
  * XXX: strictly: vd_i += N*r_i/w_i such that: vd_i > ve_i
  * this is probably good enough.
  */
-static void update_deadline(struct cfs_rq *cfs_rq, struct sched_entity *se)
+static void update_deadline(struct cfs_rq *cfs_rq,
+			    struct sched_entity *se, bool tick)
 {
+	struct rq *rq = rq_of(cfs_rq);
+
 	if ((s64)(se->vruntime - se->deadline) < 0)
 		return;
 
@@ -1033,13 +1036,19 @@ static void update_deadline(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	 */
 	se->deadline = se->vruntime + calc_delta_fair(se->slice, se);
 
+	if (cfs_rq->nr_running < 2)
+		return;
+
 	/*
-	 * The task has consumed its request, reschedule.
+	 * The task has consumed its request, reschedule; eagerly
+	 * if it ignored our last lazy reschedule.
 	 */
-	if (cfs_rq->nr_running > 1) {
-		resched_curr(rq_of(cfs_rq));
-		clear_buddies(cfs_rq, se);
-	}
+	if (tick && test_tsk_thread_flag(rq->curr, TIF_NEED_RESCHED_LAZY))
+		__resched_curr(rq, RESCHED_eager);
+	else
+		resched_curr(rq);
+
+	clear_buddies(cfs_rq, se);
 }
 
 #include "pelt.h"
@@ -1147,7 +1156,7 @@ static void update_tg_load_avg(struct cfs_rq *cfs_rq)
 /*
  * Update the current task's runtime statistics.
  */
-static void update_curr(struct cfs_rq *cfs_rq)
+static void __update_curr(struct cfs_rq *cfs_rq, bool tick)
 {
 	struct sched_entity *curr = cfs_rq->curr;
 	u64 now = rq_clock_task(rq_of(cfs_rq));
@@ -1174,7 +1183,7 @@ static void update_curr(struct cfs_rq *cfs_rq)
 	schedstat_add(cfs_rq->exec_clock, delta_exec);
 
 	curr->vruntime += calc_delta_fair(delta_exec, curr);
-	update_deadline(cfs_rq, curr);
+	update_deadline(cfs_rq, curr, tick);
 	update_min_vruntime(cfs_rq);
 
 	if (entity_is_task(curr)) {
@@ -1188,6 +1197,11 @@ static void update_curr(struct cfs_rq *cfs_rq)
 	account_cfs_rq_runtime(cfs_rq, delta_exec);
 }
 
+static void update_curr(struct cfs_rq *cfs_rq)
+{
+	__update_curr(cfs_rq, false);
+}
+
 static void update_curr_fair(struct rq *rq)
 {
 	update_curr(cfs_rq_of(&rq->curr->se));
@@ -5309,7 +5323,7 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
 	/*
 	 * Update run-time statistics of the 'current'.
 	 */
-	update_curr(cfs_rq);
+	__update_curr(cfs_rq, true);
 
 	/*
 	 * Ensure that runnable average is periodically updated.
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index a79ce6746dd0..5fdb93f1b87e 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -2664,7 +2664,12 @@ static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued)
 	for_each_sched_rt_entity(rt_se) {
 		if (rt_se->run_list.prev != rt_se->run_list.next) {
 			requeue_task_rt(rq, p, 0);
-			resched_curr(rq);
+
+			if (test_tsk_thread_flag(rq->curr, TIF_NEED_RESCHED_LAZY))
+				__resched_curr(rq, RESCHED_eager);
+			else
+				resched_curr(rq);
+
 			return;
 		}
 	}
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 9e1329a4e890..e29a8897f573 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2434,6 +2434,7 @@ extern void init_sched_fair_class(void);
 
 extern void reweight_task(struct task_struct *p, int prio);
 
+extern void __resched_curr(struct rq *rq, resched_t rs);
 extern void resched_curr(struct rq *rq);
 extern void resched_cpu(int cpu);
 
-- 
2.31.1
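
[Editor's note, not part of the patch] For readers without the earlier
patches in this series: __resched_curr() is introduced there, and the
lazy/eager distinction the tick paths above rely on is roughly the one
sketched below. This is only an illustration under that assumption, not
the series' actual implementation (which also has to handle remote CPUs
and IPIs); set_tsk_need_resched(), set_preempt_need_resched() and
set_tsk_thread_flag() are the existing kernel helpers, while resched_t,
RESCHED_eager and __resched_curr_sketch() follow this series' naming.

/* Sketch only: local-CPU case. */
static inline void __resched_curr_sketch(struct rq *rq, resched_t rs)
{
	struct task_struct *curr = rq->curr;

	if (rs == RESCHED_eager) {
		/*
		 * Eager: set TIF_NEED_RESCHED and fold it into the
		 * preempt_count so the next safe preemption point
		 * (preempt_enable(), irq return) reschedules.
		 */
		set_tsk_need_resched(curr);
		set_preempt_need_resched();
	} else {
		/*
		 * Lazy: only honoured at exit-to-user or, failing
		 * that, upgraded at the next tick as this patch does.
		 */
		set_tsk_thread_flag(curr, TIF_NEED_RESCHED_LAZY);
	}
}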