Peter Zijlstra <peterz@xxxxxxxxxxxxx> writes:

> On Tue, Nov 07, 2023 at 01:57:28PM -0800, Ankur Arora wrote:
>
>> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
>> index 4d86c618ffa2..fe7e5e9b2207 100644
>> --- a/kernel/sched/fair.c
>> +++ b/kernel/sched/fair.c
>> @@ -1016,8 +1016,11 @@ static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se);
>>   * XXX: strictly: vd_i += N*r_i/w_i such that: vd_i > ve_i
>>   * this is probably good enough.
>>   */
>> -static void update_deadline(struct cfs_rq *cfs_rq, struct sched_entity *se)
>> +static void update_deadline(struct cfs_rq *cfs_rq,
>> +			     struct sched_entity *se, bool tick)
>>  {
>> +	struct rq *rq = rq_of(cfs_rq);
>> +
>>  	if ((s64)(se->vruntime - se->deadline) < 0)
>>  		return;
>>
>> @@ -1033,13 +1036,19 @@ static void update_deadline(struct cfs_rq *cfs_rq, struct sched_entity *se)
>>  	 */
>>  	se->deadline = se->vruntime + calc_delta_fair(se->slice, se);
>>
>> +	if (cfs_rq->nr_running < 2)
>> +		return;
>> +
>>  	/*
>> -	 * The task has consumed its request, reschedule.
>> +	 * The task has consumed its request, reschedule; eagerly
>> +	 * if it ignored our last lazy reschedule.
>>  	 */
>> -	if (cfs_rq->nr_running > 1) {
>> -		resched_curr(rq_of(cfs_rq));
>> -		clear_buddies(cfs_rq, se);
>> -	}
>> +	if (tick && test_tsk_thread_flag(rq->curr, TIF_NEED_RESCHED_LAZY))
>> +		__resched_curr(rq, RESCHED_eager);
>> +	else
>> +		resched_curr(rq);
>> +
>> +	clear_buddies(cfs_rq, se);
>>  }
>>
>>  #include "pelt.h"
>> @@ -1147,7 +1156,7 @@ static void update_tg_load_avg(struct cfs_rq *cfs_rq)
>>  /*
>>   * Update the current task's runtime statistics.
>>   */
>> -static void update_curr(struct cfs_rq *cfs_rq)
>> +static void __update_curr(struct cfs_rq *cfs_rq, bool tick)
>>  {
>>  	struct sched_entity *curr = cfs_rq->curr;
>>  	u64 now = rq_clock_task(rq_of(cfs_rq));
>> @@ -1174,7 +1183,7 @@ static void update_curr(struct cfs_rq *cfs_rq)
>>  	schedstat_add(cfs_rq->exec_clock, delta_exec);
>>
>>  	curr->vruntime += calc_delta_fair(delta_exec, curr);
>> -	update_deadline(cfs_rq, curr);
>> +	update_deadline(cfs_rq, curr, tick);
>>  	update_min_vruntime(cfs_rq);
>>
>>  	if (entity_is_task(curr)) {
>> @@ -1188,6 +1197,11 @@ static void update_curr(struct cfs_rq *cfs_rq)
>>  	account_cfs_rq_runtime(cfs_rq, delta_exec);
>>  }
>>
>> +static void update_curr(struct cfs_rq *cfs_rq)
>> +{
>> +	__update_curr(cfs_rq, false);
>> +}
>> +
>>  static void update_curr_fair(struct rq *rq)
>>  {
>>  	update_curr(cfs_rq_of(&rq->curr->se));
>> @@ -5309,7 +5323,7 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
>>  	/*
>>  	 * Update run-time statistics of the 'current'.
>>  	 */
>> -	update_curr(cfs_rq);
>> +	__update_curr(cfs_rq, true);
>>
>>  	/*
>>  	 * Ensure that runnable average is periodically updated.
>
> I'm thinking this will be less of a mess if you flip it around some.
>
> (ignore the hrtick mess, I'll try and get that cleaned up)
>
> This way you have two distinct sites to handle the preemption. The
> update_curr() would be 'FULL ? force : lazy' while the tick one gets the
> special magic bits.

Thanks, that simplified the changes here quite nicely.

--
ankur
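
Concretely, the flipped shape could look something like the sketch below.
This is only an illustration of the description above, not Peter's elided
snippet: RESCHED_lazy (assumed as the counterpart of RESCHED_eager from
this series) and preempt_model_full() (standing in for the 'FULL' test)
are not taken from this thread.

/*
 * Sketch, not from the thread: update_deadline() loses the 'tick'
 * plumbing, and the preemption decision splits across two sites.
 */
static void update_deadline(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
	if ((s64)(se->vruntime - se->deadline) < 0)
		return;

	/* EEVDF: grant the next slice. */
	se->deadline = se->vruntime + calc_delta_fair(se->slice, se);

	if (cfs_rq->nr_running < 2)
		return;

	/* Site 1: slice exhausted, so 'FULL ? force : lazy'. */
	__resched_curr(rq_of(cfs_rq), preempt_model_full() ?
		       RESCHED_eager : RESCHED_lazy);
	clear_buddies(cfs_rq, se);
}

static void
entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
{
	/*
	 * Site 2: the "special magic bits". A lazy reschedule that is
	 * still pending a whole tick later was ignored; escalate it
	 * to an eager one.
	 */
	if (test_tsk_thread_flag(rq_of(cfs_rq)->curr, TIF_NEED_RESCHED_LAZY))
		__resched_curr(rq_of(cfs_rq), RESCHED_eager);

	/* May set a fresh lazy reschedule via update_deadline(). */
	update_curr(cfs_rq);

	/* ... rest of entity_tick() unchanged ... */
}

With this split, the bool tick argument and the __update_curr() wrapper in
the quoted patch disappear: each call site states its own preemption
policy instead of update_deadline() reconstructing it from a flag.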