Peter Zijlstra <peterz@xxxxxxxxxxxxx> writes:

> On Tue, Nov 07, 2023 at 01:57:28PM -0800, Ankur Arora wrote:
>
>> diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
>> index 4d86c618ffa2..fe7e5e9b2207 100644
>> --- a/kernel/sched/fair.c
>> +++ b/kernel/sched/fair.c
>> @@ -1016,8 +1016,11 @@ static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se);
>>   * XXX: strictly: vd_i += N*r_i/w_i such that: vd_i > ve_i
>>   * this is probably good enough.
>>   */
>> -static void update_deadline(struct cfs_rq *cfs_rq, struct sched_entity *se)
>> +static void update_deadline(struct cfs_rq *cfs_rq,
>> +			     struct sched_entity *se, bool tick)
>>  {
>> +	struct rq *rq = rq_of(cfs_rq);
>> +
>>  	if ((s64)(se->vruntime - se->deadline) < 0)
>>  		return;
>>
>> @@ -1033,13 +1036,19 @@ static void update_deadline(struct cfs_rq *cfs_rq, struct sched_entity *se)
>>  	 */
>>  	se->deadline = se->vruntime + calc_delta_fair(se->slice, se);
>>
>> +	if (cfs_rq->nr_running < 2)
>> +		return;
>> +
>>  	/*
>> -	 * The task has consumed its request, reschedule.
>> +	 * The task has consumed its request, reschedule; eagerly
>> +	 * if it ignored our last lazy reschedule.
>>  	 */
>> -	if (cfs_rq->nr_running > 1) {
>> -		resched_curr(rq_of(cfs_rq));
>> -		clear_buddies(cfs_rq, se);
>> -	}
>> +	if (tick && test_tsk_thread_flag(rq->curr, TIF_NEED_RESCHED_LAZY))
>> +		__resched_curr(rq, RESCHED_eager);
>> +	else
>> +		resched_curr(rq);
>> +
>> +	clear_buddies(cfs_rq, se);
>>  }
>>
>>  #include "pelt.h"
>> @@ -1147,7 +1156,7 @@ static void update_tg_load_avg(struct cfs_rq *cfs_rq)
>>  /*
>>   * Update the current task's runtime statistics.
>>   */
>> -static void update_curr(struct cfs_rq *cfs_rq)
>> +static void __update_curr(struct cfs_rq *cfs_rq, bool tick)
>>  {
>>  	struct sched_entity *curr = cfs_rq->curr;
>>  	u64 now = rq_clock_task(rq_of(cfs_rq));
>> @@ -1174,7 +1183,7 @@ static void update_curr(struct cfs_rq *cfs_rq)
>>  	schedstat_add(cfs_rq->exec_clock, delta_exec);
>>
>>  	curr->vruntime += calc_delta_fair(delta_exec, curr);
>> -	update_deadline(cfs_rq, curr);
>> +	update_deadline(cfs_rq, curr, tick);
>>  	update_min_vruntime(cfs_rq);
>>
>>  	if (entity_is_task(curr)) {
>> @@ -1188,6 +1197,11 @@ static void update_curr(struct cfs_rq *cfs_rq)
>>  	account_cfs_rq_runtime(cfs_rq, delta_exec);
>>  }
>>
>> +static void update_curr(struct cfs_rq *cfs_rq)
>> +{
>> +	__update_curr(cfs_rq, false);
>> +}
>> +
>>  static void update_curr_fair(struct rq *rq)
>>  {
>>  	update_curr(cfs_rq_of(&rq->curr->se));
>> @@ -5309,7 +5323,7 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
>>  	/*
>>  	 * Update run-time statistics of the 'current'.
>>  	 */
>> -	update_curr(cfs_rq);
>> +	__update_curr(cfs_rq, true);
>>
>>  	/*
>>  	 * Ensure that runnable average is periodically updated.
>
> I'm thinking this will be less of a mess if you flip it around some.
>
> (ignore the hrtick mess, I'll try and get that cleaned up)
>
> This way you have two distinct sites to handle the preemption. The
> update_curr() would be 'FULL ? force : lazy' while the tick one gets the
> special magic bits.

Thanks, that simplified the changes here quite nicely.

--
ankur
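
Concretely, the flipped shape could look something like the sketch below.
This is only an illustration of the description above, not Peter's elided
snippet: RESCHED_lazy (assumed as the counterpart of RESCHED_eager from
this series) and preempt_model_full() (standing in for the 'FULL' test)
are not taken from this thread.

/*
 * Sketch, not from the thread: update_deadline() loses the 'tick'
 * plumbing, and the preemption decision splits across two sites.
 */
static void update_deadline(struct cfs_rq *cfs_rq, struct sched_entity *se)
{
	if ((s64)(se->vruntime - se->deadline) < 0)
		return;

	/* EEVDF: grant the next slice. */
	se->deadline = se->vruntime + calc_delta_fair(se->slice, se);

	if (cfs_rq->nr_running < 2)
		return;

	/* Site 1: slice exhausted, so 'FULL ? force : lazy'. */
	__resched_curr(rq_of(cfs_rq), preempt_model_full() ?
		       RESCHED_eager : RESCHED_lazy);
	clear_buddies(cfs_rq, se);
}

static void
entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
{
	/*
	 * Site 2: the "special magic bits". A lazy reschedule that is
	 * still pending a whole tick later was ignored; escalate it
	 * to an eager one.
	 */
	if (test_tsk_thread_flag(rq_of(cfs_rq)->curr, TIF_NEED_RESCHED_LAZY))
		__resched_curr(rq_of(cfs_rq), RESCHED_eager);

	/* May set a fresh lazy reschedule via update_deadline(). */
	update_curr(cfs_rq);

	/* ... rest of entity_tick() unchanged ... */
}

With this split, the bool tick argument and the __update_curr() wrapper in
the quoted patch disappear: each call site states its own preemption
policy instead of update_deadline() reconstructing it from a flag.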