On Thu, Jul 11, 2024 at 03:00:04PM +0200, Valentin Schneider wrote:

> +static void throttle_one_task(struct cfs_rq *cfs_rq, struct task_struct *p)
> {
> +	long task_delta, idle_task_delta;
> +	struct sched_entity *se = &p->se;
> +
> +	list_add(&p->throttle_node, &cfs_rq->throttled_limbo_list);
>
> +	task_delta = 1;
> +	idle_task_delta = cfs_rq_is_idle(cfs_rq) ? 1 : 0;
> +
> +	for_each_sched_entity(se) {
> +		cfs_rq = cfs_rq_of(se);
> +
> +		if (!se->on_rq)
> +			return;
> +
> +		dequeue_entity(cfs_rq, se, DEQUEUE_SLEEP);
> +		cfs_rq->h_nr_running -= task_delta;
> +		cfs_rq->idle_h_nr_running -= idle_task_delta;
> +
> +		if (cfs_rq->load.weight) {
> +			/* Avoid re-evaluating load for this entity: */
> +			se = parent_entity(se);
> +			break;
> +		}
> +	}
> +
> +	for_each_sched_entity(se) {
> +		cfs_rq = cfs_rq_of(se);
> +		/* throttled entity or throttle-on-deactivate */
> +		if (!se->on_rq)
> +			goto throttle_done;
> +
> +		update_load_avg(cfs_rq, se, 0);
> +		se_update_runnable(se);
> +		cfs_rq->h_nr_running -= task_delta;
> +		cfs_rq->idle_h_nr_running -= idle_task_delta;
> +	}
> +
> +throttle_done:
> +	/* At this point se is NULL and we are at root level */
> +	sub_nr_running(rq_of(cfs_rq), 1);
> }

I know you're just moving code around, but we should look at whether we can
share code between this and dequeue_task_fair(). I have patches around this
in that eevdf series I should send out again; I'll try and have a stab at it.

> -static void task_throttle_cancel_irq_work_fn(struct irq_work *work)
> +static void throttle_cfs_rq_work(struct callback_head *work)
> {
> -	/* Write me */
> +	struct task_struct *p = container_of(work, struct task_struct, sched_throttle_work);
> +	struct sched_entity *se;
> +	struct rq *rq;
> +	struct cfs_rq *cfs_rq;
> +
> +	WARN_ON_ONCE(p != current);
> +	p->sched_throttle_work.next = &p->sched_throttle_work;
> +	/*
> +	 * If task is exiting, then there won't be a return to userspace, so we
> +	 * don't have to bother with any of this.
> +	 */
> +	if ((p->flags & PF_EXITING))
> +		return;
> +
> +	CLASS(task_rq_lock, rq_guard)(p);
> +	rq = rq_guard.rq;

The other way to write this is:

	scoped_guard (task_rq_lock, p) {
		struct rq *rq = scope.rq;

> +	se = &p->se;
> +	cfs_rq = cfs_rq_of(se);
> +
> +	/*
> +	 * If not in limbo, then either replenish has happened or this task got
> +	 * migrated out of the throttled cfs_rq, move along
> +	 */
> +	if (!cfs_rq->throttle_count)
> +		return;
> +
> +	update_rq_clock(rq);
> +
> +	throttle_one_task(cfs_rq, p);
> +
> +	resched_curr(rq);

	}

> }
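
Roughly, the scoped_guard() variant could end up looking like the sketch below
(untested, just the quoted body re-indented inside the scope; scope.rq is taken
from the snippet above):

	static void throttle_cfs_rq_work(struct callback_head *work)
	{
		struct task_struct *p = container_of(work, struct task_struct,
						     sched_throttle_work);

		WARN_ON_ONCE(p != current);
		p->sched_throttle_work.next = &p->sched_throttle_work;

		/* An exiting task never returns to userspace, nothing to do. */
		if (p->flags & PF_EXITING)
			return;

		scoped_guard (task_rq_lock, p) {
			struct rq *rq = scope.rq;
			struct sched_entity *se = &p->se;
			struct cfs_rq *cfs_rq = cfs_rq_of(se);

			/*
			 * If not in limbo, then either replenish has happened
			 * or this task got migrated out of the throttled
			 * cfs_rq, move along.
			 */
			if (!cfs_rq->throttle_count)
				return;

			update_rq_clock(rq);
			throttle_one_task(cfs_rq, p);
			resched_curr(rq);
		}
	}

The early return inside the scope still drops the rq lock, since the guard is
released whenever the scope is left, which is what makes this form preferable
to the CLASS() + explicit rq_guard.rq spelling.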