Patch "sched/fair: Add EAS checks before updating root_domain::overutilized" has been added to the 6.8-stable tree

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This is a note to let you know that I've just added the patch titled

    sched/fair: Add EAS checks before updating root_domain::overutilized

to the 6.8-stable tree which can be found at:
    http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary

The filename of the patch is:
     sched-fair-add-eas-checks-before-updating-root_domai.patch
and it can be found in the queue-6.8 subdirectory.

If you, or anyone else, feels it should not be added to the stable tree,
please let <stable@xxxxxxxxxxxxxxx> know about it.



commit ba2bbd9a790870e64ba072e54e314a23d19a2f19
Author: Shrikanth Hegde <sshegde@xxxxxxxxxxxxx>
Date:   Thu Mar 7 14:27:23 2024 +0530

    sched/fair: Add EAS checks before updating root_domain::overutilized
    
    [ Upstream commit be3a51e68f2f1b17250ce40d8872c7645b7a2991 ]
    
    root_domain::overutilized is only used for EAS(energy aware scheduler)
    to decide whether to do load balance or not. It is not used if EAS
    not possible.
    
    Currently enqueue_task_fair and task_tick_fair accesses, sometime updates
    this field. In update_sd_lb_stats it is updated often. This causes cache
    contention due to true sharing and burns a lot of cycles. ::overload and
    ::overutilized are part of the same cacheline. Updating it often invalidates
    the cacheline. That causes access to ::overload to slow down due to
    false sharing. Hence add EAS check before accessing/updating this field.
    EAS check is optimized at compile time or it is a static branch.
    Hence it shouldn't cost much.
    
    With the patch, both enqueue_task_fair and newidle_balance don't show
    up as hot routines in perf profile.
    
      6.8-rc4:
      7.18%  swapper          [kernel.vmlinux]              [k] enqueue_task_fair
      6.78%  s                [kernel.vmlinux]              [k] newidle_balance
    
      +patch:
      0.14%  swapper          [kernel.vmlinux]              [k] enqueue_task_fair
      0.00%  swapper          [kernel.vmlinux]              [k] newidle_balance
    
    While at it: trace_sched_overutilized_tp expect that second argument to
    be bool. So do a int to bool conversion for that.
    
    Fixes: 2802bf3cd936 ("sched/fair: Add over-utilization/tipping point indicator")
    Signed-off-by: Shrikanth Hegde <sshegde@xxxxxxxxxxxxx>
    Signed-off-by: Ingo Molnar <mingo@xxxxxxxxxx>
    Reviewed-by: Qais Yousef <qyousef@xxxxxxxxxxx>
    Reviewed-by: Srikar Dronamraju <srikar@xxxxxxxxxxxxx>
    Reviewed-by: Vincent Guittot <vincent.guittot@xxxxxxxxxx>
    Link: https://lore.kernel.org/r/20240307085725.444486-2-sshegde@xxxxxxxxxxxxx
    Signed-off-by: Sasha Levin <sashal@xxxxxxxxxx>

diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index aee5e7a70170c..a01269ed968a7 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -6669,22 +6669,42 @@ static inline void hrtick_update(struct rq *rq)
 #ifdef CONFIG_SMP
 static inline bool cpu_overutilized(int cpu)
 {
-	unsigned long rq_util_min = uclamp_rq_get(cpu_rq(cpu), UCLAMP_MIN);
-	unsigned long rq_util_max = uclamp_rq_get(cpu_rq(cpu), UCLAMP_MAX);
+	unsigned long  rq_util_min, rq_util_max;
+
+	if (!sched_energy_enabled())
+		return false;
+
+	rq_util_min = uclamp_rq_get(cpu_rq(cpu), UCLAMP_MIN);
+	rq_util_max = uclamp_rq_get(cpu_rq(cpu), UCLAMP_MAX);
 
 	/* Return true only if the utilization doesn't fit CPU's capacity */
 	return !util_fits_cpu(cpu_util_cfs(cpu), rq_util_min, rq_util_max, cpu);
 }
 
-static inline void update_overutilized_status(struct rq *rq)
+static inline void set_rd_overutilized_status(struct root_domain *rd,
+					      unsigned int status)
 {
-	if (!READ_ONCE(rq->rd->overutilized) && cpu_overutilized(rq->cpu)) {
-		WRITE_ONCE(rq->rd->overutilized, SG_OVERUTILIZED);
-		trace_sched_overutilized_tp(rq->rd, SG_OVERUTILIZED);
-	}
+	if (!sched_energy_enabled())
+		return;
+
+	WRITE_ONCE(rd->overutilized, status);
+	trace_sched_overutilized_tp(rd, !!status);
+}
+
+static inline void check_update_overutilized_status(struct rq *rq)
+{
+	/*
+	 * overutilized field is used for load balancing decisions only
+	 * if energy aware scheduler is being used
+	 */
+	if (!sched_energy_enabled())
+		return;
+
+	if (!READ_ONCE(rq->rd->overutilized) && cpu_overutilized(rq->cpu))
+		set_rd_overutilized_status(rq->rd, SG_OVERUTILIZED);
 }
 #else
-static inline void update_overutilized_status(struct rq *rq) { }
+static inline void check_update_overutilized_status(struct rq *rq) { }
 #endif
 
 /* Runqueue only has SCHED_IDLE tasks enqueued */
@@ -6785,7 +6805,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 	 * and the following generally works well enough in practice.
 	 */
 	if (!task_new)
-		update_overutilized_status(rq);
+		check_update_overutilized_status(rq);
 
 enqueue_throttle:
 	assert_list_leaf_cfs_rq(rq);
@@ -10621,19 +10641,14 @@ static inline void update_sd_lb_stats(struct lb_env *env, struct sd_lb_stats *sd
 		env->fbq_type = fbq_classify_group(&sds->busiest_stat);
 
 	if (!env->sd->parent) {
-		struct root_domain *rd = env->dst_rq->rd;
-
 		/* update overload indicator if we are at root domain */
-		WRITE_ONCE(rd->overload, sg_status & SG_OVERLOAD);
+		WRITE_ONCE(env->dst_rq->rd->overload, sg_status & SG_OVERLOAD);
 
 		/* Update over-utilization (tipping point, U >= 0) indicator */
-		WRITE_ONCE(rd->overutilized, sg_status & SG_OVERUTILIZED);
-		trace_sched_overutilized_tp(rd, sg_status & SG_OVERUTILIZED);
+		set_rd_overutilized_status(env->dst_rq->rd,
+					   sg_status & SG_OVERUTILIZED);
 	} else if (sg_status & SG_OVERUTILIZED) {
-		struct root_domain *rd = env->dst_rq->rd;
-
-		WRITE_ONCE(rd->overutilized, SG_OVERUTILIZED);
-		trace_sched_overutilized_tp(rd, SG_OVERUTILIZED);
+		set_rd_overutilized_status(env->dst_rq->rd, SG_OVERUTILIZED);
 	}
 
 	update_idle_cpu_scan(env, sum_util);
@@ -12639,7 +12654,7 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
 		task_tick_numa(rq, curr);
 
 	update_misfit_status(curr, rq);
-	update_overutilized_status(task_rq(curr));
+	check_update_overutilized_status(task_rq(curr));
 
 	task_tick_core(rq, curr);
 }




[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux