Patch "sched/dlserver: Fix dlserver double enqueue" has been added to the 6.12-stable tree

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This is a note to let you know that I've just added the patch titled

    sched/dlserver: Fix dlserver double enqueue

to the 6.12-stable tree which can be found at:
    http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary

The filename of the patch is:
     sched-dlserver-fix-dlserver-double-enqueue.patch
and it can be found in the queue-6.12 subdirectory.

If you, or anyone else, feels it should not be added to the stable tree,
please let <stable@xxxxxxxxxxxxxxx> know about it.



commit 44c8bb66dcae7fbaaa7077cd0e045d83aac1b658
Author: Vineeth Pillai (Google) <vineeth@xxxxxxxxxxxxxxx>
Date:   Thu Dec 12 22:22:36 2024 -0500

    sched/dlserver: Fix dlserver double enqueue
    
    [ Upstream commit b53127db1dbf7f1047cf35c10922d801dcd40324 ]
    
    dlserver can get dequeued during a dlserver pick_task due to the delayed
    dequeue feature and this can lead to issues with dlserver logic as it
    still thinks that dlserver is on the runqueue. The dlserver throttling
    and replenish logic gets confused and can lead to double enqueue of
    dlserver.
    
    Double enqueue of dlserver could happen due to a couple of reasons:
    
    Case 1
    ------
    
    Delayed dequeue feature[1] can cause dlserver being stopped during a
    pick initiated by dlserver:
      __pick_next_task
       pick_task_dl -> server_pick_task
        pick_task_fair
         pick_next_entity (if (sched_delayed))
          dequeue_entities
           dl_server_stop
    
    server_pick_task goes ahead with update_curr_dl_se without knowing that
    dlserver is dequeued and this confuses the logic and may lead to
    unintended enqueue while the server is stopped.
    
    Case 2
    ------
    A race condition between a task dequeue on one cpu and the same task's
    enqueue on this cpu by a remote cpu while the lock is released can cause
    a dlserver double enqueue.
    
    One cpu would be in the schedule() and releasing RQ-lock:
    
    current->state = TASK_INTERRUPTIBLE;
            schedule();
              deactivate_task()
                dl_server_stop();
              pick_next_task()
                pick_next_task_fair()
                  sched_balance_newidle()
                    rq_unlock(this_rq)
    
    at which point another CPU can take our RQ-lock and do:
    
            try_to_wake_up()
              ttwu_queue()
                rq_lock()
                ...
                activate_task()
                  dl_server_start() --> first enqueue
                wakeup_preempt() := check_preempt_wakeup_fair()
                  update_curr()
                    update_curr_task()
                      if (current->dl_server)
                        dl_server_update()
                          enqueue_dl_entity() --> second enqueue
    
    This bug was not apparent as the enqueue in dl_server_start doesn't
    usually happen because of the defer logic. But as a side effect of the
    first case (dequeue during dlserver pick), dl_throttled and dl_yield will
    be set and this causes the time accounting of dlserver to get messed up,
    then leading to an enqueue in dl_server_start.
    
    Have an explicit flag representing the status of dlserver to avoid the
    confusion. This is set in dl_server_start and reset in dl_server_stop.
    
    Fixes: 63ba8422f876 ("sched/deadline: Introduce deadline servers")
    Suggested-by: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
    Signed-off-by: "Vineeth Pillai (Google)" <vineeth@xxxxxxxxxxxxxxx>
    Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
    Tested-by: Marcel Ziswiler <marcel.ziswiler@xxxxxxxxxxxxxxx> # ROCK 5B
    Link: https://lkml.kernel.org/r/20241213032244.877029-1-vineeth@xxxxxxxxxxxxxxx
    Signed-off-by: Sasha Levin <sashal@xxxxxxxxxx>

diff --git a/include/linux/sched.h b/include/linux/sched.h
index bb343136ddd0..c14446c6164d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -656,6 +656,12 @@ struct sched_dl_entity {
 	 * @dl_defer_armed tells if the deferrable server is waiting
 	 * for the replenishment timer to activate it.
 	 *
+	 * @dl_server_active tells if the dlserver is active(started).
+	 * dlserver is started on first cfs enqueue on an idle runqueue
+	 * and is stopped when a dequeue results in 0 cfs tasks on the
+	 * runqueue. In other words, dlserver is active only when cpu's
+	 * runqueue has atleast one cfs task.
+	 *
 	 * @dl_defer_running tells if the deferrable server is actually
 	 * running, skipping the defer phase.
 	 */
@@ -664,6 +670,7 @@ struct sched_dl_entity {
 	unsigned int			dl_non_contending : 1;
 	unsigned int			dl_overrun	  : 1;
 	unsigned int			dl_server         : 1;
+	unsigned int			dl_server_active  : 1;
 	unsigned int			dl_defer	  : 1;
 	unsigned int			dl_defer_armed	  : 1;
 	unsigned int			dl_defer_running  : 1;
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index fc6f41ac33eb..a17c23b53049 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -1647,6 +1647,7 @@ void dl_server_start(struct sched_dl_entity *dl_se)
 	if (!dl_se->dl_runtime)
 		return;
 
+	dl_se->dl_server_active = 1;
 	enqueue_dl_entity(dl_se, ENQUEUE_WAKEUP);
 	if (!dl_task(dl_se->rq->curr) || dl_entity_preempt(dl_se, &rq->curr->dl))
 		resched_curr(dl_se->rq);
@@ -1661,6 +1662,7 @@ void dl_server_stop(struct sched_dl_entity *dl_se)
 	hrtimer_try_to_cancel(&dl_se->dl_timer);
 	dl_se->dl_defer_armed = 0;
 	dl_se->dl_throttled = 0;
+	dl_se->dl_server_active = 0;
 }
 
 void dl_server_init(struct sched_dl_entity *dl_se, struct rq *rq,
@@ -2420,8 +2422,10 @@ static struct task_struct *__pick_task_dl(struct rq *rq)
 	if (dl_server(dl_se)) {
 		p = dl_se->server_pick_task(dl_se);
 		if (!p) {
-			dl_se->dl_yielded = 1;
-			update_curr_dl_se(rq, dl_se, 0);
+			if (dl_server_active(dl_se)) {
+				dl_se->dl_yielded = 1;
+				update_curr_dl_se(rq, dl_se, 0);
+			}
 			goto again;
 		}
 		rq->dl_server = dl_se;
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index c53696275ca1..f2ef520513c4 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -398,6 +398,11 @@ extern void __dl_server_attach_root(struct sched_dl_entity *dl_se, struct rq *rq
 extern int dl_server_apply_params(struct sched_dl_entity *dl_se,
 		    u64 runtime, u64 period, bool init);
 
+static inline bool dl_server_active(struct sched_dl_entity *dl_se)
+{
+	return dl_se->dl_server_active;
+}
+
 #ifdef CONFIG_CGROUP_SCHED
 
 extern struct list_head task_groups;




[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux