Patch "sched/psi: use kernfs polling functions for PSI trigger polling" has been added to the 6.4-stable tree

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This is a note to let you know that I've just added the patch titled

    sched/psi: use kernfs polling functions for PSI trigger polling

to the 6.4-stable tree which can be found at:
    http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary

The filename of the patch is:
     sched-psi-use-kernfs-polling-functions-for-psi-trigg.patch
and it can be found in the queue-6.4 subdirectory.

If you, or anyone else, feels it should not be added to the stable tree,
please let <stable@xxxxxxxxxxxxxxx> know about it.



commit 56dc7c53b82c1b75affc5981051b3679cdfd065f
Author: Suren Baghdasaryan <surenb@xxxxxxxxxx>
Date:   Thu Jun 29 17:56:12 2023 -0700

    sched/psi: use kernfs polling functions for PSI trigger polling
    
    [ Upstream commit aff037078ecaecf34a7c2afab1341815f90fba5e ]
    
    Destroying psi trigger in cgroup_file_release causes UAF issues when
    a cgroup is removed from under a polling process. This is happening
    because cgroup removal causes a call to cgroup_file_release while the
    actual file is still alive. Destroying the trigger at this point would
    also destroy its waitqueue head and if there is still a polling process
    on that file accessing the waitqueue, it will step on the freed pointer:
    
    do_select
      vfs_poll
                               do_rmdir
                                 cgroup_rmdir
                                   kernfs_drain_open_files
                                     cgroup_file_release
                                       cgroup_pressure_release
                                         psi_trigger_destroy
                                           wake_up_pollfree(&t->event_wait)
    // vfs_poll is unblocked
                                           synchronize_rcu
                                           kfree(t)
      poll_freewait -> UAF access to the trigger's waitqueue head
    
    Patch [1] fixed this issue for epoll() case using wake_up_pollfree(),
    however the same issue exists for synchronous poll() case.
    The root cause of this issue is that the lifecycles of the psi trigger's
    waitqueue and of the file associated with the trigger are different. Fix
    this by using kernfs_generic_poll function when polling on cgroup-specific
    psi triggers. It internally uses kernfs_open_node->poll waitqueue head
    with its lifecycle tied to the file's lifecycle. This also renders the
    fix in [1] obsolete, so revert it.
    
    [1] commit c2dbe32d5db5 ("sched/psi: Fix use-after-free in ep_remove_wait_queue()")
    
    Fixes: 0e94682b73bf ("psi: introduce psi monitor")
    Closes: https://lore.kernel.org/all/20230613062306.101831-1-lujialin4@xxxxxxxxxx/
    Reported-by: Lu Jialin <lujialin4@xxxxxxxxxx>
    Signed-off-by: Suren Baghdasaryan <surenb@xxxxxxxxxx>
    Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
    Link: https://lkml.kernel.org/r/20230630005612.1014540-1-surenb@xxxxxxxxxx
    Signed-off-by: Sasha Levin <sashal@xxxxxxxxxx>

diff --git a/include/linux/psi.h b/include/linux/psi.h
index ab26200c28033..e0745873e3f26 100644
--- a/include/linux/psi.h
+++ b/include/linux/psi.h
@@ -23,8 +23,9 @@ void psi_memstall_enter(unsigned long *flags);
 void psi_memstall_leave(unsigned long *flags);
 
 int psi_show(struct seq_file *s, struct psi_group *group, enum psi_res res);
-struct psi_trigger *psi_trigger_create(struct psi_group *group,
-			char *buf, enum psi_res res, struct file *file);
+struct psi_trigger *psi_trigger_create(struct psi_group *group, char *buf,
+				       enum psi_res res, struct file *file,
+				       struct kernfs_open_file *of);
 void psi_trigger_destroy(struct psi_trigger *t);
 
 __poll_t psi_trigger_poll(void **trigger_ptr, struct file *file,
diff --git a/include/linux/psi_types.h b/include/linux/psi_types.h
index 040c089581c6c..f1fd3a8044e0e 100644
--- a/include/linux/psi_types.h
+++ b/include/linux/psi_types.h
@@ -137,6 +137,9 @@ struct psi_trigger {
 	/* Wait queue for polling */
 	wait_queue_head_t event_wait;
 
+	/* Kernfs file for cgroup triggers */
+	struct kernfs_open_file *of;
+
 	/* Pending event flag */
 	int event;
 
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 4d42f0cbc11ea..3299ec69ce0d1 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -3785,7 +3785,7 @@ static ssize_t pressure_write(struct kernfs_open_file *of, char *buf,
 	}
 
 	psi = cgroup_psi(cgrp);
-	new = psi_trigger_create(psi, buf, res, of->file);
+	new = psi_trigger_create(psi, buf, res, of->file, of);
 	if (IS_ERR(new)) {
 		cgroup_put(cgrp);
 		return PTR_ERR(new);
diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
index e072f6b31bf30..80d8c10e93638 100644
--- a/kernel/sched/psi.c
+++ b/kernel/sched/psi.c
@@ -494,8 +494,12 @@ static u64 update_triggers(struct psi_group *group, u64 now, bool *update_total,
 			continue;
 
 		/* Generate an event */
-		if (cmpxchg(&t->event, 0, 1) == 0)
-			wake_up_interruptible(&t->event_wait);
+		if (cmpxchg(&t->event, 0, 1) == 0) {
+			if (t->of)
+				kernfs_notify(t->of->kn);
+			else
+				wake_up_interruptible(&t->event_wait);
+		}
 		t->last_event_time = now;
 		/* Reset threshold breach flag once event got generated */
 		t->pending_event = false;
@@ -1272,8 +1276,9 @@ int psi_show(struct seq_file *m, struct psi_group *group, enum psi_res res)
 	return 0;
 }
 
-struct psi_trigger *psi_trigger_create(struct psi_group *group,
-			char *buf, enum psi_res res, struct file *file)
+struct psi_trigger *psi_trigger_create(struct psi_group *group, char *buf,
+				       enum psi_res res, struct file *file,
+				       struct kernfs_open_file *of)
 {
 	struct psi_trigger *t;
 	enum psi_states state;
@@ -1333,7 +1338,9 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group,
 
 	t->event = 0;
 	t->last_event_time = 0;
-	init_waitqueue_head(&t->event_wait);
+	t->of = of;
+	if (!of)
+		init_waitqueue_head(&t->event_wait);
 	t->pending_event = false;
 	t->aggregator = privileged ? PSI_POLL : PSI_AVGS;
 
@@ -1390,7 +1397,10 @@ void psi_trigger_destroy(struct psi_trigger *t)
 	 * being accessed later. Can happen if cgroup is deleted from under a
 	 * polling process.
 	 */
-	wake_up_pollfree(&t->event_wait);
+	if (t->of)
+		kernfs_notify(t->of->kn);
+	else
+		wake_up_interruptible(&t->event_wait);
 
 	if (t->aggregator == PSI_AVGS) {
 		mutex_lock(&group->avgs_lock);
@@ -1462,7 +1472,10 @@ __poll_t psi_trigger_poll(void **trigger_ptr,
 	if (!t)
 		return DEFAULT_POLLMASK | EPOLLERR | EPOLLPRI;
 
-	poll_wait(file, &t->event_wait, wait);
+	if (t->of)
+		kernfs_generic_poll(t->of, wait);
+	else
+		poll_wait(file, &t->event_wait, wait);
 
 	if (cmpxchg(&t->event, 1, 0) == 1)
 		ret |= EPOLLPRI;
@@ -1532,7 +1545,7 @@ static ssize_t psi_write(struct file *file, const char __user *user_buf,
 		return -EBUSY;
 	}
 
-	new = psi_trigger_create(&psi_system, buf, res, file);
+	new = psi_trigger_create(&psi_system, buf, res, file, NULL);
 	if (IS_ERR(new)) {
 		mutex_unlock(&seq->lock);
 		return PTR_ERR(new);



[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux