Patch "xen/events: replace evtchn_rwlock with RCU" has been added to the 4.19-stable tree

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This is a note to let you know that I've just added the patch titled

    xen/events: replace evtchn_rwlock with RCU

to the 4.19-stable tree which can be found at:
    http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary

The filename of the patch is:
     xen-events-replace-evtchn_rwlock-with-rcu.patch
and it can be found in the queue-4.19 subdirectory.

If you, or anyone else, feels it should not be added to the stable tree,
please let <stable@xxxxxxxxxxxxxxx> know about it.


>From 0a84f5f87ee564c1da1332629ecd98fcb6958bc2 Mon Sep 17 00:00:00 2001
From: Juergen Gross <jgross@xxxxxxxx>
Date: Mon, 28 Aug 2023 08:09:47 +0200
Subject: xen/events: replace evtchn_rwlock with RCU

From: Juergen Gross <jgross@xxxxxxxx>

commit 87797fad6cce28ec9be3c13f031776ff4f104cfc upstream.

In unprivileged Xen guests event handling can cause a deadlock with
Xen console handling. The evtchn_rwlock and the hvc_lock are taken in
opposite sequence in __hvc_poll() and in Xen console IRQ handling.
Normally this is no problem, as the evtchn_rwlock is taken as a reader
in both paths, but as soon as an event channel is being closed, the
lock will be taken as a writer, which will cause read_lock() to block:

CPU0                     CPU1                CPU2
(IRQ handling)           (__hvc_poll())      (closing event channel)

read_lock(evtchn_rwlock)
                         spin_lock(hvc_lock)
                                             write_lock(evtchn_rwlock)
                                                 [blocks]
spin_lock(hvc_lock)
    [blocks]
                        read_lock(evtchn_rwlock)
                            [blocks due to writer waiting,
                             and not in_interrupt()]

This issue can be avoided by replacing evtchn_rwlock with RCU in
xen_free_irq(). Note that RCU is used only to delay freeing of the
irq_info memory. There is no RCU based dereferencing or replacement of
pointers involved.

In order to avoid potential races between removing the irq_info
reference and handling of interrupts, set the irq_info pointer to NULL
only when freeing its memory. The IRQ itself must be freed at that
time, too, as otherwise the same IRQ number could be allocated again
before handling of the old instance would have been finished.

This is XSA-441 / CVE-2023-34324.

Fixes: 54c9de89895e ("xen/events: add a new "late EOI" evtchn framework")
Reported-by: Marek Marczykowski-Górecki <marmarek@xxxxxxxxxxxxxxxxxxxxxx>
Signed-off-by: Juergen Gross <jgross@xxxxxxxx>
Reviewed-by: Julien Grall <jgrall@xxxxxxxxxx>
Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx>
---
 drivers/xen/events/events_base.c     |   85 ++++++++++++++++++-----------------
 drivers/xen/events/events_internal.h |    2 
 2 files changed, 46 insertions(+), 41 deletions(-)

--- a/drivers/xen/events/events_base.c
+++ b/drivers/xen/events/events_base.c
@@ -83,22 +83,12 @@ const struct evtchn_ops *evtchn_ops;
 static DEFINE_MUTEX(irq_mapping_update_lock);
 
 /*
- * Lock protecting event handling loop against removing event channels.
- * Adding of event channels is no issue as the associated IRQ becomes active
- * only after everything is setup (before request_[threaded_]irq() the handler
- * can't be entered for an event, as the event channel will be unmasked only
- * then).
- */
-static DEFINE_RWLOCK(evtchn_rwlock);
-
-/*
  * Lock hierarchy:
  *
  * irq_mapping_update_lock
- *   evtchn_rwlock
- *     IRQ-desc lock
- *       percpu eoi_list_lock
- *         irq_info->lock
+ *   IRQ-desc lock
+ *     percpu eoi_list_lock
+ *       irq_info->lock
  */
 
 static LIST_HEAD(xen_irq_list_head);
@@ -213,6 +203,22 @@ static void set_info_for_irq(unsigned in
 		irq_set_chip_data(irq, info);
 }
 
+static void delayed_free_irq(struct work_struct *work)
+{
+	struct irq_info *info = container_of(to_rcu_work(work), struct irq_info,
+					     rwork);
+	unsigned int irq = info->irq;
+
+	/* Remove the info pointer only now, with no potential users left. */
+	set_info_for_irq(irq, NULL);
+
+	kfree(info);
+
+	/* Legacy IRQ descriptors are managed by the arch. */
+	if (irq >= nr_legacy_irqs())
+		irq_free_desc(irq);
+}
+
 /* Constructors for packed IRQ information. */
 static int xen_irq_info_common_setup(struct irq_info *info,
 				     unsigned irq,
@@ -547,33 +553,36 @@ static void xen_irq_lateeoi_worker(struc
 
 	eoi = container_of(to_delayed_work(work), struct lateeoi_work, delayed);
 
-	read_lock_irqsave(&evtchn_rwlock, flags);
+	rcu_read_lock();
 
 	while (true) {
-		spin_lock(&eoi->eoi_list_lock);
+		spin_lock_irqsave(&eoi->eoi_list_lock, flags);
 
 		info = list_first_entry_or_null(&eoi->eoi_list, struct irq_info,
 						eoi_list);
 
-		if (info == NULL || now < info->eoi_time) {
-			spin_unlock(&eoi->eoi_list_lock);
+		if (info == NULL)
+			break;
+
+		if (now < info->eoi_time) {
+			mod_delayed_work_on(info->eoi_cpu, system_wq,
+					    &eoi->delayed,
+					    info->eoi_time - now);
 			break;
 		}
 
 		list_del_init(&info->eoi_list);
 
-		spin_unlock(&eoi->eoi_list_lock);
+		spin_unlock_irqrestore(&eoi->eoi_list_lock, flags);
 
 		info->eoi_time = 0;
 
 		xen_irq_lateeoi_locked(info, false);
 	}
 
-	if (info)
-		mod_delayed_work_on(info->eoi_cpu, system_wq,
-				    &eoi->delayed, info->eoi_time - now);
+	spin_unlock_irqrestore(&eoi->eoi_list_lock, flags);
 
-	read_unlock_irqrestore(&evtchn_rwlock, flags);
+	rcu_read_unlock();
 }
 
 static void xen_cpu_init_eoi(unsigned int cpu)
@@ -588,16 +597,15 @@ static void xen_cpu_init_eoi(unsigned in
 void xen_irq_lateeoi(unsigned int irq, unsigned int eoi_flags)
 {
 	struct irq_info *info;
-	unsigned long flags;
 
-	read_lock_irqsave(&evtchn_rwlock, flags);
+	rcu_read_lock();
 
 	info = info_for_irq(irq);
 
 	if (info)
 		xen_irq_lateeoi_locked(info, eoi_flags & XEN_EOI_FLAG_SPURIOUS);
 
-	read_unlock_irqrestore(&evtchn_rwlock, flags);
+	rcu_read_unlock();
 }
 EXPORT_SYMBOL_GPL(xen_irq_lateeoi);
 
@@ -616,6 +624,7 @@ static void xen_irq_init(unsigned irq)
 
 	info->type = IRQT_UNBOUND;
 	info->refcnt = -1;
+	INIT_RCU_WORK(&info->rwork, delayed_free_irq);
 
 	set_info_for_irq(irq, info);
 
@@ -668,31 +677,18 @@ static int __must_check xen_allocate_irq
 static void xen_free_irq(unsigned irq)
 {
 	struct irq_info *info = info_for_irq(irq);
-	unsigned long flags;
 
 	if (WARN_ON(!info))
 		return;
 
-	write_lock_irqsave(&evtchn_rwlock, flags);
-
 	if (!list_empty(&info->eoi_list))
 		lateeoi_list_del(info);
 
 	list_del(&info->list);
 
-	set_info_for_irq(irq, NULL);
-
 	WARN_ON(info->refcnt > 0);
 
-	write_unlock_irqrestore(&evtchn_rwlock, flags);
-
-	kfree(info);
-
-	/* Legacy IRQ descriptors are managed by the arch. */
-	if (irq < nr_legacy_irqs())
-		return;
-
-	irq_free_desc(irq);
+	queue_rcu_work(system_wq, &info->rwork);
 }
 
 static void xen_evtchn_close(unsigned int port)
@@ -1603,7 +1599,14 @@ static void __xen_evtchn_do_upcall(void)
 	unsigned count;
 	struct evtchn_loop_ctrl ctrl = { 0 };
 
-	read_lock(&evtchn_rwlock);
+	/*
+	 * When closing an event channel the associated IRQ must not be freed
+	 * until all cpus have left the event handling loop. This is ensured
+	 * by taking the rcu_read_lock() while handling events, as freeing of
+	 * the IRQ is handled via queue_rcu_work() _after_ closing the event
+	 * channel.
+	 */
+	rcu_read_lock();
 
 	do {
 		vcpu_info->evtchn_upcall_pending = 0;
@@ -1620,7 +1623,7 @@ static void __xen_evtchn_do_upcall(void)
 	} while (count != 1 || vcpu_info->evtchn_upcall_pending);
 
 out:
-	read_unlock(&evtchn_rwlock);
+	rcu_read_unlock();
 
 	/*
 	 * Increment irq_epoch only now to defer EOIs only for
--- a/drivers/xen/events/events_internal.h
+++ b/drivers/xen/events/events_internal.h
@@ -8,6 +8,7 @@
  */
 #ifndef __EVENTS_INTERNAL_H__
 #define __EVENTS_INTERNAL_H__
+#include <linux/rcupdate.h>
 
 /* Interrupt types. */
 enum xen_irq_type {
@@ -33,6 +34,7 @@ enum xen_irq_type {
 struct irq_info {
 	struct list_head list;
 	struct list_head eoi_list;
+	struct rcu_work rwork;
 	short refcnt;
 	short spurious_cnt;
 	short type;		/* type */


Patches currently in stable-queue which might be from jgross@xxxxxxxx are

queue-4.19/xen-events-replace-evtchn_rwlock-with-rcu.patch



[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux