Re: USB transaction errors causing RCU stalls and kernel panics

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Am Dienstag, den 10.03.2020, 12:04 +0100 schrieb Oliver Neukum:
> Am Dienstag, den 10.03.2020, 10:04 +0000 schrieb Jonas Karlsson:
> 
> > Yes, I have applied that commit. The logs I have attached so far have had that commit applied.
> > It reduces the amount of Unknown event type 37 messages significantly.
> 
> I am a bit confused. If this still happens after you disabled
> autosuspend, the initial diagnosis can't be right. It looks
> like we are entering some kind of busy loop. Can you test
> the attached patches?

Correction: please test these three patches.

	Regards
		Oliver
From e5f91fa32e5294b764ed93615c20834ee3ba690e Mon Sep 17 00:00:00 2001
From: Oliver Neukum <oneukum@xxxxxxxx>
Date: Thu, 5 Mar 2020 11:16:02 +0100
Subject: [PATCH 1/3] cdc-acm: close race betrween suspend() and acm_softint

Suspend increments a counter, then kills the URBs,
then kills the scheduled work. The scheduled work, however,
may reschedule the URBs. Fix this by having the work
check the counter.

Signed-off-by: Oliver Neukum <oneukum@xxxxxxxx>
---
 drivers/usb/class/cdc-acm.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c
index 62f4fb9b362f..7d0167382c87 100644
--- a/drivers/usb/class/cdc-acm.c
+++ b/drivers/usb/class/cdc-acm.c
@@ -557,14 +557,14 @@ static void acm_softint(struct work_struct *work)
 	struct acm *acm = container_of(work, struct acm, work);
 
 	if (test_bit(EVENT_RX_STALL, &acm->flags)) {
-		if (!(usb_autopm_get_interface(acm->data))) {
+		smp_mb(); /* against acm_suspend() */
+		if (!acm->susp_count) {
 			for (i = 0; i < acm->rx_buflimit; i++)
 				usb_kill_urb(acm->read_urbs[i]);
 			usb_clear_halt(acm->dev, acm->in);
 			acm_submit_read_urbs(acm, GFP_KERNEL);
-			usb_autopm_put_interface(acm->data);
+			clear_bit(EVENT_RX_STALL, &acm->flags);
 		}
-		clear_bit(EVENT_RX_STALL, &acm->flags);
 	}
 
 	if (test_and_clear_bit(EVENT_TTY_WAKEUP, &acm->flags))
-- 
2.16.4

From 46f8fe7dbc51d6476bd8589dcbf0fc8a76e94102 Mon Sep 17 00:00:00 2001
From: Oliver Neukum <oneukum@xxxxxxxx>
Date: Tue, 10 Mar 2020 11:55:21 +0100
Subject: [PATCH 2/3] cdc-acm: introduce a cool down

Immediate submission in case of a babbling device can lead
to a busy loop. Introducing a delayed work.

Signed-off-by: Oliver Neukum <oneukum@xxxxxxxx>
---
 drivers/usb/class/cdc-acm.c | 29 +++++++++++++++++++++++++++--
 drivers/usb/class/cdc-acm.h |  5 ++++-
 2 files changed, 31 insertions(+), 3 deletions(-)

diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c
index 7d0167382c87..a1fbf6bf5cf4 100644
--- a/drivers/usb/class/cdc-acm.c
+++ b/drivers/usb/class/cdc-acm.c
@@ -412,9 +412,12 @@ static void acm_ctrl_irq(struct urb *urb)
 
 exit:
 	retval = usb_submit_urb(urb, GFP_ATOMIC);
-	if (retval && retval != -EPERM)
+	if (retval && retval != -EPERM && retval != -ENODEV)
 		dev_err(&acm->control->dev,
 			"%s - usb_submit_urb failed: %d\n", __func__, retval);
+	else
+		dev_vdbg(&acm->control->dev,
+			"control resubmission terminated %d\n", retval);
 }
 
 static int acm_submit_read_urb(struct acm *acm, int index, gfp_t mem_flags)
@@ -430,6 +433,8 @@ static int acm_submit_read_urb(struct acm *acm, int index, gfp_t mem_flags)
 			dev_err(&acm->data->dev,
 				"urb %d failed submission with %d\n",
 				index, res);
+		} else {
+			dev_vdbg(&acm->data->dev, "intended failure %d\n", res);
 		}
 		set_bit(index, &acm->read_urbs_free);
 		return res;
@@ -471,6 +476,7 @@ static void acm_read_bulk_callback(struct urb *urb)
 	int status = urb->status;
 	bool stopped = false;
 	bool stalled = false;
+	bool cooldown = false;
 
 	dev_vdbg(&acm->data->dev, "got urb %d, len %d, status %d\n",
 		rb->index, urb->actual_length, status);
@@ -497,6 +503,13 @@ static void acm_read_bulk_callback(struct urb *urb)
 			__func__, status);
 		stopped = true;
 		break;
+	case -EOVERFLOW:
+		dev_dbg(&acm->data->dev,
+			"%s - cooling babbling device\n", __func__);
+		usb_mark_last_busy(acm->dev);
+		set_bit(rb->index, &acm->urbs_in_error_delay);
+		cooldown = true;
+		break;
 	default:
 		dev_dbg(&acm->data->dev,
 			"%s - nonzero urb status received: %d\n",
@@ -518,9 +531,11 @@ static void acm_read_bulk_callback(struct urb *urb)
 	 */
 	smp_mb__after_atomic();
 
-	if (stopped || stalled) {
+	if (stopped || stalled || cooldown) {
 		if (stalled)
 			schedule_work(&acm->work);
+		else if (cooldown)
+			schedule_delayed_work(&acm->dwork, HZ / 2);
 		return;
 	}
 
@@ -567,6 +582,12 @@ static void acm_softint(struct work_struct *work)
 		}
 	}
 
+	if (test_and_clear_bit(ACM_ERROR_DELAY, &acm->flags)) {
+		for (i = 0; i < ACM_NR; i++) 
+			if (test_and_clear_bit(i, &acm->urbs_in_error_delay))
+					acm_submit_read_urb(acm, i, GFP_NOIO);
+	}
+
 	if (test_and_clear_bit(EVENT_TTY_WAKEUP, &acm->flags))
 		tty_port_tty_wakeup(&acm->port);
 }
@@ -1325,6 +1346,7 @@ static int acm_probe(struct usb_interface *intf,
 	acm->readsize = readsize;
 	acm->rx_buflimit = num_rx_buf;
 	INIT_WORK(&acm->work, acm_softint);
+	INIT_DELAYED_WORK(&acm->dwork, acm_softint);
 	init_waitqueue_head(&acm->wioctl);
 	spin_lock_init(&acm->write_lock);
 	spin_lock_init(&acm->read_lock);
@@ -1534,6 +1556,7 @@ static void acm_disconnect(struct usb_interface *intf)
 
 	acm_kill_urbs(acm);
 	cancel_work_sync(&acm->work);
+	cancel_delayed_work_sync(&acm->dwork);
 
 	tty_unregister_device(acm_tty_driver, acm->minor);
 
@@ -1576,6 +1599,8 @@ static int acm_suspend(struct usb_interface *intf, pm_message_t message)
 
 	acm_kill_urbs(acm);
 	cancel_work_sync(&acm->work);
+	cancel_delayed_work_sync(&acm->dwork);
+	acm->urbs_in_error_delay = 0;
 
 	return 0;
 }
diff --git a/drivers/usb/class/cdc-acm.h b/drivers/usb/class/cdc-acm.h
index ca1c026382c2..cd5e9d8ab237 100644
--- a/drivers/usb/class/cdc-acm.h
+++ b/drivers/usb/class/cdc-acm.h
@@ -109,8 +109,11 @@ struct acm {
 #		define EVENT_TTY_WAKEUP	0
 #		define EVENT_RX_STALL	1
 #		define ACM_THROTTLED	2
+#		define ACM_ERROR_DELAY	3
+	unsigned long urbs_in_error_delay;		/* these need to be restarted after a delay */
 	struct usb_cdc_line_coding line;		/* bits, stop, parity */
-	struct work_struct work;			/* work queue entry for line discipline waking up */
+	struct work_struct work;			/* work queue entry for various purposes*/
+	struct delayed_work dwork;			/* for cool downs needed in error recovery */
 	unsigned int ctrlin;				/* input control lines (DCD, DSR, RI, break, overruns) */
 	unsigned int ctrlout;				/* output control lines (DTR, RTS) */
 	struct async_icount iocount;			/* counters for control line changes */
-- 
2.16.4

From c5a8ca0a79adfea9dda5ee51f23cf09891687a24 Mon Sep 17 00:00:00 2001
From: Oliver Neukum <oneukum@xxxxxxxx>
Date: Tue, 10 Mar 2020 12:19:53 +0100
Subject: [PATCH 3/3] cdc-acm: also cool doen for EPROTO

---
 drivers/usb/class/cdc-acm.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c
index a1fbf6bf5cf4..82484cc6c36f 100644
--- a/drivers/usb/class/cdc-acm.c
+++ b/drivers/usb/class/cdc-acm.c
@@ -504,6 +504,7 @@ static void acm_read_bulk_callback(struct urb *urb)
 		stopped = true;
 		break;
 	case -EOVERFLOW:
+	case -EPROTO:
 		dev_dbg(&acm->data->dev,
 			"%s - cooling babbling device\n", __func__);
 		usb_mark_last_busy(acm->dev);
-- 
2.16.4


[Index of Archives]     [Linux Media]     [Linux Input]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]     [Old Linux USB Devel Archive]

  Powered by Linux