[PATCH 2/2] USB: EHCI: add a delay when unlinking an active QH

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Michael Reutman reports that an AMD/ATI EHCI host controller on one of
his computers does not stop transferring data when an active bulk QH
is unlinked from the async schedule.  Apparently that host controller
fails to implement the IAA mechanism correctly when an active QH is
unlinked.  This leads to data corruption, because the controller
continues to update the QH in memory when the driver doesn't expect
it.  As a result the next URB submitted for that QH can hang, because
the link pointers for the TD queue have been messed up.  This
misbehavior is observed quite regularly.

To be fair, the EHCI spec (section 4.8.2) says that active QHs should
not be unlinked.  It goes on to recommend a procedure that involves
waiting for the QH to go inactive before unlinking it.  In the real
world this is impractical, not least because the QH may _never_ go
inactive.  (What were they thinking?)  Sometimes we have no choice but
to unlink an active QH.

In an attempt to avoid the problems that can ensue, this patch changes
how the driver decides when the unlink is complete.  In addition to
waiting through two IAA cycles, in cases where the QH was not known to
be inactive beforehand we now wait until a 2-ms period has elapsed
with the host controller making no change to the QH data structure
(the hw_current and hw_token fields in particular).  The intuition
here is that after such a long period, the endpoint must be NAKing and
hopefully the QH has been dropped from the host controller's internal
cache.  There's no way to know if this reasoning is really valid --
the spec is no help in this regard -- but at least this approach fixes
Michael's problem.

The test for whether the QH is already known to be inactive involves
the reason for unlinking the QH originally.  If it was unlinked
because it had halted, or it stopped in response to a short read, or
it overlaid a dummy TD (a silicon bug), then it certainly is inactive.
If it was unlinked because the TD queue was empty and no TDs have been
added to the queue in the meantime, then it must be inactive.  Or if
the hardware status indicates that the QH is currently halted (even if
that wasn't the reason for unlinking it), then it is inactive.
Otherwise, if none of those checks apply, we go through the 2-ms
delay.

Signed-off-by: Alan Stern <stern@xxxxxxxxxxxxxxxxxxx>
Reported-by: Michael Reutman <mreutman@xxxxxxxxxxxxxxxxx>
Tested-by: Michael Reutman <mreutman@xxxxxxxxxxxxxxxxx>

---


[as1793]


 drivers/usb/host/ehci-hcd.c   |    6 ++-
 drivers/usb/host/ehci-q.c     |   73 +++++++++++++++++++++++++++++++++++-------
 drivers/usb/host/ehci-timer.c |    4 +-
 drivers/usb/host/ehci.h       |    3 +
 4 files changed, 73 insertions(+), 13 deletions(-)

Index: usb-4.3/drivers/usb/host/ehci.h
===================================================================
--- usb-4.3.orig/drivers/usb/host/ehci.h
+++ usb-4.3/drivers/usb/host/ehci.h
@@ -110,6 +110,7 @@ enum ehci_hrtimer_event {
 	EHCI_HRTIMER_POLL_DEAD,		/* Wait for dead controller to stop */
 	EHCI_HRTIMER_UNLINK_INTR,	/* Wait for interrupt QH unlink */
 	EHCI_HRTIMER_FREE_ITDS,		/* Wait for unused iTDs and siTDs */
+	EHCI_HRTIMER_ACTIVE_UNLINK,	/* Wait while unlinking an active QH */
 	EHCI_HRTIMER_START_UNLINK_INTR, /* Unlink empty interrupt QHs */
 	EHCI_HRTIMER_ASYNC_UNLINKS,	/* Unlink empty async QHs */
 	EHCI_HRTIMER_IAA_WATCHDOG,	/* Handle lost IAA interrupts */
@@ -156,6 +157,8 @@ struct ehci_hcd {			/* one per controlle
 	struct list_head	async_idle;
 	unsigned		async_unlink_cycle;
 	unsigned		async_count;	/* async activity count */
+	__hc32			old_current;	/* Test for QH becoming */
+	__hc32			old_token;	/*  inactive during unlink */
 
 	/* periodic schedule support */
 #define	DEFAULT_I_TDPS		1024		/* some HCs can do less */
Index: usb-4.3/drivers/usb/host/ehci-hcd.c
===================================================================
--- usb-4.3.orig/drivers/usb/host/ehci-hcd.c
+++ usb-4.3/drivers/usb/host/ehci-hcd.c
@@ -306,6 +306,7 @@ static void ehci_quiesce (struct ehci_hc
 
 /*-------------------------------------------------------------------------*/
 
+static void end_iaa_cycle(struct ehci_hcd *ehci);
 static void end_unlink_async(struct ehci_hcd *ehci);
 static void unlink_empty_async(struct ehci_hcd *ehci);
 static void unlink_empty_async_suspended(struct ehci_hcd *ehci);
@@ -565,6 +566,9 @@ static int ehci_init(struct usb_hcd *hcd
 	/* Accept arbitrarily long scatter-gather lists */
 	if (!(hcd->driver->flags & HCD_LOCAL_MEM))
 		hcd->self.sg_tablesize = ~0;
+
+	/* Prepare for unlinking active QHs */
+	ehci->old_current = ~0;
 	return 0;
 }
 
@@ -758,7 +762,7 @@ static irqreturn_t ehci_irq (struct usb_
 			ehci_dbg(ehci, "IAA with IAAD still set?\n");
 		if (ehci->iaa_in_progress)
 			COUNT(ehci->stats.iaa);
-		end_unlink_async(ehci);
+		end_iaa_cycle(ehci);
 	}
 
 	/* remote wakeup [4.3.1] */
Index: usb-4.3/drivers/usb/host/ehci-q.c
===================================================================
--- usb-4.3.orig/drivers/usb/host/ehci-q.c
+++ usb-4.3/drivers/usb/host/ehci-q.c
@@ -1305,13 +1305,8 @@ static void start_iaa_cycle(struct ehci_
 	}
 }
 
-/* the async qh for the qtds being unlinked are now gone from the HC */
-
-static void end_unlink_async(struct ehci_hcd *ehci)
+static void end_iaa_cycle(struct ehci_hcd *ehci)
 {
-	struct ehci_qh		*qh;
-	bool			early_exit;
-
 	if (ehci->has_synopsys_hc_bug)
 		ehci_writel(ehci, (u32) ehci->async->qh_dma,
 			    &ehci->regs->async_next);
@@ -1319,6 +1314,16 @@ static void end_unlink_async(struct ehci
 	/* The current IAA cycle has ended */
 	ehci->iaa_in_progress = false;
 
+	end_unlink_async(ehci);
+}
+
+/* See if the async qh for the qtds being unlinked are now gone from the HC */
+
+static void end_unlink_async(struct ehci_hcd *ehci)
+{
+	struct ehci_qh		*qh;
+	bool			early_exit;
+
 	if (list_empty(&ehci->async_unlink))
 		return;
 	qh = list_first_entry(&ehci->async_unlink, struct ehci_qh,
@@ -1339,14 +1344,60 @@ static void end_unlink_async(struct ehci
 	 * after the IAA interrupt occurs.  In self-defense, always go
 	 * through two IAA cycles for each QH.
 	 */
-	else if (qh->qh_state == QH_STATE_UNLINK_WAIT) {
+	else if (qh->qh_state == QH_STATE_UNLINK) {
+		/*
+		 * Second IAA cycle has finished.  Process only the first
+		 * waiting QH (NVIDIA (?) bug).
+		 */
+		list_move_tail(&qh->unlink_node, &ehci->async_idle);
+	}
+
+	/*
+	 * AMD/ATI (?) bug: The HC can continue to use an active QH long
+	 * after the IAA interrupt occurs.  To prevent problems, QHs that
+	 * may still be active will wait until 2 ms have passed with no
+	 * change to the hw_current and hw_token fields (this delay occurs
+	 * between the two IAA cycles).
+	 *
+	 * The EHCI spec (4.8.2) says that active QHs must not be removed
+	 * from the async schedule and recommends waiting until the QH
+	 * goes inactive.  This is ridiculous because the QH will _never_
+	 * become inactive if the endpoint NAKs indefinitely.
+	 */
+
+	/* Some reasons for unlinking guarantee the QH can't be active */
+	else if (qh->unlink_reason & (QH_UNLINK_HALTED |
+			QH_UNLINK_SHORT_READ | QH_UNLINK_DUMMY_OVERLAY))
+		goto DelayDone;
+
+	/* The QH can't be active if the queue was and still is empty... */
+	else if	((qh->unlink_reason & QH_UNLINK_QUEUE_EMPTY) &&
+			list_empty(&qh->qtd_list))
+		goto DelayDone;
+
+	/* ... or if the QH has halted */
+	else if	(qh->hw->hw_token & cpu_to_hc32(ehci, QTD_STS_HALT))
+		goto DelayDone;
+
+	/* Otherwise we have to wait until the QH stops changing */
+	else {
+		__hc32		qh_current, qh_token;
+
+		qh_current = qh->hw->hw_current;
+		qh_token = qh->hw->hw_token;
+		if (qh_current != ehci->old_current ||
+				qh_token != ehci->old_token) {
+			ehci->old_current = qh_current;
+			ehci->old_token = qh_token;
+			ehci_enable_event(ehci,
+					EHCI_HRTIMER_ACTIVE_UNLINK, true);
+			return;
+		}
+ DelayDone:
 		qh->qh_state = QH_STATE_UNLINK;
 		early_exit = true;
 	}
-
-	/* Otherwise process only the first waiting QH (NVIDIA bug?) */
-	else
-		list_move_tail(&qh->unlink_node, &ehci->async_idle);
+	ehci->old_current = ~0;		/* Prepare for next QH */
 
 	/* Start a new IAA cycle if any QHs are waiting for it */
 	if (!list_empty(&ehci->async_unlink))
Index: usb-4.3/drivers/usb/host/ehci-timer.c
===================================================================
--- usb-4.3.orig/drivers/usb/host/ehci-timer.c
+++ usb-4.3/drivers/usb/host/ehci-timer.c
@@ -72,6 +72,7 @@ static unsigned event_delays_ns[] = {
 	1 * NSEC_PER_MSEC,	/* EHCI_HRTIMER_POLL_DEAD */
 	1125 * NSEC_PER_USEC,	/* EHCI_HRTIMER_UNLINK_INTR */
 	2 * NSEC_PER_MSEC,	/* EHCI_HRTIMER_FREE_ITDS */
+	2 * NSEC_PER_MSEC,	/* EHCI_HRTIMER_ACTIVE_UNLINK */
 	5 * NSEC_PER_MSEC,	/* EHCI_HRTIMER_START_UNLINK_INTR */
 	6 * NSEC_PER_MSEC,	/* EHCI_HRTIMER_ASYNC_UNLINKS */
 	10 * NSEC_PER_MSEC,	/* EHCI_HRTIMER_IAA_WATCHDOG */
@@ -361,7 +362,7 @@ static void ehci_iaa_watchdog(struct ehc
 	}
 
 	ehci_dbg(ehci, "IAA watchdog: status %x cmd %x\n", status, cmd);
-	end_unlink_async(ehci);
+	end_iaa_cycle(ehci);
 }
 
 
@@ -395,6 +396,7 @@ static void (*event_handlers[])(struct e
 	ehci_handle_controller_death,	/* EHCI_HRTIMER_POLL_DEAD */
 	ehci_handle_intr_unlinks,	/* EHCI_HRTIMER_UNLINK_INTR */
 	end_free_itds,			/* EHCI_HRTIMER_FREE_ITDS */
+	end_unlink_async,		/* EHCI_HRTIMER_ACTIVE_UNLINK */
 	ehci_handle_start_intr_unlinks,	/* EHCI_HRTIMER_START_UNLINK_INTR */
 	unlink_empty_async,		/* EHCI_HRTIMER_ASYNC_UNLINKS */
 	ehci_iaa_watchdog,		/* EHCI_HRTIMER_IAA_WATCHDOG */

--
To unsubscribe from this list: send the line "unsubscribe linux-usb" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [Linux Media]     [Linux Input]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]     [Old Linux USB Devel Archive]

  Powered by Linux