On Wed, 15 Jun 2011, Matej Kenda wrote: > Patched Linux kernel is now being used on four systems (COMe type) for > almost two weeks. USB disk where the complete OS is installed did not > unexpectedly disconnect on any of these embedded devices. > > It seems that the your patch solves the problem. I've got a new version of this patch for you to test. In some respects it simplifies things compared to the old patch. (It looks worse than it really is because as part of the simplification, a whole chunk of code got moved out one indentation level.) Let me know what happens with it. I think this will apply to 3.0-rc3, and it should apply to earlier kernels with only minor adjustments. (Fair warning: I haven't tried this on my own system yet.) Alan Stern Index: usb-3.0/drivers/usb/host/ehci.h =================================================================== --- usb-3.0.orig/drivers/usb/host/ehci.h +++ usb-3.0/drivers/usb/host/ehci.h @@ -75,6 +75,7 @@ struct ehci_hcd { /* one per controlle struct ehci_qh *async; struct ehci_qh *dummy; /* For AMD quirk use */ struct ehci_qh *reclaim; + struct ehci_qh *qh_scan_next; unsigned scanning : 1; /* periodic schedule support */ @@ -343,6 +344,7 @@ struct ehci_qh { struct ehci_qh *reclaim; /* next to reclaim */ struct ehci_hcd *ehci; + unsigned long unlink_time; /* * Do NOT use atomic operations for QH refcounting. On some CPUs Index: usb-3.0/drivers/usb/host/ehci-hcd.c =================================================================== --- usb-3.0.orig/drivers/usb/host/ehci-hcd.c +++ usb-3.0/drivers/usb/host/ehci-hcd.c @@ -94,7 +94,8 @@ static const char hcd_name [] = "ehci_hc #define EHCI_IAA_MSECS 10 /* arbitrary */ #define EHCI_IO_JIFFIES (HZ/10) /* io watchdog > irq_thresh */ #define EHCI_ASYNC_JIFFIES (HZ/20) /* async idle timeout */ -#define EHCI_SHRINK_FRAMES 5 /* async qh unlink delay */ +#define EHCI_SHRINK_JIFFIES (DIV_ROUND_UP(HZ, 200) + 1) + /* async qh unlink delay */ /* Initial IRQ latency: faster than hw default */ static int log2_irq_thresh = 0; // 0 to 6 @@ -152,10 +153,7 @@ timer_action(struct ehci_hcd *ehci, enum break; /* case TIMER_ASYNC_SHRINK: */ default: - /* add a jiffie since we synch against the - * 8 KHz uframe counter. - */ - t = DIV_ROUND_UP(EHCI_SHRINK_FRAMES * HZ, 1000) + 1; + t = EHCI_SHRINK_JIFFIES; break; } mod_timer(&ehci->watchdog, t + jiffies); Index: usb-3.0/drivers/usb/host/ehci-q.c =================================================================== --- usb-3.0.orig/drivers/usb/host/ehci-q.c +++ usb-3.0/drivers/usb/host/ehci-q.c @@ -1231,6 +1231,8 @@ static void start_unlink_async (struct e prev->hw->hw_next = qh->hw->hw_next; prev->qh_next = qh->qh_next; + if (ehci->qh_scan_next == qh) + ehci->qh_scan_next = qh->qh_next.qh; wmb (); /* If the controller isn't running, we don't have to wait for it */ @@ -1256,53 +1258,49 @@ static void scan_async (struct ehci_hcd struct ehci_qh *qh; enum ehci_timer_action action = TIMER_IO_WATCHDOG; - ehci->stamp = ehci_readl(ehci, &ehci->regs->frame_index); timer_action_done (ehci, TIMER_ASYNC_SHRINK); -rescan: stopped = !HC_IS_RUNNING(ehci_to_hcd(ehci)->state); - qh = ehci->async->qh_next.qh; - if (likely (qh != NULL)) { - do { - /* clean any finished work for this qh */ - if (!list_empty(&qh->qtd_list) && (stopped || - qh->stamp != ehci->stamp)) { - int temp; - - /* unlinks could happen here; completion - * reporting drops the lock. rescan using - * the latest schedule, but don't rescan - * qhs we already finished (no looping) - * unless the controller is stopped. - */ - qh = qh_get (qh); - qh->stamp = ehci->stamp; - temp = qh_completions (ehci, qh); - if (qh->needs_rescan) - unlink_async(ehci, qh); - qh_put (qh); - if (temp != 0) { - goto rescan; - } - } - - /* unlink idle entries, reducing DMA usage as well - * as HCD schedule-scanning costs. delay for any qh - * we just scanned, there's a not-unusual case that it - * doesn't stay idle for long. - * (plus, avoids some kind of re-activation race.) - */ - if (list_empty(&qh->qtd_list) - && qh->qh_state == QH_STATE_LINKED) { - if (!ehci->reclaim && (stopped || - ((ehci->stamp - qh->stamp) & 0x1fff) - >= EHCI_SHRINK_FRAMES * 8)) - start_unlink_async(ehci, qh); - else - action = TIMER_ASYNC_SHRINK; - } - qh = qh->qh_next.qh; - } while (qh); + ehci->qh_scan_next = ehci->async->qh_next.qh; + while (ehci->qh_scan_next) { + qh = ehci->qh_scan_next; + ehci->qh_scan_next = qh->qh_next.qh; + rescan: + /* clean any finished work for this qh */ + if (!list_empty(&qh->qtd_list)) { + int temp; + + /* + * Unlinks could happen here; completion reporting + * drops the lock. That's why ehci->qh_scan_next + * always holds the next qh to scan; if the next qh + * gets unlinked then ehci->qh_scan_next is adjusted + * in start_unlink_async(). + */ + qh = qh_get(qh); + temp = qh_completions(ehci, qh); + if (qh->needs_rescan) + unlink_async(ehci, qh); + qh->unlink_time = jiffies + EHCI_SHRINK_JIFFIES; + qh_put(qh); + if (temp != 0) + goto rescan; + } + + /* unlink idle entries, reducing DMA usage as well + * as HCD schedule-scanning costs. delay for any qh + * we just scanned, there's a not-unusual case that it + * doesn't stay idle for long. + * (plus, avoids some kind of re-activation race.) + */ + if (list_empty(&qh->qtd_list) + && qh->qh_state == QH_STATE_LINKED) { + if (!ehci->reclaim && (stopped || + time_after_eq(jiffies, qh->unlink_time))) + start_unlink_async(ehci, qh); + else + action = TIMER_ASYNC_SHRINK; + } } if (action == TIMER_ASYNC_SHRINK) timer_action (ehci, TIMER_ASYNC_SHRINK); -- To unsubscribe from this list: send the line "unsubscribe linux-usb" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html