Sarah (and anyone else who's interested): A while ago I wrote about a hardware bug in my Intel ICH5 and ICH8 EHCI controllers. You pointed out that these are rather old components, not being used in current systems, which is quite true. Now I have figured out a simple way for anyone to test for this bug in any EHCI controller, without the need for a g-zero gadget. It's a two-part procedure: Apply the patch below (which is written for vanilla 3.8) and load the resulting driver. The patch adds an explicit test to ehci-hcd for detecting the bug. Then plug in an ordinary USB flash drive and run the attached program (as root), giving it the device path for the flash drive as the single command-line argument. For example: sudo ./ehci-test /dev/bus/usb/002/003 The program won't do anything bad to the flash drive; it just reads the first 256 KB of data over and over again, now and then unlinking an URB to try and trigger the bug. If the program works right, it will print out a loop counter every hundred iterations. If it runs for 1000 iterations with no error messages in the kernel log, you may consider that the controller has passed the test. This should take under a minute, depending on the hardware speed. The program won't stop by itself unless something goes wrong. You can kill it with ^C or more simply by unplugging the flash drive. (If you want to be safe, make sure there are no mounted filesystems on the drive before running the test program.) If the hardware bug is detected, the kernel patch will print error messages to the system log. 
For example, when I run the test on the Intel controller in this computer, I get: [ 150.019441] usb-storage 3-8:1.0: disconnect by usbfs [ 150.271190] ehci-pci 0000:00:1d.7: EHCI hardware bug detected: 00008d00 80008d00 [ 150.591089] ehci-pci 0000:00:1d.7: EHCI hardware bug detected: 00008d00 80008d00 [ 151.538560] ehci-pci 0000:00:1d.7: EHCI hardware bug detected: 00008d00 80008d00 [ 151.857569] ehci-pci 0000:00:1d.7: EHCI hardware bug detected: 00008d00 80008d00 [ 152.018886] ehci-pci 0000:00:1d.7: EHCI hardware bug detected: 00008d00 80008d00 [ 152.179810] ehci-pci 0000:00:1d.7: EHCI hardware bug detected: 80008d00 00008d00 [ 153.211804] ehci-pci 0000:00:1d.7: EHCI hardware bug detected: 00008d00 80008d00 [ 153.374497] ehci-pci 0000:00:1d.7: EHCI hardware bug detected: 00008d00 80008d00 [ 153.770443] ehci-pci 0000:00:1d.7: EHCI hardware bug detected: 80008d00 00008d00 [ 154.247861] ehci-pci 0000:00:1d.7: EHCI hardware bug detected: 82008d80 00008d00 [ 154.566912] ehci-pci 0000:00:1d.7: EHCI hardware bug detected: 82008d80 00008d00 [ 155.359101] ehci-pci 0000:00:1d.7: EHCI hardware bug detected: 00008d00 80008d00 [ 155.838132] ehci-pci 0000:00:1d.7: EHCI hardware bug detected: 00008d00 80008d00 [ 156.791107] ehci-pci 0000:00:1d.7: EHCI hardware bug detected: 80008d00 00008d00 [ 157.267620] ehci-pci 0000:00:1d.7: EHCI hardware bug detected: 00008d00 80008d00 [ 159.252057] ehci-pci 0000:00:1d.7: EHCI hardware bug detected: 80008d00 00008d00 [ 159.886048] ehci-pci 0000:00:1d.7: EHCI hardware bug detected: 80008d00 00008d00 [ 160.206625] ehci-pci 0000:00:1d.7: EHCI hardware bug detected: 02008d80 80008d00 ... You get the idea. The values in the two columns on the right are always supposed to be equal; when they aren't it indicates that the controller has done a DMA write at a time when ehci-hcd isn't expecting one to happen. I'd be interested to hear the results of testing on a variety of controllers. 
(This computer also has an NEC EHCI controller, and that one does not have the bug.) Do the EHCI controllers on current Intel chipsets pass the test? What about other vendors? Thanks to all who try it out and report their results. Alan Stern Index: usb-3.8/drivers/usb/host/ehci-q.c =================================================================== --- usb-3.8.orig/drivers/usb/host/ehci-q.c +++ usb-3.8/drivers/usb/host/ehci-q.c @@ -547,7 +547,7 @@ qh_completions (struct ehci_hcd *ehci, s if (stopped != 0 || hw->hw_qtd_next == EHCI_LIST_END(ehci)) { switch (state) { case QH_STATE_IDLE: - qh_refresh(ehci, qh); +// qh_refresh(ehci, qh); break; case QH_STATE_LINKED: /* We won't refresh a QH that's linked (after the HC @@ -1232,6 +1232,7 @@ static void start_iaa_cycle(struct ehci_ static void end_unlink_async(struct ehci_hcd *ehci) { struct ehci_qh *qh; + __hc32 tok1, tok2; if (ehci->has_synopsys_hc_bug) ehci_writel(ehci, (u32) ehci->async->qh_dma, @@ -1242,6 +1243,7 @@ static void end_unlink_async(struct ehci ehci->async_unlinking = true; while (ehci->async_iaa) { qh = ehci->async_iaa; + tok1 = ACCESS_ONCE(qh->hw->hw_token); ehci->async_iaa = qh->unlink_next; qh->unlink_next = NULL; @@ -1250,8 +1252,14 @@ static void end_unlink_async(struct ehci qh_completions(ehci, qh); if (!list_empty(&qh->qtd_list) && - ehci->rh_state == EHCI_RH_RUNNING) + ehci->rh_state == EHCI_RH_RUNNING) { + udelay(10); + tok2 = ACCESS_ONCE(qh->hw->hw_token); + if (tok1 != tok2) + ehci_err(ehci, "EHCI hardware bug detected: %08x %08x\n", + tok1, tok2); qh_link_async(ehci, qh); + } disable_async(ehci); } ehci->async_unlinking = false;
/*
 * ehci-test.c -- Test EHCI hardware using a flash drive test device
 *
 * To build:	gcc -O2 -o ehci-test ehci-test.c
 *
 * To run:	Plug in a USB flash drive and note the bus and device numbers
 *		it gets assigned.  Then do:
 *
 *			sudo ./ehci-test /dev/bus/usb/BBB/DDD
 *
 *		where BBB and DDD are the bus and device numbers zero-filled
 *		to three digits each.  If all goes well the test will not
 *		terminate; kill it after a minute or so with ^C or by
 *		unplugging the flash drive.
 */

#include <stdio.h>
#include <errno.h>
#include <fcntl.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/usbdevice_fs.h>
#include <linux/usb/ch9.h>

int fd;				/* Open usbfs device file */
unsigned char buf[512];		/* Shared by every URB; the data is discarded */
int ifnum;			/* Interface number claimed for the test */
int ep_in = -1;			/* Bulk-in endpoint address, -1 until found */
int ep_out = -1;		/* Bulk-out endpoint address, -1 until found */
int start_read = 1;		/* Nonzero when a new READ(10) must be sent */
int block_count;		/* 512-byte blocks seen for the current READ(10) */
int loop_count;			/* Number of READ(10) commands completed */

#define NUM_URBS	600
struct usbdevfs_urb urbs[NUM_URBS];
struct usbdevfs_urb * const urbs_end = urbs + NUM_URBS;

/*
 * next_urb is the URB expected to complete next; unlinked_urb is the URB
 * run_test() has deliberately discarded and is waiting to reap (or NULL).
 */
struct usbdevfs_urb *next_urb, *unlinked_urb;

#define DEFAULT_TIMEOUT		2	/* seconds */
#define NUM_BLOCKS		512

/* Prepare one URB as a bulk-in transfer into the shared buffer. */
void init_urb(struct usbdevfs_urb *u)
{
	u->type = USBDEVFS_URB_TYPE_BULK;
	u->endpoint = ep_in;
	u->buffer = buf;
	u->buffer_length = sizeof(buf);
}

/*
 * Read the descriptors from the device file, verify that the first
 * interface is Bulk-Only mass storage, locate its bulk endpoints, unbind
 * the kernel driver, claim the interface and initialize all the URBs.
 *
 * Returns 0 on success, 1 on failure (after printing a message).
 */
int get_interface(void)
{
	int i, rc, num_eps;
	struct usb_interface_descriptor *pi;
	struct usb_endpoint_descriptor *pe;
	struct usbdevfs_ioctl ctl;

	/* Assume we will use the first interface in the first configuration */
	i = USB_DT_DEVICE_SIZE + USB_DT_CONFIG_SIZE + USB_DT_INTERFACE_SIZE;
	rc = read(fd, buf, i);
	if (rc < 0) {
		perror("Unable to read device file");
		return 1;
	}
	if (rc < i) {
		/* errno is not meaningful for a short read */
		fprintf(stderr, "Unable to read device file: short read\n");
		return 1;
	}

	pi = (struct usb_interface_descriptor *)
			&buf[USB_DT_DEVICE_SIZE + USB_DT_CONFIG_SIZE];
	if (pi->bDescriptorType != USB_DT_INTERFACE ||
			pi->bLength != USB_DT_INTERFACE_SIZE) {
		fprintf(stderr, "Interface descriptor not found\n");
		return 1;
	}
	if (pi->bInterfaceClass != USB_CLASS_MASS_STORAGE) {
		fprintf(stderr, "First interface is not mass storage\n");
		return 1;
	}
	if (pi->bInterfaceSubClass != 0x06 ||	/* Transparent SCSI */
			pi->bInterfaceProtocol != 0x50) {	/* Bulk Only */
		fprintf(stderr, "Interface subclass/protocol is wrong\n");
		return 1;
	}

	/*
	 * Save everything we need from the interface descriptor now: the
	 * next read() reuses buf and would overwrite the data pi points to.
	 */
	ifnum = pi->bInterfaceNumber;
	num_eps = pi->bNumEndpoints;

	i = num_eps * USB_DT_ENDPOINT_SIZE;
	if (i > (int) sizeof(buf)) {
		fprintf(stderr, "Too many endpoint descriptors\n");
		return 1;
	}
	rc = read(fd, buf, i);
	if (rc < 0) {
		perror("Unable to read endpoint descriptors");
		return 1;
	}
	if (rc != i) {
		fprintf(stderr,
			"Unable to read endpoint descriptors: short read\n");
		return 1;
	}

	for (i = 0; i < num_eps; ++i) {
		pe = (struct usb_endpoint_descriptor *)
				&buf[i * USB_DT_ENDPOINT_SIZE];
		if (pe->bDescriptorType != USB_DT_ENDPOINT ||
				pe->bLength != USB_DT_ENDPOINT_SIZE) {
			fprintf(stderr, "Endpoint descriptor not found\n");
			return 1;
		}
		/* Keep only the first bulk endpoint in each direction */
		if (usb_endpoint_is_bulk_in(pe) && ep_in < 0)
			ep_in = pe->bEndpointAddress;
		if (usb_endpoint_is_bulk_out(pe) && ep_out < 0)
			ep_out = pe->bEndpointAddress;
	}
	if (ep_in < 0 || ep_out < 0) {
		fprintf(stderr, "Didn't find both bulk endpoints\n");
		return 1;
	}

	/* Unbind usb-storage from the interface */
	ctl.ifno = ifnum;
	ctl.ioctl_code = USBDEVFS_DISCONNECT;
	rc = ioctl(fd, USBDEVFS_IOCTL, &ctl);
	if (rc == -1 && errno != ENODATA) {
		perror("Unable to unbind the kernel driver");
		return 1;
	}

	/* Claim the interface */
	rc = ioctl(fd, USBDEVFS_CLAIMINTERFACE, &ifnum);
	if (rc == -1) {
		perror("Unable to claim interface");
		return 1;
	}

	for (i = 0; i < NUM_URBS; ++i)
		init_urb(&urbs[i]);
	return 0;
}

/*
 * Send TEST UNIT READY and check the returned CSW.
 *
 * Returns 0 if the device reports good status, 1 otherwise.
 */
int check_device(void)
{
	int rc;
	struct usbdevfs_bulktransfer bulk;
	static unsigned char cbw[31] = {
		'U', 'S', 'B', 'C',	/* Signature */
		100, 0, 0, 0,		/* Tag */
		0, 0, 0, 0,		/* DataTransferLength */
		0, 0, 6,		/* Flags, LUN, Length of CDB */
		0, 0, 0, 0, 0, 0,	/* CDB: TEST UNIT READY */
	};
	unsigned char csw[13];

	bulk.ep = ep_out;
	bulk.len = sizeof(cbw);
	bulk.timeout = 1000;
	bulk.data = cbw;
	rc = ioctl(fd, USBDEVFS_BULK, &bulk);
	if (rc < 0) {
		perror("Unable to send TEST UNIT READY");
		return 1;
	}

	bulk.ep = ep_in;
	bulk.len = sizeof(csw);
	bulk.data = csw;
	rc = ioctl(fd, USBDEVFS_BULK, &bulk);
	if (rc < 0) {
		perror("Unable to get TEST UNIT READY status");
		return 1;
	}
	if (rc != bulk.len || csw[3] != 'S') {
		fprintf(stderr, "Invalid CSW data\n");
		return 1;
	}

	rc = csw[12];		/* Last CSW byte is the command status */
	if (rc != 0) {
		fprintf(stderr, "TEST UNIT READY status %d\n", rc);
		return 1;
	}
	return 0;
}

/*
 * Send a READ(10) command for NUM_BLOCKS blocks starting at LBA 0.
 * The first tag byte is incremented after each successful send.
 *
 * Returns 0 on success, 1 on failure.
 */
int send_READ10(void)
{
	int rc;
	struct usbdevfs_bulktransfer bulk;
	static unsigned char cbw[31] = {
		'U', 'S', 'B', 'C',	/* Signature */
		101, 0, 0, 0,		/* Tag */
		0,			/* DataTransferLength = */
		(NUM_BLOCKS << 1) & 0xff,	/* NUM_BLOCKS * 512, */
		(NUM_BLOCKS >> 7) & 0xff,	/* little-endian */
		(NUM_BLOCKS >> 15) & 0xff,
		0x80, 0, 10,		/* Flags (Data-In), LUN, Length of CDB */
		0x28, 0,		/* CDB: READ(10), LUN 0 */
		0, 0, 0, 0,		/* LBA = 0 */
		0,			/* Reserved */
		NUM_BLOCKS >> 8,	/* Block count (big-endian) */
		NUM_BLOCKS & 0xff,
		0,			/* Control */
	};

	bulk.ep = ep_out;
	bulk.len = sizeof(cbw);
	bulk.timeout = 1000;
	bulk.data = cbw;
	rc = ioctl(fd, USBDEVFS_BULK, &bulk);
	if (rc < 0) {
		perror("Unable to send READ(10)");
		return 1;
	}
	++cbw[4];		/* Increment the tag */
	return 0;
}

/*
 * Reap one completed URB, validate it, and resubmit it.
 *
 * If a READ(10) is pending (start_read), it is sent first.  The reap is
 * done by polling USBDEVFS_REAPURBNDELAY: when use_timeout is nonzero the
 * polling continues for up to DEFAULT_TIMEOUT seconds, otherwise it gives
 * up as soon as no URB is ready.
 *
 * Returns 0 if an URB was reaped and accepted, 1 on error, and -1 if no
 * URB became available in time.
 */
int wait_for_one_urb(int use_timeout)
{
	int rc;
	time_t tend;
	struct usbdevfs_urb *u;

	/* If a READ(10) command is needed, send it */
	if (start_read) {
		if (send_READ10() != 0)
			return 1;
		start_read = 0;
	}

	tend = time(NULL);
	if (use_timeout)
		tend += DEFAULT_TIMEOUT;
	for (;;) {
		rc = ioctl(fd, USBDEVFS_REAPURBNDELAY, &u);
		if (rc == 0)
			break;
		if (rc == -1 && errno != EAGAIN) {
			perror("Error in REAPURBNDELAY");
			return 1;
		}
		if (time(NULL) >= tend)
			return -1;		/* Timed out */
	}
//	printf("Reaped URB %d status %d actlen %d\n",
//			u - urbs, u->status, u->actual_length);

	/* Make sure we are in sync */
	if (u == unlinked_urb) {
		unlinked_urb = NULL;
	} else if (u == next_urb) {
		if (++next_urb == urbs_end)
			next_urb = urbs;
	} else {
		fprintf(stderr, "Wrong URB completed\n");
		return 1;
	}

	/*
	 * Accept a normal completion, or -ENOENT for an URB that we marked
	 * (via usercontext) as deliberately unlinked.
	 */
	if (!(u->status == 0 ||
			(u->status == -ENOENT && u->usercontext))) {
		fprintf(stderr, "Invalid URB status %d, act len %d\n",
				u->status, u->actual_length);
		return 1;
	}

	if (u->actual_length == 512) {		/* Data block */
		++block_count;
		if (block_count > NUM_BLOCKS)
			fprintf(stderr, "Block count is too large\n");

	} else if (u->actual_length == 13) {	/* CSW */
		if (buf[3] != 'S' || buf[12] != 0) {
			fprintf(stderr, "Invalid CSW packet\n");
			return 1;
		}
		if (block_count != NUM_BLOCKS) {
			fprintf(stderr, "Block count is too small: %d\n",
					block_count);
			return 1;
		}
		++loop_count;
		if (loop_count % 100 == 0)
			printf("%d\n", loop_count);
		start_read = 1;
		block_count = 0;

	} else if (u->actual_length == 0) {
		/* Must have been unlinked */
		if (!u->usercontext) {
			fprintf(stderr, "Got zero-length packet\n");
			return 1;
		}

	} else {
		fprintf(stderr, "Got invalid packet length: %d\n",
				u->actual_length);
	}

	/*
	 * Resubmit if we're not waiting for an unlinked URB.  Starting at
	 * the URB just reaped, resubmit every URB up to (not including)
	 * next_urb, so URBs that completed while an unlink was pending are
	 * put back in the queue as well.
	 */
	if (!unlinked_urb) {
		do {
			u->usercontext = NULL;
			rc = ioctl(fd, USBDEVFS_SUBMITURB, u);
			if (rc < 0) {
				perror("Error resubmitting bulk-in urb");
				return 1;
			}
			if (++u == urbs_end)
				u = urbs;
		} while (u != next_urb);
	}
	return 0;
}

/*
 * Wait (with a timeout) for one URB.  If the wait times out, discard
 * next_urb once and try again; if it times out a second time for the
 * same URB, give up.
 *
 * Returns 0 on success, 1 on failure.
 */
int wait_for_urb_with_timeout(void)
{
	int rc;

 retry:
	rc = wait_for_one_urb(1);
	if (rc >= 0)
		return rc;

	/* Try to unlink the next URB */
	if (!next_urb->usercontext) {
		printf("URB timed out; bug may be present\n");
		next_urb->usercontext = next_urb;
		ioctl(fd, USBDEVFS_DISCARDURB, next_urb);
		goto retry;
	}
	return 1;
}

/*
 * Main test loop: queue all the URBs, then repeatedly let some complete,
 * discard the most recently resubmitted URB, and wait for it to be
 * reaped.  Returns only when an error occurs.
 */
void run_test(void)
{
	int rc, i;
	struct usbdevfs_urb *u;

	for (i = 0; i < NUM_URBS; ++i) {
		u = &urbs[i];
		rc = ioctl(fd, USBDEVFS_SUBMITURB, u);
		if (rc < 0) {
			perror("Error submitting bulk-in urb");
			return;
		}
	}
	next_urb = &urbs[0];

	/* If everything works right, the test never stops */
	for (;;) {

		/* Wait for at least two URBs to complete */
		for (i = 0; i < 2; ++i) {
			rc = wait_for_urb_with_timeout();
			if (rc != 0)
				return;
		}

		/* Wait until no URBs are ready */
		do {
			rc = wait_for_one_urb(0);
			if (rc > 0)
				return;
		} while (rc == 0);

		/* Unlink the URB which was just resubmitted */
		u = next_urb;
		if (u == urbs)
			u = urbs_end;
		unlinked_urb = --u;
		u->usercontext = u;	/* Mark it as deliberately unlinked */
		rc = ioctl(fd, USBDEVFS_DISCARDURB, u);
		if (rc == -1) {
			perror("Error in DISCARDURB");
			return;
		}

		/* Wait until the unlinked URB completes */
		while (unlinked_urb) {
			rc = wait_for_one_urb(0);
			if (rc > 0)
				return;
		}

		/* Wait until no URBs are ready */
		do {
			rc = wait_for_one_urb(0);
			if (rc > 0)
				return;
		} while (rc == 0);
	}
}

/*
 * Usage: ehci-test device-filename
 *
 * Opens the usbfs device file, sets up the interface, checks that the
 * device is ready and runs the test until it fails or is killed.
 */
int main(int argc, char **argv)
{
	char *filename;

	if (argc != 2) {
		printf("Usage: ehci-test device-filename\n");
		return 1;
	}
	filename = argv[1];

	fd = open(filename, O_RDWR);
	if (fd < 0) {
		perror("Error in open");
		return 1;
	}

	if (get_interface() != 0)
		return 1;
	if (check_device() != 0)
		return 1;

	run_test();

	close(fd);
	return 0;
}