On Wed, 3 Feb 2010, Albert Herranz wrote:

> The HCD_BOUNCE_BUFFERS USB host controller driver flag can be enabled
> to instruct the USB stack to always bounce USB buffers to/from coherent
> memory buffers _just_ before/after a host controller transmission.
>
> This setting allows overcoming some platform-specific limitations.
>
> For example, the Nintendo Wii video game console is a NOT_COHERENT_CACHE
> platform that is unable to safely perform non-32 bit uncached writes
> to RAM because the byte enables are not connected to the bus.
> Thus, in that platform, "coherent" DMA buffers cannot be directly used
> by the kernel code unless it guarantees that all write accesses
> to said buffers are done in 32 bit chunks (which is not the case in the
> USB subsystem).
>
> To avoid this unwanted behaviour HCD_BOUNCE_BUFFERS can be enabled at
> the HCD controller, causing buffer allocations to be satisfied from
> normal memory and, only at the very last moment, before the actual
> transfer, buffers get copied to/from their corresponding DMA coherent
> bounce buffers.
>
> Note that HCD_LOCAL_MEM doesn't help in solving this problem as in that
> case buffers may be allocated from coherent memory in the first place
> and thus potentially accessed in non-32 bit chunks by USB drivers.

This description sounds hopelessly confused.  Maybe you're just misusing
the term "coherent".  The patch itself doesn't affect the coherent DMA
mappings anyway; it affects the streaming mappings.  Or to put it another
way, what's the justification for replacing a call to dma_map_single()
with a call to dma_alloc_coherent()?

Since the patch doesn't affect any of the coherent mappings (see for
example the calls to dma_pool_create() in ehci-mem.c), I don't see how it
can possibly do what you claim.

> +/**
> + * hcd_memcpy32_to_coherent - copy data to a bounce buffer
> + * @dst: destination dma bounce buffer
> + * @src: source buffer
> + * @len: number of bytes to copy
> + *
> + * This function copies @len bytes from @src to @dst in 32 bit chunks.
> + * The caller must guarantee that @dst length is 4 byte aligned and
> + * that @dst length is greater than or equal to @src length.
> + */
> +static void *hcd_memcpy32_to_coherent(void *dst, const void *src, size_t len)
> +{
> +	u32 *q = dst, *p = (void *)src;
> +	u8 *s;
> +
> +	while (len >= 4) {
> +		*q++ = *p++;
> +		len -= 4;
> +	}
> +	s = (u8 *)p;
> +	switch (len) {
> +	case 3:
> +		*q = s[0] << 24 | s[1] << 16 | s[2] << 8;
> +		break;
> +	case 2:
> +		*q = s[0] << 24 | s[1] << 16;
> +		break;
> +	case 1:
> +		*q = s[0] << 24;
> +		break;
> +	default:
> +		break;
> +	}
> +	return dst;
> +}

What happens if somebody tries to use this code on a little-endian CPU?
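(On a big-endian CPU the switch stores the leftover bytes in source
order; on a little-endian CPU the same stores leave them reversed in
memory.)

For what it's worth, the tail could be handled in a byte-order
independent way while still touching the destination only with aligned
32-bit stores: stage the 1-3 odd bytes in a zero-padded word and write
it out as a single u32.  This is only a sketch, untested, and the
function name is made up:

static void *memcpy32_tail_safe(void *dst, const void *src, size_t len)
{
	u32 *q = dst;
	const u32 *p = src;
	union { u32 w; u8 b[4]; } tail = { .w = 0 };

	while (len >= 4) {
		*q++ = *p++;
		len -= 4;
	}
	if (len) {
		memcpy(tail.b, p, len);	/* stage the 1-3 odd bytes */
		*q = tail.w;		/* one aligned 32-bit store */
	}
	return dst;
}

The union's byte array occupies the word's lowest addresses on either
byte order, so the destination ends up byte-for-byte equal to the
source on big- and little-endian CPUs alike.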
> static int map_urb_for_dma(struct usb_hcd *hcd, struct urb *urb,
> 		gfp_t mem_flags)
> {
> @@ -1274,53 +1441,80 @@ static int map_urb_for_dma(struct usb_hcd *hcd, struct urb *urb,
>  	if (is_root_hub(urb->dev))
>  		return 0;
>
> -	if (usb_endpoint_xfer_control(&urb->ep->desc)
> -	    && !(urb->transfer_flags & URB_NO_SETUP_DMA_MAP)) {
> -		if (hcd->self.uses_dma) {
> -			urb->setup_dma = dma_map_single(
> -					hcd->self.controller,
> -					urb->setup_packet,
> -					sizeof(struct usb_ctrlrequest),
> -					DMA_TO_DEVICE);
> -			if (dma_mapping_error(hcd->self.controller,
> -						urb->setup_dma))
> -				return -EAGAIN;
> -		} else if (hcd->driver->flags & HCD_LOCAL_MEM)
> -			ret = hcd_alloc_coherent(
> -					urb->dev->bus, mem_flags,
> +	if (usb_endpoint_xfer_control(&urb->ep->desc)) {
> +		if (hcd->driver->flags & HCD_BOUNCE_BUFFERS) {
> +			if (!(urb->transfer_flags & URB_NO_SETUP_DMA_MAP))
> +				urb->setup_dma = 0;
> +			ret = hcd_bounce_to_coherent(
> +					hcd->self.controller, mem_flags,
>  					&urb->setup_dma,
>  					(void **)&urb->setup_packet,
>  					sizeof(struct usb_ctrlrequest),
>  					DMA_TO_DEVICE);
> +		} else if (!(urb->transfer_flags & URB_NO_SETUP_DMA_MAP)) {
> +			if (hcd->self.uses_dma) {
> +				urb->setup_dma = dma_map_single(
> +						hcd->self.controller,
> +						urb->setup_packet,
> +						sizeof(struct usb_ctrlrequest),
> +						DMA_TO_DEVICE);
> +				if (dma_mapping_error(hcd->self.controller,
> +							urb->setup_dma))
> +					return -EAGAIN;
> +			} else if (hcd->driver->flags & HCD_LOCAL_MEM)
> +				ret = hcd_alloc_coherent(
> +						urb->dev->bus, mem_flags,
> +						&urb->setup_dma,
> +						(void **)&urb->setup_packet,
> +						sizeof(struct usb_ctrlrequest),
> +						DMA_TO_DEVICE);
> +		}
>  	}
>
>  	dir = usb_urb_dir_in(urb) ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
> -	if (ret == 0 && urb->transfer_buffer_length != 0
> -	    && !(urb->transfer_flags & URB_NO_TRANSFER_DMA_MAP)) {
> -		if (hcd->self.uses_dma) {
> -			urb->transfer_dma = dma_map_single (
> -					hcd->self.controller,
> -					urb->transfer_buffer,
> -					urb->transfer_buffer_length,
> -					dir);
> -			if (dma_mapping_error(hcd->self.controller,
> -					urb->transfer_dma))
> -				return -EAGAIN;
> -		} else if (hcd->driver->flags & HCD_LOCAL_MEM) {
> -			ret = hcd_alloc_coherent(
> -					urb->dev->bus, mem_flags,
> +	if (ret == 0 && urb->transfer_buffer_length != 0) {
> +		if (hcd->driver->flags & HCD_BOUNCE_BUFFERS) {
> +			if (!(urb->transfer_flags & URB_NO_TRANSFER_DMA_MAP))
> +				urb->transfer_dma = 0;
> +			ret = hcd_bounce_to_coherent(
> +					hcd->self.controller, mem_flags,
>  					&urb->transfer_dma,
>  					&urb->transfer_buffer,
>  					urb->transfer_buffer_length,
>  					dir);
>
> -			if (ret && usb_endpoint_xfer_control(&urb->ep->desc)
> -			    && !(urb->transfer_flags & URB_NO_SETUP_DMA_MAP))
> -				hcd_free_coherent(urb->dev->bus,
> -					&urb->setup_dma,
> -					(void **)&urb->setup_packet,
> -					sizeof(struct usb_ctrlrequest),
> -					DMA_TO_DEVICE);
> +			if (ret && usb_endpoint_xfer_control(&urb->ep->desc))
> +				hcd_bounce_from_coherent(hcd->self.controller,
> +					&urb->setup_dma,
> +					(void **)&urb->setup_packet,
> +					sizeof(struct usb_ctrlrequest),
> +					DMA_TO_DEVICE);
> +		} else if (!(urb->transfer_flags & URB_NO_TRANSFER_DMA_MAP)) {
> +			if (hcd->self.uses_dma) {
> +				urb->transfer_dma = dma_map_single(
> +						hcd->self.controller,
> +						urb->transfer_buffer,
> +						urb->transfer_buffer_length,
> +						dir);
> +				if (dma_mapping_error(hcd->self.controller,
> +						urb->transfer_dma))
> +					return -EAGAIN;
> +			} else if (hcd->driver->flags & HCD_LOCAL_MEM) {
> +				ret = hcd_alloc_coherent(
> +						urb->dev->bus, mem_flags,
> +						&urb->transfer_dma,
> +						&urb->transfer_buffer,
> +						urb->transfer_buffer_length,
> +						dir);
> +
> +				if (ret &&
> +				    usb_endpoint_xfer_control(&urb->ep->desc))
> +					hcd_free_coherent(urb->dev->bus,
> +						&urb->setup_dma,
> +						(void **)&urb->setup_packet,
> +						sizeof(struct usb_ctrlrequest),
> +						DMA_TO_DEVICE);
> +			}
>  		}
>  	}
>  	return ret;

It seems that every time somebody comes up with a new kind of
memory-access restriction, this function grows by a factor of 2.  After
a few more iterations it will be larger than the rest of the kernel!

There must be a better way to structure the requirements here.

Alan Stern
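PS: To illustrate the kind of restructuring I mean: the flag checks
could be factored into a single helper that both the setup packet and
the transfer buffer go through, so that the next memory-access
restriction adds one branch in one place instead of four copies.  This
is only a rough sketch (not even compile-tested; map_one_buffer() is a
made-up name, and the setup_dma zeroing detail is glossed over):

static int map_one_buffer(struct usb_hcd *hcd, struct usb_bus *bus,
		gfp_t mem_flags, dma_addr_t *dma, void **buf, size_t len,
		enum dma_data_direction dir, int no_map)
{
	/* bouncing applies even when URB_NO_*_DMA_MAP is set,
	 * matching the patch above */
	if (hcd->driver->flags & HCD_BOUNCE_BUFFERS)
		return hcd_bounce_to_coherent(hcd->self.controller,
				mem_flags, dma, buf, len, dir);
	if (no_map)		/* caller already supplied a DMA address */
		return 0;
	if (hcd->self.uses_dma) {
		*dma = dma_map_single(hcd->self.controller, *buf, len, dir);
		if (dma_mapping_error(hcd->self.controller, *dma))
			return -EAGAIN;
		return 0;
	}
	if (hcd->driver->flags & HCD_LOCAL_MEM)
		return hcd_alloc_coherent(bus, mem_flags, dma, buf, len, dir);
	return 0;
}

Then map_urb_for_dma() collapses to two calls along the lines of

	ret = map_one_buffer(hcd, urb->dev->bus, mem_flags,
			&urb->setup_dma, (void **)&urb->setup_packet,
			sizeof(struct usb_ctrlrequest), DMA_TO_DEVICE,
			urb->transfer_flags & URB_NO_SETUP_DMA_MAP);

and a matching unmap_one_buffer() would keep the error-cleanup path
just as short.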