On Thu, 26 Nov 2015, Steinar H. Gunderson wrote: > On Wed, Nov 25, 2015 at 10:29:53AM -0500, Alan Stern wrote: > > I want to see a modified version of your patch. Several things need to > > be changed or fixed, but the major change needs to be the way memory is > > allocated. It should be done as part of the mmap system call, not as a > > separate ioctl. > > I took a stab at this. The attached patch doesn't fix any of your other > issues, but could you take a look at whether the mmap part looks sane? > I've verified that it works in practice with my own code, and see no > performance regressions. It is still the case that you can't mix mmap > and non-mmap transfers on the same device; I suppose that should be governed > by a zerocopy flag instead of “are there any mmap-ed areas”. As I recall, Markus's original patch took care of this by checking to see whether the transfer buffer was in one of the mmap'ed areas. If it was then the transfer would be zerocopy; otherwise it would be normal. That seems like a reasonable approach. > Let me go through your other issues: > > > There are numerous smaller issues: The allocated memory needs to be > > charged against usbfs_memory_usage > > I'll fix this. > > > the memory should be allocated using dma_zalloc_coherent instead of > > kmalloc, > > dma_zalloc_coherent returns a dma_handle in addition to the memory itself; > should we just throw this away? No, the handle has to be stored for use when an URB is submitted and when the memory is deallocated -- it is a required argument for dma_free_coherent(). > > proc_do_submiturb should > > check that the buffer starts anywhere within the allocated memory > > rather than just at the beginning > > I'll fix this, too. > > > , and so on. > > Clarification appreciated ;-) Detailed notes below. > diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c > index 38ae877c..9a1a7d6 100644 > --- a/drivers/usb/core/devio.c > +++ b/drivers/usb/core/devio.c > @@ -69,6 +69,7 @@ struct usb_dev_state { > spinlock_t lock; /* protects the async urb lists */ > struct list_head async_pending; > struct list_head async_completed; > + struct list_head memory_list; > wait_queue_head_t wait; /* wake up if a request completed */ > unsigned int discsignr; > struct pid *disc_pid; > @@ -96,6 +97,17 @@ struct async { > u8 bulk_status; > }; > > +struct usb_memory { > + struct list_head memlist; > + int vma_use_count; > + int usb_use_count; > + u32 offset; What's the reason for the "offset" member? It doesn't seem to do anything. > + u32 size; > + void *mem; You'll need to store the DMA address as well. > + unsigned long vm_start; > + struct usb_dev_state *ps; > +}; > + > static bool usbfs_snoop; > module_param(usbfs_snoop, bool, S_IRUGO | S_IWUSR); > MODULE_PARM_DESC(usbfs_snoop, "true to log all usbfs traffic"); > @@ -157,6 +169,74 @@ static int connected(struct usb_dev_state *ps) > ps->dev->state != USB_STATE_NOTATTACHED); > } > > +static struct usb_memory * > +alloc_usb_memory(struct usb_dev_state *ps, size_t size) > +{ > + struct usb_memory *usbm; > + void *mem; > + unsigned long flags; > + > + mem = alloc_pages_exact(size, GFP_KERNEL | GFP_DMA32); dma_zalloc_coherent(), not alloc_pages_exact(). > + if (!mem) > + return NULL; > + > + usbm = kzalloc(sizeof(struct usb_memory), GFP_KERNEL); > + if (!usbm) { > + free_pages_exact(mem, size); > + return NULL; > + } > + memset(mem, 0x0, PAGE_SIZE << get_order(size)); Then this line won't be needed. > + usbm->mem = mem; > + usbm->offset = virt_to_phys(mem); > + usbm->size = size; > + usbm->ps = ps; > + spin_lock_irqsave(&ps->lock, flags); > + list_add_tail(&usbm->memlist, &ps->memory_list); > + spin_unlock_irqrestore(&ps->lock, flags); > + > + return usbm; > +} This subroutine should be merged into usbdev_mmap(). In fact, all the memory-oriented routines should be located together in the source file. It's confusing to put some of them near the start and others near the end. > + > +static void dec_use_count(struct usb_memory *usbm, int *count) > +{ > + struct usb_dev_state *ps = usbm->ps; > + unsigned long flags; > + > + spin_lock_irqsave(&ps->lock, flags); > + WARN_ON(count != &usbm->usb_use_count && count != &usbm->vma_use_count); > + --*count; > + if (usbm->usb_use_count == 0 && usbm->vma_use_count == 0) { Forget about the WARN_ON; you know all the callers because this patch introduces them. If you prefer, instead of a pointer pass an enumeration value: USB_MEMORY_URB_COUNT or USB_MEMORY_VMA_COUNT. Also, you might change the name to make it a little less generic. For example, dec_usb_memory_use_count(). > + list_del_init(&usbm->memlist); > + free_pages_exact(usbm->mem, usbm->size); > + usbfs_decrease_memory_usage( > + usbm->size + sizeof(struct usb_memory)); > + kfree(usbm); > + } > + spin_unlock_irqrestore(&ps->lock, flags); > +} > + > +static struct usb_memory * > +find_memory_area(struct usb_dev_state *ps, const struct usbdevfs_urb *uurb) > +{ > + struct usb_memory *usbm = NULL, *iter; > + unsigned long flags; > + unsigned long uurb_start = (unsigned long)uurb->buffer; > + > + spin_lock_irqsave(&ps->lock, flags); > + list_for_each_entry(iter, &ps->memory_list, memlist) { > + if (iter->usb_use_count == 0 && I don't think this is necessary. It should be legal to keep the data for two URBs in the same memory region, so long as they don't overlap. > + uurb_start >= iter->vm_start && > + uurb->buffer_length <= iter->vm_start - uurb_start + > + (PAGE_SIZE << get_order(iter->size))) { Shouldn't this be: uurb->start >= iter->vm_start && uurb->start < iter->vm_start + iter->size && uurb->buffer_length <= iter->vm_start + iter->size - uurb->start) { > + usbm = iter; > + usbm->usb_use_count++; > + break; > + } > + } > + spin_unlock_irqrestore(&ps->lock, flags); > + return usbm; > +} > + > static loff_t usbdev_lseek(struct file *file, loff_t offset, int orig) > { > loff_t ret; > @@ -288,6 +368,9 @@ static struct async *alloc_async(unsigned int numisoframes) > > static void free_async(struct async *as) > { > + struct usb_memory *usbm = NULL, *usbm_iter; > + unsigned long flags; > + struct usb_dev_state *ps = as->ps; > int i; > > put_pid(as->pid); > @@ -297,8 +380,22 @@ static void free_async(struct async *as) > if (sg_page(&as->urb->sg[i])) > kfree(sg_virt(&as->urb->sg[i])); > } > + > + spin_lock_irqsave(&ps->lock, flags); > + list_for_each_entry(usbm_iter, &ps->memory_list, memlist) { > + if (usbm_iter->mem == as->urb->transfer_buffer) { > + usbm = usbm_iter; > + break; > + } > + } > + spin_unlock_irqrestore(&ps->lock, flags); There's no need to do this. Just store the usbm pointer in the as structure. > + > kfree(as->urb->sg); > - kfree(as->urb->transfer_buffer); > + if (usbm == NULL) > + kfree(as->urb->transfer_buffer); > + else > + dec_use_count(usbm, &usbm->usb_use_count); > + > kfree(as->urb->setup_packet); > usb_free_urb(as->urb); > usbfs_decrease_memory_usage(as->mem_usage); > @@ -910,6 +1007,7 @@ static int usbdev_open(struct inode *inode, struct file *file) > INIT_LIST_HEAD(&ps->list); > INIT_LIST_HEAD(&ps->async_pending); > INIT_LIST_HEAD(&ps->async_completed); > + INIT_LIST_HEAD(&ps->memory_list); > init_waitqueue_head(&ps->wait); > ps->discsignr = 0; > ps->disc_pid = get_pid(task_pid(current)); > @@ -938,6 +1036,8 @@ static int usbdev_release(struct inode *inode, struct file *file) > struct usb_dev_state *ps = file->private_data; > struct usb_device *dev = ps->dev; > unsigned int ifnum; > + struct list_head *p, *q; > + struct usb_memory *tmp; > struct async *as; > > usb_lock_device(dev); > @@ -962,6 +1062,14 @@ static int usbdev_release(struct inode *inode, struct file *file) > free_async(as); > as = async_getcompleted(ps); > } > + > + list_for_each_safe(p, q, &ps->memory_list) { > + tmp = list_entry(p, struct usb_memory, memlist); > + list_del(p); > + if (tmp->mem) > + free_pages_exact(tmp->mem, tmp->size); > + kfree(tmp); No, you can't do this here. The memory might still be in use by the VMA. You have to do: list_del(&ps->memory_list); perhaps with a comment explaining when the usb_memory structures and the memory buffers get freed (i.e., when the use counters go to 0). > + } > kfree(ps); > return 0; > } > @@ -1291,6 +1399,7 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb > struct usb_host_endpoint *ep; > struct async *as = NULL; > struct usb_ctrlrequest *dr = NULL; > + struct usb_memory *usbm = NULL; > unsigned int u, totlen, isofrmlen; > int i, ret, is_in, num_sgs = 0, ifnum = -1; > int number_of_packets = 0; > @@ -1372,9 +1481,17 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb > uurb->type = USBDEVFS_URB_TYPE_INTERRUPT; > goto interrupt_urb; > } > - num_sgs = DIV_ROUND_UP(uurb->buffer_length, USB_SG_SIZE); > - if (num_sgs == 1 || num_sgs > ps->dev->bus->sg_tablesize) > - num_sgs = 0; > + /* do not use SG buffers when memory mapped segments > + * are allocated > + */ > + if (list_empty(&ps->memory_list)) { No, first call find_memory_area(). If the result is NULL then do this. > + num_sgs = DIV_ROUND_UP(uurb->buffer_length, > + USB_SG_SIZE); > + if (num_sgs == 1 || > + num_sgs > ps->dev->bus->sg_tablesize) { > + num_sgs = 0; > + } > + } > if (ep->streams) > stream_id = uurb->stream_id; > break; > @@ -1439,6 +1556,8 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb > goto error; > } > > + as->ps = ps; > + Why move this? Isn't it fine where it is? > u += sizeof(struct async) + sizeof(struct urb) + uurb->buffer_length + > num_sgs * sizeof(struct scatterlist); > ret = usbfs_increase_memory_usage(u); > @@ -1476,21 +1595,32 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb > totlen -= u; > } > } else if (uurb->buffer_length > 0) { > - as->urb->transfer_buffer = kmalloc(uurb->buffer_length, > - GFP_KERNEL); > + if (!list_empty(&ps->memory_list)) { > + usbm = find_memory_area(ps, uurb); > + if (usbm) { > + as->urb->transfer_buffer = usbm->mem; > + } else { > + ret = -ENOMEM; > + goto error; > + } > + } else { > + as->urb->transfer_buffer = kmalloc(uurb->buffer_length, > + GFP_KERNEL); > + } > if (!as->urb->transfer_buffer) { > ret = -ENOMEM; > goto error; > } > > - if (!is_in) { > + if (!is_in && usbm == NULL) { > if (copy_from_user(as->urb->transfer_buffer, > uurb->buffer, > uurb->buffer_length)) { > ret = -EFAULT; > goto error; > } > - } else if (uurb->type == USBDEVFS_URB_TYPE_ISO) { > + } else if (uurb->type == USBDEVFS_URB_TYPE_ISO && > + usbm == NULL) { > /* > * Isochronous input data may end up being > * discontiguous if some of the packets are short. > @@ -1543,9 +1673,8 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb > } > kfree(isopkt); > isopkt = NULL; > - as->ps = ps; > as->userurb = arg; > - if (is_in && uurb->buffer_length > 0) > + if (is_in && uurb->buffer_length > 0 && usbm == NULL) I think you should keep this even when usbm is not NULL. No? > as->userbuffer = uurb->buffer; > else > as->userbuffer = NULL; When you use dma_zalloc_coherent(), you have to tell the USB core that the buffer is already mapped for DMA by setting the URB_NO_TRANSFER_DMA_MAP flag in urb->transfer_flags and by storing the DMA address in urb->transfer_dma. > @@ -1604,6 +1733,8 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb > return 0; > > error: > + if (usbm) > + dec_use_count(usbm, &usbm->usb_use_count); > kfree(isopkt); > kfree(dr); > if (as) You have to change processcompl(). The copy_urb_data() call isn't needed when zerocopy is being used -- in fact, it rather defeats the purpose of zerocopy! > @@ -2337,6 +2468,57 @@ static long usbdev_ioctl(struct file *file, unsigned int cmd, > return ret; > } > > +static void usbdev_vm_open(struct vm_area_struct *vma) > +{ > + struct usb_memory *usbm = vma->vm_private_data; > + unsigned long flags; > + > + spin_lock_irqsave(&usbm->ps->lock, flags); > + ++usbm->vma_use_count; > + spin_unlock_irqrestore(&usbm->ps->lock, flags); > +} > + > +static void usbdev_vm_close(struct vm_area_struct *vma) > +{ > + struct usb_memory *usbm = vma->vm_private_data; > + > + dec_use_count(usbm, &usbm->vma_use_count); > +} > + > +struct vm_operations_struct usbdev_vm_ops = { > + .open = usbdev_vm_open, > + .close = usbdev_vm_close > +}; > + > +static int usbdev_mmap(struct file *file, struct vm_area_struct *vma) > +{ > + struct usb_memory *usbm = NULL; > + struct usb_dev_state *ps = file->private_data; > + size_t size = vma->vm_end - vma->vm_start; > + int ret; > + > + ret = usbfs_increase_memory_usage(size + sizeof(struct usb_memory)); > + if (ret) > + return ret; > + > + usbm = alloc_usb_memory(ps, size); > + if (!usbm) > + return -ENOMEM; > + > + if (remap_pfn_range(vma, vma->vm_start, > + virt_to_phys(usbm->mem) >> PAGE_SHIFT, > + size, vma->vm_page_prot) < 0) > + return -EAGAIN; As Oliver pointed out, the memory needs to be reclaimed and accounted for on the failure pathways. > + > + usbm->vm_start = vma->vm_start; > + vma->vm_flags |= VM_IO; > + vma->vm_flags |= (VM_DONTEXPAND | VM_DONTDUMP); > + vma->vm_ops = &usbdev_vm_ops; > + vma->vm_private_data = usbm; > + usbdev_vm_open(vma); > + return 0; > +} > + > #ifdef CONFIG_COMPAT > static long usbdev_compat_ioctl(struct file *file, unsigned int cmd, > unsigned long arg) > @@ -2373,6 +2555,7 @@ const struct file_operations usbdev_file_operations = { > #ifdef CONFIG_COMPAT > .compat_ioctl = usbdev_compat_ioctl, > #endif > + .mmap = usbdev_mmap, > .open = usbdev_open, > .release = usbdev_release, > }; Alan Stern -- To unsubscribe from this list: send the line "unsubscribe linux-usb" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html