On Sat, Nov 14, 2015 at 12:36:48PM -0500, Alan Stern wrote: > Hmmm. If the shared memory can be accessed by the CPU using ordinary > load and store instructions and it can be kmalloc'ed, then it should be > usable for zerocopy I/O. But again, only if it satisfies the > restrictions imposed by the USB controller's DMA hardware. I honestly don't know how DRM allocates its memory, and the proprietary drivers from NVIDIA/AMD are probably a different story entirely. I guess the only real way would be to have the kernel check whether the memory passed in is acceptable, and that's probably a different approach from the one this patch takes. Just so we're on the same page, I cleaned up the patch I'm now using, and I'm attaching it here. You said the next step would be changing the memory allocation interface; I guess I could give it a shot, but I doubt I would understand the subtleties involved. /* Steinar */ -- Homepage: https://www.sesse.net/
>From 4cf27fbf80cccc00eb6a5ea75c87cd6315ed82d0 Mon Sep 17 00:00:00 2001 From: "Steinar H. Gunderson" <sesse@xxxxxxxxxx> Date: Mon, 16 Nov 2015 01:36:38 +0100 Subject: [PATCH] Add support for usbfs zerocopy. This is essentially a patch by Markus Rechberger with some updates. The original can be found at http://sundtek.de/support/devio_mmap_v0.4.diff This version has the following changes: - Rebased against a newer kernel (with some conflicts fixed). - Fixed most checkpatch violations (some remain). - Fixes an issue where isochronous transfers would not really be zero-copy, but go through a pointless memcpy from one area to itself. - Ask for cached memory instead of uncached. --- drivers/usb/core/devio.c | 229 +++++++++++++++++++++++++++++++++++--- include/uapi/linux/usbdevice_fs.h | 8 ++ 2 files changed, 224 insertions(+), 13 deletions(-) diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index 986abde..39e8c2b 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -69,6 +69,7 @@ struct usb_dev_state { spinlock_t lock; /* protects the async urb lists */ struct list_head async_pending; struct list_head async_completed; + struct list_head memory_list; wait_queue_head_t wait; /* wake up if a request completed */ unsigned int discsignr; struct pid *disc_pid; @@ -96,6 +97,16 @@ struct async { u8 bulk_status; }; +struct usb_memory { + struct list_head memlist; + int vma_use_count; + int usb_use_count; + u32 offset; + u32 size; + void *mem; + unsigned long vm_start; +}; + static bool usbfs_snoop; module_param(usbfs_snoop, bool, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(usbfs_snoop, "true to log all usbfs traffic"); @@ -288,6 +299,9 @@ static struct async *alloc_async(unsigned int numisoframes) static void free_async(struct async *as) { + struct usb_memory *usbm = NULL, *usbm_iter; + unsigned long flags; + struct usb_dev_state *ps = as->ps; int i; put_pid(as->pid); @@ -297,8 +311,22 @@ static void free_async(struct async *as) if 
(sg_page(&as->urb->sg[i])) kfree(sg_virt(&as->urb->sg[i])); } + + spin_lock_irqsave(&ps->lock, flags); + list_for_each_entry(usbm_iter, &ps->memory_list, memlist) { + if (usbm_iter->mem == as->urb->transfer_buffer) { + usbm = usbm_iter; + break; + } + } + spin_unlock_irqrestore(&ps->lock, flags); + kfree(as->urb->sg); - kfree(as->urb->transfer_buffer); + if (usbm == NULL) + kfree(as->urb->transfer_buffer); + else + usbm->usb_use_count--; + kfree(as->urb->setup_packet); usb_free_urb(as->urb); usbfs_decrease_memory_usage(as->mem_usage); @@ -910,6 +938,7 @@ static int usbdev_open(struct inode *inode, struct file *file) INIT_LIST_HEAD(&ps->list); INIT_LIST_HEAD(&ps->async_pending); INIT_LIST_HEAD(&ps->async_completed); + INIT_LIST_HEAD(&ps->memory_list); init_waitqueue_head(&ps->wait); ps->discsignr = 0; ps->disc_pid = get_pid(task_pid(current)); @@ -938,6 +967,8 @@ static int usbdev_release(struct inode *inode, struct file *file) struct usb_dev_state *ps = file->private_data; struct usb_device *dev = ps->dev; unsigned int ifnum; + struct list_head *p, *q; + struct usb_memory *tmp; struct async *as; usb_lock_device(dev); @@ -962,6 +993,14 @@ static int usbdev_release(struct inode *inode, struct file *file) free_async(as); as = async_getcompleted(ps); } + + list_for_each_safe(p, q, &ps->memory_list) { + tmp = list_entry(p, struct usb_memory, memlist); + list_del(p); + if (tmp->mem) + free_pages_exact(tmp->mem, tmp->size); + kfree(tmp); + } kfree(ps); return 0; } @@ -1289,6 +1328,7 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb struct usb_host_endpoint *ep; struct async *as = NULL; struct usb_ctrlrequest *dr = NULL; + struct usb_memory *usbm = NULL, *iter = NULL; unsigned int u, totlen, isofrmlen; int i, ret, is_in, num_sgs = 0, ifnum = -1; int number_of_packets = 0; @@ -1370,9 +1410,16 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb uurb->type = USBDEVFS_URB_TYPE_INTERRUPT; goto interrupt_urb; } - 
num_sgs = DIV_ROUND_UP(uurb->buffer_length, USB_SG_SIZE); - if (num_sgs == 1 || num_sgs > ps->dev->bus->sg_tablesize) - num_sgs = 0; + /* do not use SG buffers when memory mapped segments + * are allocated + */ + if (list_empty(&ps->memory_list)) { + num_sgs = DIV_ROUND_UP( + uurb->buffer_length, USB_SG_SIZE); + if (num_sgs == 1 || + num_sgs > ps->dev->bus->sg_tablesize) + num_sgs = 0; + } if (ep->streams) stream_id = uurb->stream_id; break; @@ -1436,6 +1483,8 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb goto error; } + as->ps = ps; + u += sizeof(struct async) + sizeof(struct urb) + uurb->buffer_length + num_sgs * sizeof(struct scatterlist); ret = usbfs_increase_memory_usage(u); @@ -1473,21 +1522,49 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb totlen -= u; } } else if (uurb->buffer_length > 0) { - as->urb->transfer_buffer = kmalloc(uurb->buffer_length, - GFP_KERNEL); - if (!as->urb->transfer_buffer) { - ret = -ENOMEM; - goto error; + if (!list_empty(&ps->memory_list)) { + unsigned long flags; + + usbm = NULL; + as->urb->transfer_buffer = NULL; + spin_lock_irqsave(&ps->lock, flags); + list_for_each_entry(iter, &ps->memory_list, memlist) { + if (iter->vm_start == (unsigned long)uurb->buffer && iter->usb_use_count == 0 && + (PAGE_SIZE << get_order(iter->size)) >= uurb->buffer_length) { + usbm = iter; + usbm->usb_use_count++; + break; + } + } + spin_unlock_irqrestore(&ps->lock, flags); + if (usbm) { + as->urb->transfer_buffer = usbm->mem; + } else { + ret = -ENOMEM; + goto error; + } + if (as->urb->transfer_buffer == NULL) { + ret = -ENOMEM; + goto error; + } + } else { + as->urb->transfer_buffer = kmalloc(uurb->buffer_length, + GFP_KERNEL); + if (!as->urb->transfer_buffer) { + ret = -ENOMEM; + goto error; + } } - if (!is_in) { + if (!is_in && usbm == NULL) { if (copy_from_user(as->urb->transfer_buffer, uurb->buffer, uurb->buffer_length)) { ret = -EFAULT; goto error; } - } else if (uurb->type 
== USBDEVFS_URB_TYPE_ISO) { + } else if (uurb->type == USBDEVFS_URB_TYPE_ISO && + usbm == NULL) { /* * Isochronous input data may end up being * discontiguous if some of the packets are short. @@ -1540,9 +1617,8 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb } kfree(isopkt); isopkt = NULL; - as->ps = ps; as->userurb = arg; - if (is_in && uurb->buffer_length > 0) + if (is_in && uurb->buffer_length > 0 && usbm == NULL) as->userbuffer = uurb->buffer; else as->userbuffer = NULL; @@ -1601,6 +1677,8 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb return 0; error: + if (usbm) + usbm->usb_use_count--; kfree(isopkt); kfree(dr); if (as) @@ -2125,6 +2203,65 @@ static int proc_free_streams(struct usb_dev_state *ps, void __user *arg) return r; } +static int proc_release_memory(struct usb_dev_state *ps, void __user *arg) +{ + struct usbdevfs_memory m; + struct usb_memory *usbm; + unsigned long flags; + + if (copy_from_user(&m, arg, sizeof(m))) + return -EFAULT; + + spin_lock_irqsave(&ps->lock, flags); + list_for_each_entry(usbm, &ps->memory_list, memlist) { + if ((usbm->vm_start == (unsigned long)m.buffer || + usbm->offset == m.offset) && + usbm->usb_use_count == 0 && + usbm->vma_use_count == 0) { + list_del_init(&usbm->memlist); + spin_unlock_irqrestore(&ps->lock, flags); + free_pages_exact(usbm->mem, usbm->size); + kfree(usbm); + return 0; + } + } + spin_unlock_irqrestore(&ps->lock, flags); + return -EBUSY; +} + +static int proc_alloc_memory(struct usb_dev_state *ps, void __user *arg) +{ + struct usbdevfs_memory m; + struct usb_memory *usbmem; + void *mem; + unsigned long flags; + + if (copy_from_user(&m, arg, sizeof(m))) + return -EFAULT; + + mem = alloc_pages_exact(m.size, GFP_KERNEL | GFP_DMA32); + if (!mem) + return -ENOMEM; + + usbmem = kzalloc(sizeof(struct usb_memory), GFP_KERNEL); + if (!usbmem) { + free_pages_exact(mem, m.size); + return -ENOMEM; + } + memset(mem, 0x0, 
(PAGE_SIZE<<get_order(m.size))); + usbmem->mem = mem; + m.offset = usbmem->offset = virt_to_phys(mem); + usbmem->size = m.size; + spin_lock_irqsave(&ps->lock, flags); + list_add_tail(&usbmem->memlist, &ps->memory_list); + spin_unlock_irqrestore(&ps->lock, flags); + + if (copy_to_user(arg, &m, sizeof(m))) + return -EFAULT; + + return 0; +} + /* * NOTE: All requests here that have interface numbers as parameters * are assuming that somehow the configuration has been prevented from @@ -2313,6 +2450,14 @@ static long usbdev_do_ioctl(struct file *file, unsigned int cmd, case USBDEVFS_FREE_STREAMS: ret = proc_free_streams(ps, p); break; + case USBDEVFS_ALLOC_MEMORY: + snoop(&dev->dev, "%s: ALLOC_MEMORY\n", __func__); + ret = proc_alloc_memory(ps, p); + break; + case USBDEVFS_RELEASE_MEMORY: + snoop(&dev->dev, "%s: RELEASE_MEMORY\n", __func__); + ret = proc_release_memory(ps, p); + break; } done: @@ -2332,6 +2477,63 @@ static long usbdev_ioctl(struct file *file, unsigned int cmd, return ret; } +static void usbdev_vm_open(struct vm_area_struct *vma) +{ + struct usb_memory *usbm = vma->vm_private_data; + + usbm->vma_use_count++; +} + +static void usbdev_vm_close(struct vm_area_struct *vma) +{ + struct usb_memory *usbm = vma->vm_private_data; + + usbm->vma_use_count--; +} + + +struct vm_operations_struct usbdev_vm_ops = { + .open = usbdev_vm_open, + .close = usbdev_vm_close +}; + +static int usbdev_mmap(struct file *file, struct vm_area_struct *vma) +{ + struct usb_memory *usbm = NULL, *usbm_iter = NULL; + struct usb_dev_state *ps = file->private_data; + int size = vma->vm_end - vma->vm_start; + unsigned long flags; + + spin_lock_irqsave(&ps->lock, flags); + list_for_each_entry(usbm_iter, &ps->memory_list, memlist) { + if (usbm_iter->offset == (vma->vm_pgoff<<PAGE_SHIFT) && + size <= (PAGE_SIZE<<get_order(usbm_iter->size))) { + usbm = usbm_iter; + usbm->vm_start = vma->vm_start; + break; + } + } + spin_unlock_irqrestore(&ps->lock, flags); + + if (usbm == NULL) + return 
-EINVAL; + + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + + if (remap_pfn_range(vma, vma->vm_start, + virt_to_phys(usbm->mem) >> PAGE_SHIFT, + size, + vma->vm_page_prot) < 0) + return -EAGAIN; + + vma->vm_flags |= VM_IO; + vma->vm_flags |= (VM_DONTEXPAND | VM_DONTDUMP); + vma->vm_ops = &usbdev_vm_ops; + vma->vm_private_data = usbm; + usbdev_vm_open(vma); + return 0; +} + #ifdef CONFIG_COMPAT static long usbdev_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) @@ -2368,6 +2570,7 @@ const struct file_operations usbdev_file_operations = { #ifdef CONFIG_COMPAT .compat_ioctl = usbdev_compat_ioctl, #endif + .mmap = usbdev_mmap, .open = usbdev_open, .release = usbdev_release, }; diff --git a/include/uapi/linux/usbdevice_fs.h b/include/uapi/linux/usbdevice_fs.h index 019ba1e..761970f 100644 --- a/include/uapi/linux/usbdevice_fs.h +++ b/include/uapi/linux/usbdevice_fs.h @@ -154,6 +154,12 @@ struct usbdevfs_streams { unsigned char eps[0]; }; +struct usbdevfs_memory { + u32 size; + u32 offset; + void __user *buffer; +}; + #define USBDEVFS_CONTROL _IOWR('U', 0, struct usbdevfs_ctrltransfer) #define USBDEVFS_CONTROL32 _IOWR('U', 0, struct usbdevfs_ctrltransfer32) #define USBDEVFS_BULK _IOWR('U', 2, struct usbdevfs_bulktransfer) @@ -187,5 +193,7 @@ struct usbdevfs_streams { #define USBDEVFS_DISCONNECT_CLAIM _IOR('U', 27, struct usbdevfs_disconnect_claim) #define USBDEVFS_ALLOC_STREAMS _IOR('U', 28, struct usbdevfs_streams) #define USBDEVFS_FREE_STREAMS _IOR('U', 29, struct usbdevfs_streams) +#define USBDEVFS_ALLOC_MEMORY _IOWR('U', 30, struct usbdevfs_memory) +#define USBDEVFS_RELEASE_MEMORY _IOW('U', 31, struct usbdevfs_memory) #endif /* _UAPI_LINUX_USBDEVICE_FS_H */ -- 2.1.4