Re: Infrastructure for zerocopy I/O

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, 26 Nov 2015, Steinar H. Gunderson wrote:

> On Wed, Nov 25, 2015 at 10:29:53AM -0500, Alan Stern wrote:
> > I want to see a modified version of your patch.  Several things need to 
> > be changed or fixed, but the major change needs to be the way memory is 
> > allocated.  It should be done as part of the mmap system call, not as a 
> > separate ioctl.
> 
> I took a stab at this. The attached patch doesn't fix any of your other
> issues, but could you take a look at whether the mmap part looks sane?
> I've verified that it works in practice with my own code, and see no
> performance regressions. It is still the case that you can't mix mmap
> and non-mmap transfers on the same device; I suppose that should be governed
> by a zerocopy flag instead of “are there any mmap-ed areas”.

As I recall, Markus's original patch took care of this by checking to
see whether the transfer buffer was in one of the mmap'ed areas.  If it
was then the transfer would be zerocopy; otherwise it would be normal.  
That seems like a reasonable approach.

> Let me go through your other issues:
> 
> > There are numerous smaller issues: The allocated memory needs to be 
> > charged against usbfs_memory_usage
> 
> I'll fix this.
> 
> > the memory should be allocated using dma_zalloc_coherent instead of
> > kmalloc, 
> 
> dma_zalloc_coherent returns a dma_handle in addition to the memory itself;
> should we just throw this away?

No, the handle has to be stored for use when an URB is submitted and 
when the memory is deallocated -- it is a required argument for 
dma_free_coherent().

> > proc_do_submiturb should 
> > check that the buffer starts anywhere within the allocated memory 
> > rather than just at the beginning
> 
> I'll fix this, too.
> 
> > , and so on.
> 
> Clarification appreciated ;-)

Detailed notes below.


> diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c
> index 38ae877c..9a1a7d6 100644
> --- a/drivers/usb/core/devio.c
> +++ b/drivers/usb/core/devio.c
> @@ -69,6 +69,7 @@ struct usb_dev_state {
>  	spinlock_t lock;            /* protects the async urb lists */
>  	struct list_head async_pending;
>  	struct list_head async_completed;
> +	struct list_head memory_list;
>  	wait_queue_head_t wait;     /* wake up if a request completed */
>  	unsigned int discsignr;
>  	struct pid *disc_pid;
> @@ -96,6 +97,17 @@ struct async {
>  	u8 bulk_status;
>  };
>  
> +struct usb_memory {
> +	struct list_head memlist;
> +	int vma_use_count;
> +	int usb_use_count;
> +	u32 offset;

What's the reason for the "offset" member?  It doesn't seem to do
anything.

> +	u32 size;
> +	void *mem;

You'll need to store the DMA address as well.

> +	unsigned long vm_start;
> +	struct usb_dev_state *ps;
> +};
> +
>  static bool usbfs_snoop;
>  module_param(usbfs_snoop, bool, S_IRUGO | S_IWUSR);
>  MODULE_PARM_DESC(usbfs_snoop, "true to log all usbfs traffic");
> @@ -157,6 +169,74 @@ static int connected(struct usb_dev_state *ps)
>  			ps->dev->state != USB_STATE_NOTATTACHED);
>  }
>  
> +static struct usb_memory *
> +alloc_usb_memory(struct usb_dev_state *ps, size_t size)
> +{
> +	struct usb_memory *usbm;
> +	void *mem;
> +	unsigned long flags;
> +
> +	mem = alloc_pages_exact(size, GFP_KERNEL | GFP_DMA32);

dma_zalloc_coherent(), not alloc_pages_exact().

> +	if (!mem)
> +		return NULL;
> +
> +	usbm = kzalloc(sizeof(struct usb_memory), GFP_KERNEL);
> +	if (!usbm) {
> +		free_pages_exact(mem, size);
> +		return NULL;
> +	}
> +	memset(mem, 0x0, PAGE_SIZE << get_order(size));

Then this line won't be needed.

> +	usbm->mem = mem;
> +	usbm->offset = virt_to_phys(mem);
> +	usbm->size = size;
> +	usbm->ps = ps;
> +	spin_lock_irqsave(&ps->lock, flags);
> +	list_add_tail(&usbm->memlist, &ps->memory_list);
> +	spin_unlock_irqrestore(&ps->lock, flags);
> +
> +	return usbm;
> +}

This subroutine should be merged into usbdev_mmap().

In fact, all the memory-oriented routines should be located together
in the source file.  It's confusing to put some of them near the start
and others near the end.

> +
> +static void dec_use_count(struct usb_memory *usbm, int *count)
> +{
> +	struct usb_dev_state *ps = usbm->ps;
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(&ps->lock, flags);
> +	WARN_ON(count != &usbm->usb_use_count && count != &usbm->vma_use_count);
> +	--*count;
> +	if (usbm->usb_use_count == 0 && usbm->vma_use_count == 0) {

Forget about the WARN_ON; you know all the callers because this patch 
introduces them.  If you prefer, instead of a pointer pass an 
enumeration value: USB_MEMORY_URB_COUNT or USB_MEMORY_VMA_COUNT.

Also, you might change the name to make it a little less generic.  For 
example, dec_usb_memory_use_count().

> +		list_del_init(&usbm->memlist);
> +		free_pages_exact(usbm->mem, usbm->size);
> +		usbfs_decrease_memory_usage(
> +			usbm->size + sizeof(struct usb_memory));
> +		kfree(usbm);
> +	}
> +	spin_unlock_irqrestore(&ps->lock, flags);
> +}
> +
> +static struct usb_memory *
> +find_memory_area(struct usb_dev_state *ps, const struct usbdevfs_urb *uurb)
> +{
> +	struct usb_memory *usbm = NULL, *iter;
> +	unsigned long flags;
> +	unsigned long uurb_start = (unsigned long)uurb->buffer;
> +
> +	spin_lock_irqsave(&ps->lock, flags);
> +	list_for_each_entry(iter, &ps->memory_list, memlist) {
> +		if (iter->usb_use_count == 0 &&

I don't think this is necessary.  It should be legal to keep the data
for two URBs in the same memory region, so long as they don't overlap.

> +		    uurb_start >= iter->vm_start &&
> +		    uurb->buffer_length <= iter->vm_start - uurb_start +
> +				(PAGE_SIZE << get_order(iter->size))) {

Shouldn't this be:

			uurb->start >= iter->vm_start &&
			uurb->start < iter->vm_start + iter->size &&
			uurb->buffer_length <= iter->vm_start + iter->size -
					uurb->start) {

> +			usbm = iter;
> +			usbm->usb_use_count++;
> +			break;
> +		}
> +	}
> +	spin_unlock_irqrestore(&ps->lock, flags);
> +	return usbm;
> +}
> +
>  static loff_t usbdev_lseek(struct file *file, loff_t offset, int orig)
>  {
>  	loff_t ret;
> @@ -288,6 +368,9 @@ static struct async *alloc_async(unsigned int numisoframes)
>  
>  static void free_async(struct async *as)
>  {
> +	struct usb_memory *usbm = NULL, *usbm_iter;
> +	unsigned long flags;
> +	struct usb_dev_state *ps = as->ps;
>  	int i;
>  
>  	put_pid(as->pid);
> @@ -297,8 +380,22 @@ static void free_async(struct async *as)
>  		if (sg_page(&as->urb->sg[i]))
>  			kfree(sg_virt(&as->urb->sg[i]));
>  	}
> +
> +	spin_lock_irqsave(&ps->lock, flags);
> +	list_for_each_entry(usbm_iter, &ps->memory_list, memlist) {
> +		if (usbm_iter->mem == as->urb->transfer_buffer) {
> +			usbm = usbm_iter;
> +			break;
> +		}
> +	}
> +	spin_unlock_irqrestore(&ps->lock, flags);

There's no need to do this.  Just store the usbm pointer in the as
structure.

> +
>  	kfree(as->urb->sg);
> -	kfree(as->urb->transfer_buffer);
> +	if (usbm == NULL)
> +		kfree(as->urb->transfer_buffer);
> +	else
> +		dec_use_count(usbm, &usbm->usb_use_count);
> +
>  	kfree(as->urb->setup_packet);
>  	usb_free_urb(as->urb);
>  	usbfs_decrease_memory_usage(as->mem_usage);
> @@ -910,6 +1007,7 @@ static int usbdev_open(struct inode *inode, struct file *file)
>  	INIT_LIST_HEAD(&ps->list);
>  	INIT_LIST_HEAD(&ps->async_pending);
>  	INIT_LIST_HEAD(&ps->async_completed);
> +	INIT_LIST_HEAD(&ps->memory_list);
>  	init_waitqueue_head(&ps->wait);
>  	ps->discsignr = 0;
>  	ps->disc_pid = get_pid(task_pid(current));
> @@ -938,6 +1036,8 @@ static int usbdev_release(struct inode *inode, struct file *file)
>  	struct usb_dev_state *ps = file->private_data;
>  	struct usb_device *dev = ps->dev;
>  	unsigned int ifnum;
> +	struct list_head *p, *q;
> +	struct usb_memory *tmp;
>  	struct async *as;
>  
>  	usb_lock_device(dev);
> @@ -962,6 +1062,14 @@ static int usbdev_release(struct inode *inode, struct file *file)
>  		free_async(as);
>  		as = async_getcompleted(ps);
>  	}
> +
> +	list_for_each_safe(p, q, &ps->memory_list) {
> +		tmp = list_entry(p, struct usb_memory, memlist);
> +		list_del(p);
> +		if (tmp->mem)
> +			free_pages_exact(tmp->mem, tmp->size);
> +		kfree(tmp);

No, you can't do this here.  The memory might still be in use by the
VMA.  You have to do:

	list_del(&ps->memory_list);

perhaps with a comment explaining when the usb_memory structures and
the memory buffers get freed (i.e., when the use counters go to 0).

> +	}
>  	kfree(ps);
>  	return 0;
>  }
> @@ -1291,6 +1399,7 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb
>  	struct usb_host_endpoint *ep;
>  	struct async *as = NULL;
>  	struct usb_ctrlrequest *dr = NULL;
> +	struct usb_memory *usbm = NULL;
>  	unsigned int u, totlen, isofrmlen;
>  	int i, ret, is_in, num_sgs = 0, ifnum = -1;
>  	int number_of_packets = 0;
> @@ -1372,9 +1481,17 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb
>  			uurb->type = USBDEVFS_URB_TYPE_INTERRUPT;
>  			goto interrupt_urb;
>  		}
> -		num_sgs = DIV_ROUND_UP(uurb->buffer_length, USB_SG_SIZE);
> -		if (num_sgs == 1 || num_sgs > ps->dev->bus->sg_tablesize)
> -			num_sgs = 0;
> +		/* do not use SG buffers when memory mapped segments
> +		 * are allocated
> +		 */
> +		if (list_empty(&ps->memory_list)) {

No, first call find_memory_area().  If the result is NULL then do this.

> +			num_sgs = DIV_ROUND_UP(uurb->buffer_length,
> +					       USB_SG_SIZE);
> +			if (num_sgs == 1 ||
> +			    num_sgs > ps->dev->bus->sg_tablesize) {
> +				num_sgs = 0;
> +			}
> +		}
>  		if (ep->streams)
>  			stream_id = uurb->stream_id;
>  		break;
> @@ -1439,6 +1556,8 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb
>  		goto error;
>  	}
>  
> +	as->ps = ps;
> +

Why move this?  Isn't it fine where it is?

>  	u += sizeof(struct async) + sizeof(struct urb) + uurb->buffer_length +
>  	     num_sgs * sizeof(struct scatterlist);
>  	ret = usbfs_increase_memory_usage(u);
> @@ -1476,21 +1595,32 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb
>  			totlen -= u;
>  		}
>  	} else if (uurb->buffer_length > 0) {
> -		as->urb->transfer_buffer = kmalloc(uurb->buffer_length,
> -				GFP_KERNEL);
> +		if (!list_empty(&ps->memory_list)) {
> +			usbm = find_memory_area(ps, uurb);
> +			if (usbm) {
> +				as->urb->transfer_buffer = usbm->mem;
> +			} else {
> +				ret = -ENOMEM;
> +				goto error;
> +			}
> +		} else {
> +			as->urb->transfer_buffer = kmalloc(uurb->buffer_length,
> +							   GFP_KERNEL);
> +		}
>  		if (!as->urb->transfer_buffer) {
>  			ret = -ENOMEM;
>  			goto error;
>  		}
>  
> -		if (!is_in) {
> +		if (!is_in && usbm == NULL) {
>  			if (copy_from_user(as->urb->transfer_buffer,
>  					   uurb->buffer,
>  					   uurb->buffer_length)) {
>  				ret = -EFAULT;
>  				goto error;
>  			}
> -		} else if (uurb->type == USBDEVFS_URB_TYPE_ISO) {
> +		} else if (uurb->type == USBDEVFS_URB_TYPE_ISO &&
> +			   usbm == NULL) {
>  			/*
>  			 * Isochronous input data may end up being
>  			 * discontiguous if some of the packets are short.
> @@ -1543,9 +1673,8 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb
>  	}
>  	kfree(isopkt);
>  	isopkt = NULL;
> -	as->ps = ps;
>  	as->userurb = arg;
> -	if (is_in && uurb->buffer_length > 0)
> +	if (is_in && uurb->buffer_length > 0 && usbm == NULL)

I think you should keep this even when usbm is not NULL.  No?

>  		as->userbuffer = uurb->buffer;
>  	else
>  		as->userbuffer = NULL;

When you use dma_zalloc_coherent(), you have to tell the USB core that
the buffer is already mapped for DMA by setting the URB_NO_TRANSFER_DMA_MAP
flag in urb->transfer_flags and by storing the DMA address in
urb->transfer_dma.

> @@ -1604,6 +1733,8 @@ static int proc_do_submiturb(struct usb_dev_state *ps, struct usbdevfs_urb *uurb
>  	return 0;
>  
>   error:
> +	if (usbm)
> +		dec_use_count(usbm, &usbm->usb_use_count);
>  	kfree(isopkt);
>  	kfree(dr);
>  	if (as)

You have to change processcompl().  The copy_urb_data() call isn't
needed when zerocopy is being used -- in fact, it rather defeats the 
purpose of zerocopy!

> @@ -2337,6 +2468,57 @@ static long usbdev_ioctl(struct file *file, unsigned int cmd,
>  	return ret;
>  }
>  
> +static void usbdev_vm_open(struct vm_area_struct *vma)
> +{
> +	struct usb_memory *usbm = vma->vm_private_data;
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(&usbm->ps->lock, flags);
> +	++usbm->vma_use_count;
> +	spin_unlock_irqrestore(&usbm->ps->lock, flags);
> +}
> +
> +static void usbdev_vm_close(struct vm_area_struct *vma)
> +{
> +	struct usb_memory *usbm = vma->vm_private_data;
> +
> +	dec_use_count(usbm, &usbm->vma_use_count);
> +}
> +
> +struct vm_operations_struct usbdev_vm_ops = {
> +	.open = usbdev_vm_open,
> +	.close = usbdev_vm_close
> +};
> +
> +static int usbdev_mmap(struct file *file, struct vm_area_struct *vma)
> +{
> +	struct usb_memory *usbm = NULL;
> +	struct usb_dev_state *ps = file->private_data;
> +	size_t size = vma->vm_end - vma->vm_start;
> +	int ret;
> +
> +	ret = usbfs_increase_memory_usage(size + sizeof(struct usb_memory));
> +	if (ret)
> +		return ret;
> +
> +	usbm = alloc_usb_memory(ps, size);
> +	if (!usbm)
> +		return -ENOMEM;
> +
> +	if (remap_pfn_range(vma, vma->vm_start,
> +			    virt_to_phys(usbm->mem) >> PAGE_SHIFT,
> +			    size, vma->vm_page_prot) < 0)
> +		return -EAGAIN;

As Oliver pointed out, the memory needs to be reclaimed and accounted
for on the failure pathways.

> +
> +	usbm->vm_start = vma->vm_start;
> +	vma->vm_flags |= VM_IO;
> +	vma->vm_flags |= (VM_DONTEXPAND | VM_DONTDUMP);
> +	vma->vm_ops = &usbdev_vm_ops;
> +	vma->vm_private_data = usbm;
> +	usbdev_vm_open(vma);
> +	return 0;
> +}
> +
>  #ifdef CONFIG_COMPAT
>  static long usbdev_compat_ioctl(struct file *file, unsigned int cmd,
>  			unsigned long arg)
> @@ -2373,6 +2555,7 @@ const struct file_operations usbdev_file_operations = {
>  #ifdef CONFIG_COMPAT
>  	.compat_ioctl =   usbdev_compat_ioctl,
>  #endif
> +	.mmap =           usbdev_mmap,
>  	.open =		  usbdev_open,
>  	.release =	  usbdev_release,
>  };

Alan Stern

--
To unsubscribe from this list: send the line "unsubscribe linux-usb" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [Linux Media]     [Linux Input]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]     [Old Linux USB Devel Archive]

  Powered by Linux