On Tue, Sep 14, 2010 at 11:53:05PM +0800, Jason Wang wrote:
> Qemu supports up to UIO_MAXIOV s/g entries, so we have to match that because
> guest drivers may rely on this.
>
> Allocate the indirect and log arrays dynamically to avoid using too much
> contiguous memory, and make the length of the hdr array match the header
> length, since each iovec entry has at least one byte.
>
> Tested by copying large files w/ and w/o migration in both Linux and Windows
> guests.
>
> Signed-off-by: Jason Wang <jasowang@xxxxxxxxxx>

Looks good, I'll queue this up for 2.6.37.
Thanks!

> ---
>  drivers/vhost/net.c   |    2 +-
>  drivers/vhost/vhost.c |   49 ++++++++++++++++++++++++++++++++++++++++++++++++-
>  drivers/vhost/vhost.h |   18 ++++++++----------
>  3 files changed, 57 insertions(+), 12 deletions(-)
>
> diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
> index 29e850a..e828ef1 100644
> --- a/drivers/vhost/net.c
> +++ b/drivers/vhost/net.c
> @@ -243,7 +243,7 @@ static int get_rx_bufs(struct vhost_virtqueue *vq,
>  	int r, nlogs = 0;
>
>  	while (datalen > 0) {
> -		if (unlikely(headcount >= VHOST_NET_MAX_SG)) {
> +		if (unlikely(headcount >= UIO_MAXIOV)) {
>  			r = -ENOBUFS;
>  			goto err;
>  		}
> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
> index c579dcc..a45270e 100644
> --- a/drivers/vhost/vhost.c
> +++ b/drivers/vhost/vhost.c
> @@ -212,6 +212,45 @@ static int vhost_worker(void *data)
>  	}
>  }
>
> +/* Helper to allocate iovec buffers for all vqs. */
> +static long vhost_dev_alloc_iovecs(struct vhost_dev *dev)
> +{
> +	int i;
> +	for (i = 0; i < dev->nvqs; ++i) {
> +		dev->vqs[i].indirect = kmalloc(sizeof *dev->vqs[i].indirect *
> +					       UIO_MAXIOV, GFP_KERNEL);
> +		dev->vqs[i].log = kmalloc(sizeof *dev->vqs[i].log * UIO_MAXIOV,
> +					  GFP_KERNEL);
> +		dev->vqs[i].heads = kmalloc(sizeof *dev->vqs[i].heads *
> +					    UIO_MAXIOV, GFP_KERNEL);
> +
> +		if (!dev->vqs[i].indirect || !dev->vqs[i].log ||
> +		    !dev->vqs[i].heads)
> +			goto err_nomem;
> +	}
> +	return 0;
> +err_nomem:
> +	for (; i >= 0; --i) {
> +		kfree(dev->vqs[i].indirect);
> +		kfree(dev->vqs[i].log);
> +		kfree(dev->vqs[i].heads);

We probably want to assign NULL values here, same as below.
I have fixed this up in my tree.
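For reference, roughly what that looks like (a sketch mirroring vhost_dev_free_iovecs()
below, not necessarily the exact change I applied):

err_nomem:
	/* Free whatever has been allocated so far and reset the pointers,
	 * so that vhost_dev_free_iovecs()/vhost_dev_cleanup() later won't
	 * try to free them again.  kfree(NULL) is a no-op, so including
	 * the partially allocated index i is fine. */
	for (; i >= 0; --i) {
		kfree(dev->vqs[i].indirect);
		dev->vqs[i].indirect = NULL;
		kfree(dev->vqs[i].log);
		dev->vqs[i].log = NULL;
		kfree(dev->vqs[i].heads);
		dev->vqs[i].heads = NULL;
	}
	return -ENOMEM;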
> +	}
> +	return -ENOMEM;
> +}
> +
> +static void vhost_dev_free_iovecs(struct vhost_dev *dev)
> +{
> +	int i;
> +	for (i = 0; i < dev->nvqs; ++i) {
> +		kfree(dev->vqs[i].indirect);
> +		dev->vqs[i].indirect = NULL;
> +		kfree(dev->vqs[i].log);
> +		dev->vqs[i].log = NULL;
> +		kfree(dev->vqs[i].heads);
> +		dev->vqs[i].heads = NULL;
> +	}
> +}
> +
>  long vhost_dev_init(struct vhost_dev *dev,
>  		    struct vhost_virtqueue *vqs, int nvqs)
>  {
> @@ -229,6 +268,9 @@ long vhost_dev_init(struct vhost_dev *dev,
>  	dev->worker = NULL;
>
>  	for (i = 0; i < dev->nvqs; ++i) {
> +		dev->vqs[i].log = NULL;
> +		dev->vqs[i].indirect = NULL;
> +		dev->vqs[i].heads = NULL;
>  		dev->vqs[i].dev = dev;
>  		mutex_init(&dev->vqs[i].mutex);
>  		vhost_vq_reset(dev, dev->vqs + i);
> @@ -295,6 +337,10 @@ static long vhost_dev_set_owner(struct vhost_dev *dev)
>  	if (err)
>  		goto err_cgroup;
>
> +	err = vhost_dev_alloc_iovecs(dev);
> +	if (err)
> +		goto err_cgroup;
> +
>  	return 0;
>  err_cgroup:
>  	kthread_stop(worker);
> @@ -345,6 +391,7 @@ void vhost_dev_cleanup(struct vhost_dev *dev)
>  			fput(dev->vqs[i].call);
>  		vhost_vq_reset(dev, dev->vqs + i);
>  	}
> +	vhost_dev_free_iovecs(dev);
>  	if (dev->log_ctx)
>  		eventfd_ctx_put(dev->log_ctx);
>  	dev->log_ctx = NULL;
> @@ -946,7 +993,7 @@ static int get_indirect(struct vhost_dev *dev, struct vhost_virtqueue *vq,
>  	}
>
>  	ret = translate_desc(dev, indirect->addr, indirect->len, vq->indirect,
> -			     ARRAY_SIZE(vq->indirect));
> +			     UIO_MAXIOV);
>  	if (unlikely(ret < 0)) {
>  		vq_err(vq, "Translation failure %d in indirect.\n", ret);
>  		return ret;
> diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
> index afd7729..edc8929 100644
> --- a/drivers/vhost/vhost.h
> +++ b/drivers/vhost/vhost.h
> @@ -15,11 +15,6 @@
>
>  struct vhost_device;
>
> -enum {
> -	/* Enough place for all fragments, head, and virtio net header. */
> -	VHOST_NET_MAX_SG = MAX_SKB_FRAGS + 2,
> -};
> -
>  struct vhost_work;
>  typedef void (*vhost_work_fn_t)(struct vhost_work *work);
>
> @@ -93,12 +88,15 @@ struct vhost_virtqueue {
>  	bool log_used;
>  	u64 log_addr;
>
> -	struct iovec indirect[VHOST_NET_MAX_SG];
> -	struct iovec iov[VHOST_NET_MAX_SG];
> -	struct iovec hdr[VHOST_NET_MAX_SG];
> +	struct iovec iov[UIO_MAXIOV];
> +	/* hdr is used to store the virtio header.
> +	 * Since each iovec has >= 1 byte length, we never need more than
> +	 * header length entries to store the header. */
> +	struct iovec hdr[sizeof(struct virtio_net_hdr_mrg_rxbuf)];
> +	struct iovec *indirect;
>  	size_t vhost_hlen;
>  	size_t sock_hlen;
> -	struct vring_used_elem heads[VHOST_NET_MAX_SG];
> +	struct vring_used_elem *heads;
>  	/* We use a kind of RCU to access private pointer.
>  	 * All readers access it from worker, which makes it possible to
>  	 * flush the vhost_work instead of synchronize_rcu. Therefore readers do
> @@ -109,7 +107,7 @@ struct vhost_virtqueue {
>  	void *private_data;
>  	/* Log write descriptors */
>  	void __user *log_base;
> -	struct vhost_log log[VHOST_NET_MAX_SG];
> +	struct vhost_log *log;
>  };
>
>  struct vhost_dev {
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html