Re: [Qemu-devel] [PATCH v7 2/6] virtio-pmem: Add virtio pmem driver

Pankaj Gupta <pagupta@xxxxxxxxxx> · Wed, 8 May 2019 07:12:47 -0400 (EDT)

> 
> On 4/25/19 10:00 PM, Pankaj Gupta wrote:
> 
> > +void host_ack(struct virtqueue *vq)
> > +{
> > +	unsigned int len;
> > +	unsigned long flags;
> > +	struct virtio_pmem_request *req, *req_buf;
> > +	struct virtio_pmem *vpmem = vq->vdev->priv;
> > +
> > +	spin_lock_irqsave(&vpmem->pmem_lock, flags);
> > +	while ((req = virtqueue_get_buf(vq, &len)) != NULL) {
> > +		req->done = true;
> > +		wake_up(&req->host_acked);
> > +
> > +		if (!list_empty(&vpmem->req_list)) {
> > +			req_buf = list_first_entry(&vpmem->req_list,
> > +					struct virtio_pmem_request, list);
> > +			list_del(&vpmem->req_list);
> 
> Shouldn't it be rather `list_del(vpmem->req_list.next)`? We are trying to
> unlink
> first element of the list and `vpmem->req_list` is just the list head.

This looks correct. We are not deleting the head but the first entry in 'req_list',
which is the device's list of pending requests.

Please see below:

/**
 * Retrieve the first list entry for the given list pointer.
 *
 * Example:
 * struct foo *first;
 * first = list_first_entry(&bar->list_of_foos, struct foo, list_of_foos);
 *
 * @param ptr The list head
 * @param type Data type of the list element to retrieve
 * @param member Member name of the struct list_head field in the list element.
 * @return A pointer to the first list element.
 */
#define list_first_entry(ptr, type, member) \
    list_entry((ptr)->next, type, member)

> 
> > +int virtio_pmem_flush(struct nd_region *nd_region)
> > +{
> > +	int err;
> > +	unsigned long flags;
> > +	struct scatterlist *sgs[2], sg, ret;
> > +	struct virtio_device *vdev = nd_region->provider_data;
> > +	struct virtio_pmem *vpmem = vdev->priv;
> > +	struct virtio_pmem_request *req;
> > +
> > +	might_sleep();
> > +	req = kmalloc(sizeof(*req), GFP_KERNEL);
> > +	if (!req)
> > +		return -ENOMEM;
> > +
> > +	req->done = req->wq_buf_avail = false;
> > +	strcpy(req->name, "FLUSH");
> > +	init_waitqueue_head(&req->host_acked);
> > +	init_waitqueue_head(&req->wq_buf);
> > +	sg_init_one(&sg, req->name, strlen(req->name));
> > +	sgs[0] = &sg;
> > +	sg_init_one(&ret, &req->ret, sizeof(req->ret));
> > +	sgs[1] = &ret;
> > +
> > +	spin_lock_irqsave(&vpmem->pmem_lock, flags);
> > +	err = virtqueue_add_sgs(vpmem->req_vq, sgs, 1, 1, req, GFP_ATOMIC);
> > +	if (err) {
> > +		dev_err(&vdev->dev, "failed to send command to virtio pmem device\n");
> > +
> > +		list_add_tail(&vpmem->req_list, &req->list);
> > +		spin_unlock_irqrestore(&vpmem->pmem_lock, flags);
> > +
> > +		/* When host has read buffer, this completes via host_ack */
> > +		wait_event(req->wq_buf, req->wq_buf_avail);
> > +		spin_lock_irqsave(&vpmem->pmem_lock, flags);
> > +	}
> 
> Aren't the arguments in `list_add_tail` swapped? The element we are adding

No, this is intentional. 'vpmem->req_list' maintains a list of pending requests
for the entire pmem device. 'req->list' is a per-request list and holds the pending
request when adding to the virtio queue fails. I think we don't need this list.

> should
> be first, the list should be second. Also, shouldn't we resubmit the request
> after
> waking up from `wait_event(req->wq_buf, req->wq_buf_avail)`?

Yes, we should. Good point.

> 
> I propose rewriting it like that:
> 
> diff --git a/drivers/nvdimm/virtio_pmem.c b/drivers/nvdimm/virtio_pmem.c
> index 66b582f751a3..ff0556b04e86 100644
> --- a/drivers/nvdimm/virtio_pmem.c
> +++ b/drivers/nvdimm/virtio_pmem.c
> @@ -25,7 +25,7 @@ void host_ack(struct virtqueue *vq)
>  		if (!list_empty(&vpmem->req_list)) {
>  			req_buf = list_first_entry(&vpmem->req_list,
>  					struct virtio_pmem_request, list);
> -			list_del(&vpmem->req_list);
> +			list_del(vpmem->req_list.next);

I don't think it's correct.

>  			req_buf->wq_buf_avail = true;
>  			wake_up(&req_buf->wq_buf);
>  		}
> @@ -59,17 +59,33 @@ int virtio_pmem_flush(struct nd_region *nd_region)
>  	sgs[1] = &ret;
>  
>  	spin_lock_irqsave(&vpmem->pmem_lock, flags);
> -	err = virtqueue_add_sgs(vpmem->req_vq, sgs, 1, 1, req, GFP_ATOMIC);
> -	if (err) {
> -		dev_err(&vdev->dev, "failed to send command to virtio pmem device\n");
> +	/*
> +	 * If virtqueue_add_sgs returns -ENOSPC then req_vq virtual queue does not
> +	 * have free descriptor slots. We add the request to req_list and wait
> +	 * for host_ack to wake us up when free slots are available.
> +	 */
> +	while ((err = virtqueue_add_sgs(vpmem->req_vq, sgs, 1, 1, req, GFP_ATOMIC))
> == -ENOSPC) {
> +		dev_err(&vdev->dev, "failed to send command to virtio pmem device, no free
> slots in the virtqueue, postponing request\n");
> +		req->wq_buf_avail = false;
>  
> -		list_add_tail(&vpmem->req_list, &req->list);
> +		list_add_tail(&req->list, &vpmem->req_list);
>  		spin_unlock_irqrestore(&vpmem->pmem_lock, flags);
>  
>  		/* When host has read buffer, this completes via host_ack */
>  		wait_event(req->wq_buf, req->wq_buf_avail);
>  		spin_lock_irqsave(&vpmem->pmem_lock, flags);
>  	}
> +
> +	/*
> +	 * virtqueue_add_sgs failed with error different than -ENOSPC, we can't
> +	 * do anything about that.
> +	 */
> +	if (err) {
> +		dev_info(&vdev->dev, "failed to send command to virtio pmem device, error
> code %d\n", err);
> +		spin_unlock_irqrestore(&vpmem->pmem_lock, flags);
> +		err = -EIO;
> +		goto ret;
> +	}
>  	err = virtqueue_kick(vpmem->req_vq);
>  	spin_unlock_irqrestore(&vpmem->pmem_lock, flags);
> 
> 
> Let me know if it looks reasonable to you.

I don't think this fulfills the entire logic correctly. But thanks, I spotted a bug in my code :)
I will fix it.

> 
> Thank you,
> Jakub Staron
> 
>