On 5/4/21 10:30 AM, Stefano Garzarella wrote:
> On Wed, Apr 28, 2021 at 05:37:11PM -0500, Mike Christie wrote:
>> This patch allows userspace to create workers and bind them to vqs, so you
>> can have N workers per dev and also share N workers with M vqs. The next
>> patch will allow sharing across devices.
>>
>> Signed-off-by: Mike Christie <michael.christie@xxxxxxxxxx>
>> ---
>>  drivers/vhost/vhost.c            | 95 +++++++++++++++++++++++++++++++-
>>  drivers/vhost/vhost.h            |  3 +
>>  include/uapi/linux/vhost.h       |  6 ++
>>  include/uapi/linux/vhost_types.h |  9 +++
>>  4 files changed, 111 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
>> index 345ade0af133..fecdae0d18c7 100644
>> --- a/drivers/vhost/vhost.c
>> +++ b/drivers/vhost/vhost.c
>> @@ -42,6 +42,9 @@ module_param(max_iotlb_entries, int, 0444);
>>  MODULE_PARM_DESC(max_iotlb_entries,
>>          "Maximum number of iotlb entries. (default: 2048)");
>>
>> +static LIST_HEAD(vhost_workers_list);
>> +static DEFINE_SPINLOCK(vhost_workers_lock);
>> +
>>  enum {
>>          VHOST_MEMORY_F_LOG = 0x1,
>>  };
>> @@ -617,8 +620,16 @@ static void vhost_detach_mm(struct vhost_dev *dev)
>>          dev->mm = NULL;
>>  }
>>
>> -static void vhost_worker_free(struct vhost_worker *worker)
>> +static void vhost_worker_put(struct vhost_worker *worker)
>>  {
>> +        spin_lock(&vhost_workers_lock);
>> +        if (!refcount_dec_and_test(&worker->refcount)) {
>> +                spin_unlock(&vhost_workers_lock);
>> +                return;
>> +        }
>> +        list_del(&worker->list);
>> +        spin_unlock(&vhost_workers_lock);
>> +
>>          WARN_ON(!llist_empty(&worker->work_list));
>>          kthread_stop(worker->task);
>>          kfree(worker);
>> @@ -632,7 +643,7 @@ static void vhost_workers_free(struct vhost_dev *dev)
>>                  return;
>>
>>          for (i = 0; i < dev->num_workers; i++)
>> -                vhost_worker_free(dev->workers[i]);
>> +                vhost_worker_put(dev->workers[i]);
>>
>>          kfree(dev->workers);
>>          dev->num_workers = 0;
>> @@ -652,6 +663,8 @@ static struct vhost_worker *vhost_worker_create(struct vhost_dev *dev)
>>          worker->id = dev->num_workers;
>>          worker->dev = dev;
>>          init_llist_head(&worker->work_list);
>> +        INIT_LIST_HEAD(&worker->list);
>> +        refcount_set(&worker->refcount, 1);
>>
>>          task = kthread_create(vhost_worker, worker, "vhost-%d", current->pid);
>>          if (IS_ERR(task))
>> @@ -664,6 +677,9 @@ static struct vhost_worker *vhost_worker_create(struct vhost_dev *dev)
>>          if (ret)
>>                  goto stop_worker;
>>
>> +        spin_lock(&vhost_workers_lock);
>> +        list_add_tail(&worker->list, &vhost_workers_list);
>> +        spin_unlock(&vhost_workers_lock);
>>          return worker;
>>
>> stop_worker:
>> @@ -673,6 +689,71 @@ static struct vhost_worker *vhost_worker_create(struct vhost_dev *dev)
>>          return NULL;
>>  }
>>
>> +static struct vhost_worker *vhost_worker_find(struct vhost_dev *dev, pid_t pid)
>> +{
>> +        struct vhost_worker *worker;
>> +
>> +        /* TODO hash on pid? */
>> +        spin_lock(&vhost_workers_lock);
>> +        list_for_each_entry(worker, &vhost_workers_list, list) {
>> +                if (worker->task->pid != pid)
>> +                        continue;
>> +
>> +                /* tmp - next patch allows sharing across devs */
>> +                if (worker->dev != dev) {
>> +                        spin_unlock(&vhost_workers_lock);
>> +                        return NULL;
>> +                }
>> +
>> +                refcount_inc(&worker->refcount);
>> +                spin_unlock(&vhost_workers_lock);
>> +                return worker;
>> +        }
>> +        spin_unlock(&vhost_workers_lock);
>> +        return NULL;
>
> I would like to have a single point where we release the lock to avoid
> future issues. How about changing vhost_worker_find() to:
>
> static struct vhost_worker *vhost_worker_find(struct vhost_dev *dev, pid_t pid)
> {
>         struct vhost_worker *worker, *found_worker = NULL;
>
>         spin_lock(&vhost_workers_lock);
>         list_for_each_entry(worker, &vhost_workers_list, list) {
>                 if (worker->task->pid == pid) {
>                         /* tmp - next patch allows sharing across devs */
>                         if (worker->dev != dev)
>                                 break;
>
>                         found_worker = worker;
>                         refcount_inc(&found_worker->refcount);
>                         break;
>                 }
>         }
>         spin_unlock(&vhost_workers_lock);
>         return found_worker;
> }

Nice. Will do.
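A note on the locking for anyone reading along: the refcount_dec_and_test()
in vhost_worker_put() is deliberately done under vhost_workers_lock, the
same lock vhost_worker_find() holds while walking the list. Informally,
the interleaving this rules out (had the dec been done outside the lock)
is:

        put:  refcount_dec_and_test()   -> count hits 0
        find: worker still on the list,
              refcount_inc()            -> resurrects a dying worker
        put:  list_del() + kfree()      -> find returns freed memory

With the dec inside the critical section, find either takes its reference
before the count can reach zero, or never sees the worker at all.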
>
>> +}
>> +
>> +/* Caller must have device mutex */
>> +static int vhost_vq_set_worker(struct vhost_virtqueue *vq,
>> +                               struct vhost_vring_worker *info)
>> +{
>> +        struct vhost_dev *dev = vq->dev;
>> +        struct vhost_worker *worker;
>> +
>> +        if (vq->worker) {
>> +                /* TODO - support changing while works are running */
>> +                return -EBUSY;
>> +        }
>> +
>> +        if (info->pid == -1) {
>> +                worker = vhost_worker_create(dev);
>> +                if (!worker)
>> +                        return -ENOMEM;
>> +
>> +                info->pid = worker->task->pid;
>> +        } else {
>> +                worker = vhost_worker_find(dev, info->pid);
>> +                if (!worker)
>> +                        return -ENODEV;
>> +        }
>> +
>> +        if (!dev->workers) {
>> +                dev->workers = kcalloc(vq->dev->nvqs,
>> +                                       sizeof(struct vhost_worker *),
>> +                                       GFP_KERNEL);
>> +                if (!dev->workers) {
>> +                        vhost_worker_put(worker);
>> +                        return -ENOMEM;
>> +                }
>> +        }
>> +
>> +        vq->worker = worker;
>> +
>> +        dev->workers[dev->num_workers] = worker;
>> +        dev->num_workers++;
>> +        return 0;
>> +}
>> +
>>  /* Caller must have device mutex */
>>  static int vhost_worker_try_create_def(struct vhost_dev *dev)
>>  {
>> @@ -1680,6 +1761,7 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg
>>          struct eventfd_ctx *ctx = NULL;
>>          u32 __user *idxp = argp;
>>          struct vhost_virtqueue *vq;
>> +        struct vhost_vring_worker w;
>>          struct vhost_vring_state s;
>>          struct vhost_vring_file f;
>>          u32 idx;
>> @@ -1794,6 +1876,15 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg
>>                  if (copy_to_user(argp, &s, sizeof(s)))
>>                          r = -EFAULT;
>>                  break;
>> +        case VHOST_SET_VRING_WORKER:
>> +                if (copy_from_user(&w, argp, sizeof(w))) {
>> +                        r = -EFAULT;
>> +                        break;
>> +                }
>> +                r = vhost_vq_set_worker(vq, &w);
>> +                if (!r && copy_to_user(argp, &w, sizeof(w)))
>> +                        r = -EFAULT;
>> +                break;
>>          default:
>>                  r = -ENOIOCTLCMD;
>>          }
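To make the intended flow concrete, userspace would drive the new ioctl
roughly like this (sketch only -- it assumes struct vhost_vring_worker
carries the vring index next to the pid field used above; fd setup and
error handling are elided):

        struct vhost_vring_worker w;

        /* vq 0: pid == -1 asks the kernel to create a new worker */
        w.index = 0;
        w.pid = -1;
        ioctl(dev_fd, VHOST_SET_VRING_WORKER, &w);
        /* on success the kernel has filled in w.pid with the worker's pid */

        /* vq 1: pass that pid back in to share the same worker thread */
        w.index = 1;
        ioctl(dev_fd, VHOST_SET_VRING_WORKER, &w);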
>> diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
>> index 973889ec7d62..64dc00337389 100644
>> --- a/drivers/vhost/vhost.h
>> +++ b/drivers/vhost/vhost.h
>> @@ -14,6 +14,7 @@
>>  #include <linux/atomic.h>
>>  #include <linux/vhost_iotlb.h>
>>  #include <linux/irqbypass.h>
>> +#include <linux/refcount.h>
>>
>>  struct vhost_work;
>>  typedef void (*vhost_work_fn_t)(struct vhost_work *work);
>> @@ -28,6 +29,8 @@ struct vhost_work {
>>  struct vhost_worker {
>>          struct task_struct *task;
>>          struct llist_head work_list;
>> +        struct list_head list;
>> +        refcount_t refcount;
>>          struct vhost_dev *dev;
>>          int id;
>>  };
>> diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h
>> index c998860d7bbc..61a57f5366ee 100644
>> --- a/include/uapi/linux/vhost.h
>> +++ b/include/uapi/linux/vhost.h
>> @@ -70,6 +70,12 @@
>>  #define VHOST_VRING_BIG_ENDIAN 1
>>  #define VHOST_SET_VRING_ENDIAN _IOW(VHOST_VIRTIO, 0x13, struct vhost_vring_state)
>>  #define VHOST_GET_VRING_ENDIAN _IOW(VHOST_VIRTIO, 0x14, struct vhost_vring_state)
>> +/* Create/bind a vhost worker to a virtqueue. If pid > 0 and matches an existing
>> + * vhost_worker thread it will be bound to the vq. If pid is -1, then a new
>
> What about adding a macro for -1? (e.g. VHOST_VRING_NEW_WORKER)

Yeah, that is nicer than a magic number. Will do.
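For the archives, a sketch of what that follow-up could look like (name per
Stefano's suggestion; exact placement and wording up to the next revision):

        /* include/uapi/linux/vhost.h */
        #define VHOST_VRING_NEW_WORKER -1

and in vhost_vq_set_worker():

        -        if (info->pid == -1) {
        +        if (info->pid == VHOST_VRING_NEW_WORKER) {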