On 5/4/21 10:30 AM, Stefano Garzarella wrote:
> On Wed, Apr 28, 2021 at 05:37:11PM -0500, Mike Christie wrote:
>> This patch allows userspace to create workers and bind them to vqs, so you
>> can have N workers per dev and also share N workers with M vqs. The next
>> patch will allow sharing across devices.
>>
>> Signed-off-by: Mike Christie <michael.christie@xxxxxxxxxx>
>> ---
>>  drivers/vhost/vhost.c            | 95 +++++++++++++++++++++++++++++++-
>>  drivers/vhost/vhost.h            |  3 +
>>  include/uapi/linux/vhost.h       |  6 ++
>>  include/uapi/linux/vhost_types.h |  9 +++
>>  4 files changed, 111 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
>> index 345ade0af133..fecdae0d18c7 100644
>> --- a/drivers/vhost/vhost.c
>> +++ b/drivers/vhost/vhost.c
>> @@ -42,6 +42,9 @@ module_param(max_iotlb_entries, int, 0444);
>>  MODULE_PARM_DESC(max_iotlb_entries,
>>          "Maximum number of iotlb entries. (default: 2048)");
>>
>> +static LIST_HEAD(vhost_workers_list);
>> +static DEFINE_SPINLOCK(vhost_workers_lock);
>> +
>>  enum {
>>          VHOST_MEMORY_F_LOG = 0x1,
>>  };
>> @@ -617,8 +620,16 @@ static void vhost_detach_mm(struct vhost_dev *dev)
>>          dev->mm = NULL;
>>  }
>>
>> -static void vhost_worker_free(struct vhost_worker *worker)
>> +static void vhost_worker_put(struct vhost_worker *worker)
>>  {
>> +        spin_lock(&vhost_workers_lock);
>> +        if (!refcount_dec_and_test(&worker->refcount)) {
>> +                spin_unlock(&vhost_workers_lock);
>> +                return;
>> +        }
>> +        list_del(&worker->list);
>> +        spin_unlock(&vhost_workers_lock);
>> +
>>          WARN_ON(!llist_empty(&worker->work_list));
>>          kthread_stop(worker->task);
>>          kfree(worker);
>> @@ -632,7 +643,7 @@ static void vhost_workers_free(struct vhost_dev *dev)
>>                  return;
>>
>>          for (i = 0; i < dev->num_workers; i++)
>> -                vhost_worker_free(dev->workers[i]);
>> +                vhost_worker_put(dev->workers[i]);
>>
>>          kfree(dev->workers);
>>          dev->num_workers = 0;
>> @@ -652,6 +663,8 @@ static struct vhost_worker *vhost_worker_create(struct vhost_dev *dev)
>>          worker->id = dev->num_workers;
>>          worker->dev = dev;
>>          init_llist_head(&worker->work_list);
>> +        INIT_LIST_HEAD(&worker->list);
>> +        refcount_set(&worker->refcount, 1);
>>
>>          task = kthread_create(vhost_worker, worker, "vhost-%d", current->pid);
>>          if (IS_ERR(task))
>> @@ -664,6 +677,9 @@ static struct vhost_worker *vhost_worker_create(struct vhost_dev *dev)
>>          if (ret)
>>                  goto stop_worker;
>>
>> +        spin_lock(&vhost_workers_lock);
>> +        list_add_tail(&worker->list, &vhost_workers_list);
>> +        spin_unlock(&vhost_workers_lock);
>>          return worker;
>>
>> stop_worker:
>> @@ -673,6 +689,71 @@ static struct vhost_worker *vhost_worker_create(struct vhost_dev *dev)
>>          return NULL;
>>  }
>>
>> +static struct vhost_worker *vhost_worker_find(struct vhost_dev *dev, pid_t pid)
>> +{
>> +        struct vhost_worker *worker;
>> +
>> +        /* TODO hash on pid? */
>> +        spin_lock(&vhost_workers_lock);
>> +        list_for_each_entry(worker, &vhost_workers_list, list) {
>> +                if (worker->task->pid != pid)
>> +                        continue;
>> +
>> +                /* tmp - next patch allows sharing across devs */
>> +                if (worker->dev != dev) {
>> +                        spin_unlock(&vhost_workers_lock);
>> +                        return NULL;
>> +                }
>> +
>> +                refcount_inc(&worker->refcount);
>> +                spin_unlock(&vhost_workers_lock);
>> +                return worker;
>> +        }
>> +        spin_unlock(&vhost_workers_lock);
>> +        return NULL;
>
> I would like to have a single point where we release the lock to avoid
> future issues. How about changing vhost_worker_find() to:
>
> static struct vhost_worker *vhost_worker_find(struct vhost_dev *dev, pid_t pid)
> {
>         struct vhost_worker *worker, *found_worker = NULL;
>
>         spin_lock(&vhost_workers_lock);
>         list_for_each_entry(worker, &vhost_workers_list, list) {
>                 if (worker->task->pid == pid) {
>                         /* tmp - next patch allows sharing across devs */
>                         if (worker->dev != dev)
>                                 break;
>
>                         found_worker = worker;
>                         refcount_inc(&found_worker->refcount);
>                         break;
>                 }
>         }
>         spin_unlock(&vhost_workers_lock);
>         return found_worker;
> }

Nice. Will do.
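A note on the locking for anyone reading along: the refcount_dec_and_test()
in vhost_worker_put() is deliberately done under vhost_workers_lock, the
same lock vhost_worker_find() holds while walking the list. Informally,
the interleaving this rules out (had the dec been done outside the lock)
is:

        put:  refcount_dec_and_test()   -> count hits 0
        find: worker still on the list,
              refcount_inc()            -> resurrects a dying worker
        put:  list_del() + kfree()      -> find returns freed memory

With the dec inside the critical section, find either takes its reference
before the count can reach zero, or never sees the worker at all.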
>
>> +}
>> +
>> +/* Caller must have device mutex */
>> +static int vhost_vq_set_worker(struct vhost_virtqueue *vq,
>> +                               struct vhost_vring_worker *info)
>> +{
>> +        struct vhost_dev *dev = vq->dev;
>> +        struct vhost_worker *worker;
>> +
>> +        if (vq->worker) {
>> +                /* TODO - support changing while works are running */
>> +                return -EBUSY;
>> +        }
>> +
>> +        if (info->pid == -1) {
>> +                worker = vhost_worker_create(dev);
>> +                if (!worker)
>> +                        return -ENOMEM;
>> +
>> +                info->pid = worker->task->pid;
>> +        } else {
>> +                worker = vhost_worker_find(dev, info->pid);
>> +                if (!worker)
>> +                        return -ENODEV;
>> +        }
>> +
>> +        if (!dev->workers) {
>> +                dev->workers = kcalloc(vq->dev->nvqs,
>> +                                       sizeof(struct vhost_worker *),
>> +                                       GFP_KERNEL);
>> +                if (!dev->workers) {
>> +                        vhost_worker_put(worker);
>> +                        return -ENOMEM;
>> +                }
>> +        }
>> +
>> +        vq->worker = worker;
>> +
>> +        dev->workers[dev->num_workers] = worker;
>> +        dev->num_workers++;
>> +        return 0;
>> +}
>> +
>>  /* Caller must have device mutex */
>>  static int vhost_worker_try_create_def(struct vhost_dev *dev)
>>  {
>> @@ -1680,6 +1761,7 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg
>>          struct eventfd_ctx *ctx = NULL;
>>          u32 __user *idxp = argp;
>>          struct vhost_virtqueue *vq;
>> +        struct vhost_vring_worker w;
>>          struct vhost_vring_state s;
>>          struct vhost_vring_file f;
>>          u32 idx;
>> @@ -1794,6 +1876,15 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg
>>                  if (copy_to_user(argp, &s, sizeof(s)))
>>                          r = -EFAULT;
>>                  break;
>> +        case VHOST_SET_VRING_WORKER:
>> +                if (copy_from_user(&w, argp, sizeof(w))) {
>> +                        r = -EFAULT;
>> +                        break;
>> +                }
>> +                r = vhost_vq_set_worker(vq, &w);
>> +                if (!r && copy_to_user(argp, &w, sizeof(w)))
>> +                        r = -EFAULT;
>> +                break;
>>          default:
>>                  r = -ENOIOCTLCMD;
>>          }
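To make the intended flow concrete, userspace would drive the new ioctl
roughly like this (sketch only -- it assumes struct vhost_vring_worker
carries the vring index next to the pid field used above; fd setup and
error handling are elided):

        struct vhost_vring_worker w;

        /* vq 0: pid == -1 asks the kernel to create a new worker */
        w.index = 0;
        w.pid = -1;
        ioctl(dev_fd, VHOST_SET_VRING_WORKER, &w);
        /* on success the kernel has filled in w.pid with the worker's pid */

        /* vq 1: pass that pid back in to share the same worker thread */
        w.index = 1;
        ioctl(dev_fd, VHOST_SET_VRING_WORKER, &w);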
>> diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
>> index 973889ec7d62..64dc00337389 100644
>> --- a/drivers/vhost/vhost.h
>> +++ b/drivers/vhost/vhost.h
>> @@ -14,6 +14,7 @@
>>  #include <linux/atomic.h>
>>  #include <linux/vhost_iotlb.h>
>>  #include <linux/irqbypass.h>
>> +#include <linux/refcount.h>
>>
>>  struct vhost_work;
>>  typedef void (*vhost_work_fn_t)(struct vhost_work *work);
>> @@ -28,6 +29,8 @@ struct vhost_work {
>>  struct vhost_worker {
>>          struct task_struct *task;
>>          struct llist_head work_list;
>> +        struct list_head list;
>> +        refcount_t refcount;
>>          struct vhost_dev *dev;
>>          int id;
>>  };
>> diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h
>> index c998860d7bbc..61a57f5366ee 100644
>> --- a/include/uapi/linux/vhost.h
>> +++ b/include/uapi/linux/vhost.h
>> @@ -70,6 +70,12 @@
>>  #define VHOST_VRING_BIG_ENDIAN 1
>>  #define VHOST_SET_VRING_ENDIAN _IOW(VHOST_VIRTIO, 0x13, struct vhost_vring_state)
>>  #define VHOST_GET_VRING_ENDIAN _IOW(VHOST_VIRTIO, 0x14, struct vhost_vring_state)
>> +/* Create/bind a vhost worker to a virtqueue. If pid > 0 and matches an existing
>> + * vhost_worker thread it will be bound to the vq. If pid is -1, then a new
>
> What about adding a macro for -1? (e.g. VHOST_VRING_NEW_WORKER)

Yeah, that is nicer than a magic number. Will do.
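For the archives, a sketch of what that follow-up could look like (name per
Stefano's suggestion; exact placement and wording up to the next revision):

        /* include/uapi/linux/vhost.h */
        #define VHOST_VRING_NEW_WORKER -1

and in vhost_vq_set_worker():

        -        if (info->pid == -1) {
        +        if (info->pid == VHOST_VRING_NEW_WORKER) {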