This patch allows userspace to create workers and bind them to vqs, so
you can have N workers per device and share those workers among M vqs.
The next patch will allow sharing workers across devices.

Signed-off-by: Mike Christie <michael.christie@xxxxxxxxxx>
---
 drivers/vhost/vhost.c            | 94 +++++++++++++++++++++++++++++++-
 drivers/vhost/vhost.h            |  3 +
 include/uapi/linux/vhost.h       |  6 ++
 include/uapi/linux/vhost_types.h | 12 ++++
 4 files changed, 113 insertions(+), 2 deletions(-)

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 345ade0af133..981e9bac7a31 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -30,6 +30,7 @@
 #include <linux/interval_tree_generic.h>
 #include <linux/nospec.h>
 #include <linux/kcov.h>
+#include <linux/hashtable.h>
 
 #include "vhost.h"
 
@@ -42,6 +43,9 @@ module_param(max_iotlb_entries, int, 0444);
 MODULE_PARM_DESC(max_iotlb_entries,
 	"Maximum number of iotlb entries. (default: 2048)");
 
+static DEFINE_HASHTABLE(vhost_workers, 5);
+static DEFINE_SPINLOCK(vhost_workers_lock);
+
 enum {
 	VHOST_MEMORY_F_LOG = 0x1,
 };
@@ -617,8 +621,17 @@ static void vhost_detach_mm(struct vhost_dev *dev)
 	dev->mm = NULL;
 }
 
-static void vhost_worker_free(struct vhost_worker *worker)
+static void vhost_worker_put(struct vhost_worker *worker)
 {
+	spin_lock(&vhost_workers_lock);
+	if (!refcount_dec_and_test(&worker->refcount)) {
+		spin_unlock(&vhost_workers_lock);
+		return;
+	}
+
+	hash_del(&worker->h_node);
+	spin_unlock(&vhost_workers_lock);
+
 	WARN_ON(!llist_empty(&worker->work_list));
 	kthread_stop(worker->task);
 	kfree(worker);
@@ -632,7 +645,7 @@ static void vhost_workers_free(struct vhost_dev *dev)
 		return;
 
 	for (i = 0; i < dev->num_workers; i++)
-		vhost_worker_free(dev->workers[i]);
+		vhost_worker_put(dev->workers[i]);
 
 	kfree(dev->workers);
 	dev->num_workers = 0;
@@ -652,6 +665,8 @@ static struct vhost_worker *vhost_worker_create(struct vhost_dev *dev)
 	worker->id = dev->num_workers;
 	worker->dev = dev;
 	init_llist_head(&worker->work_list);
+	INIT_HLIST_NODE(&worker->h_node);
+	refcount_set(&worker->refcount, 1);
 
 	task = kthread_create(vhost_worker, worker, "vhost-%d", current->pid);
 	if (IS_ERR(task))
@@ -664,6 +679,9 @@ static struct vhost_worker *vhost_worker_create(struct vhost_dev *dev)
 	if (ret)
 		goto stop_worker;
 
+	spin_lock(&vhost_workers_lock);
+	hash_add(vhost_workers, &worker->h_node, worker->task->pid);
+	spin_unlock(&vhost_workers_lock);
 	return worker;
 
 stop_worker:
@@ -673,6 +691,67 @@ static struct vhost_worker *vhost_worker_create(struct vhost_dev *dev)
 	return NULL;
 }
 
+static struct vhost_worker *vhost_worker_find(struct vhost_dev *dev, pid_t pid)
+{
+	struct vhost_worker *worker, *found_worker = NULL;
+
+	spin_lock(&vhost_workers_lock);
+	hash_for_each_possible(vhost_workers, worker, h_node, pid) {
+		if (worker->task->pid == pid) {
+			/* tmp - next patch allows sharing across devs */
+			if (worker->dev != dev)
+				break;
+
+			found_worker = worker;
+			refcount_inc(&worker->refcount);
+			break;
+		}
+	}
+	spin_unlock(&vhost_workers_lock);
+	return found_worker;
+}
+
+/* Caller must have device mutex */
+static int vhost_vq_set_worker(struct vhost_virtqueue *vq,
+			       struct vhost_vring_worker *info)
+{
+	struct vhost_dev *dev = vq->dev;
+	struct vhost_worker *worker;
+
+	if (vq->worker) {
+		/* TODO - support changing while works are running */
+		return -EBUSY;
+	}
+
+	if (info->pid == VHOST_VRING_NEW_WORKER) {
+		worker = vhost_worker_create(dev);
+		if (!worker)
+			return -ENOMEM;
+
+		info->pid = worker->task->pid;
+	} else {
+		worker = vhost_worker_find(dev, info->pid);
+		if (!worker)
+			return -ENODEV;
+	}
+
+	if (!dev->workers) {
+		dev->workers = kcalloc(vq->dev->nvqs,
+				       sizeof(struct vhost_worker *),
+				       GFP_KERNEL);
+		if (!dev->workers) {
+			vhost_worker_put(worker);
+			return -ENOMEM;
+		}
+	}
+
+	vq->worker = worker;
+
+	dev->workers[dev->num_workers] = worker;
+	dev->num_workers++;
+	return 0;
+}
+
 /* Caller must have device mutex */
 static int vhost_worker_try_create_def(struct vhost_dev *dev)
 {
@@ -1680,6 +1759,7 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg
 	struct eventfd_ctx *ctx = NULL;
 	u32 __user *idxp = argp;
 	struct vhost_virtqueue *vq;
+	struct vhost_vring_worker w;
 	struct vhost_vring_state s;
 	struct vhost_vring_file f;
 	u32 idx;
@@ -1794,6 +1874,15 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg
 		if (copy_to_user(argp, &s, sizeof(s)))
 			r = -EFAULT;
 		break;
+	case VHOST_SET_VRING_WORKER:
+		if (copy_from_user(&w, argp, sizeof(w))) {
+			r = -EFAULT;
+			break;
+		}
+		r = vhost_vq_set_worker(vq, &w);
+		if (!r && copy_to_user(argp, &w, sizeof(w)))
+			r = -EFAULT;
+		break;
 	default:
 		r = -ENOIOCTLCMD;
 	}
@@ -2726,6 +2815,7 @@ EXPORT_SYMBOL_GPL(vhost_set_backend_features);
 
 static int __init vhost_init(void)
 {
+	hash_init(vhost_workers);
 	return 0;
 }
 
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index 0a252dd45101..75b884ad1f17 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -14,6 +14,7 @@
 #include <linux/atomic.h>
 #include <linux/vhost_iotlb.h>
 #include <linux/irqbypass.h>
+#include <linux/refcount.h>
 
 struct vhost_work;
 typedef void (*vhost_work_fn_t)(struct vhost_work *work);
@@ -28,6 +29,8 @@ struct vhost_work {
 struct vhost_worker {
 	struct task_struct	*task;
 	struct llist_head	work_list;
+	struct hlist_node	h_node;
+	refcount_t		refcount;
 	struct vhost_dev	*dev;
 	int			id;
 };
diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h
index c998860d7bbc..ce32119cb139 100644
--- a/include/uapi/linux/vhost.h
+++ b/include/uapi/linux/vhost.h
@@ -70,6 +70,12 @@
 #define VHOST_VRING_BIG_ENDIAN 1
 #define VHOST_SET_VRING_ENDIAN _IOW(VHOST_VIRTIO, 0x13, struct vhost_vring_state)
 #define VHOST_GET_VRING_ENDIAN _IOW(VHOST_VIRTIO, 0x14, struct vhost_vring_state)
+/* Create/bind a vhost worker to a virtqueue. If pid > 0 and matches an
+ * existing vhost_worker thread, that worker will be bound to the vq. If
+ * pid is VHOST_VRING_NEW_WORKER, then a new worker will be created and
+ * bound to the vq.
+ */
+#define VHOST_SET_VRING_WORKER _IOWR(VHOST_VIRTIO, 0x15, struct vhost_vring_worker)
 
 /* The following ioctls use eventfd file descriptors to signal and poll
  * for events. */
diff --git a/include/uapi/linux/vhost_types.h b/include/uapi/linux/vhost_types.h
index f7f6a3a28977..5113baa8bc3e 100644
--- a/include/uapi/linux/vhost_types.h
+++ b/include/uapi/linux/vhost_types.h
@@ -47,6 +47,18 @@ struct vhost_vring_addr {
 	__u64 log_guest_addr;
 };
 
+#define VHOST_VRING_NEW_WORKER -1
+
+struct vhost_vring_worker {
+	unsigned int index;
+	/*
+	 * The pid of the vhost worker that the vq will be bound to. If
+	 * pid is VHOST_VRING_NEW_WORKER, a new worker will be created and
+	 * its pid will be returned in pid.
+	 */
+	__kernel_pid_t pid;
+};
+
 /* no alignment requirement */
 struct vhost_iotlb_msg {
 	__u64 iova;
-- 
2.25.1
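
For reviewers who want to exercise the new interface, below is a minimal,
hypothetical userspace sketch (not part of the patch): it binds vq 0 to a
newly created worker, then shares that worker with vq 1 on the same device,
which is the sharing this patch enables (cross-device sharing comes in the
next patch). The function name and the vhost_fd parameter are made up for
illustration; it assumes an already opened and set-up vhost device fd and
the updated uapi headers, with error handling trimmed for brevity.

#include <stdio.h>
#include <sys/ioctl.h>
#include <linux/vhost.h>

static int bind_vqs_to_shared_worker(int vhost_fd)
{
	struct vhost_vring_worker w = {
		.index = 0,
		/* Ask the kernel to create a new worker for vq 0. */
		.pid = VHOST_VRING_NEW_WORKER,
	};

	if (ioctl(vhost_fd, VHOST_SET_VRING_WORKER, &w) < 0)
		return -1;
	/* The ioctl is _IOWR, so the new worker's pid was written back. */
	printf("vq 0 bound to worker pid %d\n", w.pid);

	/* Reuse the returned pid so vq 1 shares vq 0's worker. */
	w.index = 1;
	if (ioctl(vhost_fd, VHOST_SET_VRING_WORKER, &w) < 0)
		return -1;
	return 0;
}

Passing the returned pid back in a second call is what exercises the
vhost_worker_find() path above; a pid that does not match a vhost worker
on this device fails with -ENODEV, and rebinding an already-bound vq
fails with -EBUSY.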