This enables near-linear scaling in multiqueue configurations: each
virtqueue now has its own worker thread and work list instead of all
virtqueues sharing a single per-device worker. The first virtqueue's
worker is still created unconditionally in vhost_dev_set_owner(); for
the remaining virtqueues, worker creation is deferred until polling
actually starts on that queue, so idle queues never spawn a thread.

Signed-off-by: Vitaly Mayatskikh <v.mayatskih@xxxxxxxxx>
---
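Note (illustration only, not part of the patch): the userspace sketch
below mimics the deferred per-queue worker scheme introduced here, in
case the shape of the change is easier to see outside kernel context.
pthreads and a mutex/condvar pair stand in for kthread_create(), the
lock-free llist and wake_up_process() used on the kernel side, and
every name in the sketch is hypothetical.

/* build: cc -pthread sketch.c */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

#define NQUEUES 4

struct work {
	struct work *next;
	void (*fn)(struct work *w);
};

struct queue {
	int index;
	bool has_worker;		/* has a worker been created yet? */
	pthread_t worker;
	pthread_mutex_t lock;
	pthread_cond_t kick;
	struct work *head;		/* stands in for vq->work_list */
};

/* Per-queue worker loop: wait for work, run it. */
static void *worker_fn(void *data)
{
	struct queue *q = data;

	for (;;) {
		struct work *w;

		pthread_mutex_lock(&q->lock);
		while (!q->head)
			pthread_cond_wait(&q->kick, &q->lock);
		w = q->head;
		q->head = w->next;
		pthread_mutex_unlock(&q->lock);
		w->fn(w);
	}
	return NULL;
}

/* Analogue of vhost_vq_work_queue() plus the deferred creation done by
 * vhost_vq_poll_start(): the worker thread comes into existence only
 * when its queue first has work to do. */
static int queue_work(struct queue *q, struct work *w)
{
	if (!q->has_worker) {
		printf("spawning worker for queue %d\n", q->index);
		if (pthread_create(&q->worker, NULL, worker_fn, q) != 0)
			return -1;
		q->has_worker = true;
	}
	pthread_mutex_lock(&q->lock);
	w->next = q->head;
	q->head = w;
	pthread_cond_signal(&q->kick);
	pthread_mutex_unlock(&q->lock);
	return 0;
}

static void run_work(struct work *w)
{
	(void)w;
	printf("work ran\n");
}

int main(void)
{
	struct queue qs[NQUEUES];
	struct work items[NQUEUES];
	int i;

	for (i = 0; i < NQUEUES; i++) {
		qs[i] = (struct queue){ .index = i };
		pthread_mutex_init(&qs[i].lock, NULL);
		pthread_cond_init(&qs[i].kick, NULL);
	}

	/* Only queues 0 and 2 see traffic, so only they get workers;
	 * queues 1 and 3 never spawn a thread. */
	items[0].fn = run_work;
	queue_work(&qs[0], &items[0]);
	items[2].fn = run_work;
	queue_work(&qs[2], &items[2]);

	sleep(1);	/* let the two workers drain their lists */
	return 0;
}

The design point mirrored above: a worker's cost is paid only by
queues that actually see traffic. In the patch itself,
vhost_dev_set_owner() still spawns vq 0's worker eagerly (preserving
existing single-queue behaviour), and vhost_vq_poll_start() spawns the
rest the first time polling starts on a queue.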
 drivers/vhost/vhost.c | 123 +++++++++++++++++++++++++++++++-----------
 drivers/vhost/vhost.h |  11 +++-
 2 files changed, 100 insertions(+), 34 deletions(-)

diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 3a5f81a66d34..523dcfac4541 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -185,18 +185,27 @@ void vhost_work_init(struct vhost_work *work, vhost_work_fn_t fn)
 }
 EXPORT_SYMBOL_GPL(vhost_work_init);
 
-/* Init poll structure */
-void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn,
-                     __poll_t mask, struct vhost_dev *dev)
+
+static void vhost_vq_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn,
+                               __poll_t mask, struct vhost_virtqueue *vq)
 {
         init_waitqueue_func_entry(&poll->wait, vhost_poll_wakeup);
         init_poll_funcptr(&poll->table, vhost_poll_func);
         poll->mask = mask;
-        poll->dev = dev;
+        poll->dev = vq->dev;
+        poll->vq = vq;
         poll->wqh = NULL;
 
         vhost_work_init(&poll->work, fn);
 }
+EXPORT_SYMBOL_GPL(vhost_vq_poll_init);
+
+/* Init poll structure */
+void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn,
+                     __poll_t mask, struct vhost_dev *dev)
+{
+        vhost_vq_poll_init(poll, fn, mask, dev->vqs[0]);
+}
 EXPORT_SYMBOL_GPL(vhost_poll_init);
 
 /* Start polling a file. We add ourselves to file's wait queue. The caller must
@@ -232,31 +241,74 @@ void vhost_poll_stop(struct vhost_poll *poll)
 }
 EXPORT_SYMBOL_GPL(vhost_poll_stop);
 
-void vhost_work_flush(struct vhost_dev *dev, struct vhost_work *work)
+
+static void vhost_vq_poll_start_work(struct vhost_work *w)
+{
+        struct vhost_virtqueue *vq = container_of(w, struct vhost_virtqueue,
+                                                  work);
+
+        vhost_poll_start(&vq->poll, vq->kick);
+}
+
+static int vhost_vq_worker(void *data);
+
+static int vhost_vq_poll_start(struct vhost_virtqueue *vq)
+{
+        if (!vq->worker) {
+                vq->worker = kthread_create(vhost_vq_worker, vq, "vhost-%d/%i",
+                                            vq->dev->pid, vq->index);
+                if (IS_ERR(vq->worker)) {
+                        int ret = PTR_ERR(vq->worker);
+
+                        pr_err("%s: can't create vq worker: %d\n", __func__,
+                               ret);
+                        vq->worker = NULL;
+                        return ret;
+                }
+        }
+        vhost_work_init(&vq->work, vhost_vq_poll_start_work);
+        vhost_vq_work_queue(vq, &vq->work);
+        return 0;
+}
+
+static void vhost_vq_work_flush(struct vhost_virtqueue *vq,
+                                struct vhost_work *work)
 {
         struct vhost_flush_struct flush;
 
-        if (dev->worker) {
+        if (vq->worker) {
                 init_completion(&flush.wait_event);
                 vhost_work_init(&flush.work, vhost_flush_work);
 
-                vhost_work_queue(dev, &flush.work);
+                vhost_vq_work_queue(vq, &flush.work);
                 wait_for_completion(&flush.wait_event);
         }
 }
+EXPORT_SYMBOL_GPL(vhost_vq_work_flush);
+
+void vhost_work_flush(struct vhost_dev *dev, struct vhost_work *work)
+{
+        vhost_vq_work_flush(dev->vqs[0], work);
+}
 EXPORT_SYMBOL_GPL(vhost_work_flush);
 
 /* Flush any work that has been scheduled. When calling this, don't hold any
  * locks that are also used by the callback.
  */
 void vhost_poll_flush(struct vhost_poll *poll)
 {
-        vhost_work_flush(poll->dev, &poll->work);
+        vhost_vq_work_flush(poll->vq, &poll->work);
 }
 EXPORT_SYMBOL_GPL(vhost_poll_flush);
 
 void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work)
 {
-        if (!dev->worker)
+        return vhost_vq_work_queue(dev->vqs[0], work);
+}
+EXPORT_SYMBOL_GPL(vhost_work_queue);
+
+void vhost_vq_work_queue(struct vhost_virtqueue *vq, struct vhost_work *work)
+{
+        if (!vq->worker)
                 return;
 
         if (!test_and_set_bit(VHOST_WORK_QUEUED, &work->flags)) {
@@ -264,22 +316,22 @@ void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work)
                  * sure it was not in the list.
                  * test_and_set_bit() implies a memory barrier.
                  */
-                llist_add(&work->node, &dev->work_list);
-                wake_up_process(dev->worker);
+                llist_add(&work->node, &vq->work_list);
+                wake_up_process(vq->worker);
         }
 }
-EXPORT_SYMBOL_GPL(vhost_work_queue);
+EXPORT_SYMBOL_GPL(vhost_vq_work_queue);
 
 /* A lockless hint for busy polling code to exit the loop */
 bool vhost_has_work(struct vhost_dev *dev)
 {
-        return !llist_empty(&dev->work_list);
+        return !llist_empty(&dev->vqs[0]->work_list);
 }
 EXPORT_SYMBOL_GPL(vhost_has_work);
 
 void vhost_poll_queue(struct vhost_poll *poll)
 {
-        vhost_work_queue(poll->dev, &poll->work);
+        vhost_vq_work_queue(poll->vq, &poll->work);
 }
 EXPORT_SYMBOL_GPL(vhost_poll_queue);
 
@@ -333,9 +385,10 @@ static void vhost_vq_reset(struct vhost_dev *dev,
         __vhost_vq_meta_reset(vq);
 }
 
-static int vhost_worker(void *data)
+static int vhost_vq_worker(void *data)
 {
-        struct vhost_dev *dev = data;
+        struct vhost_virtqueue *vq = data;
+        struct vhost_dev *dev = vq->dev;
         struct vhost_work *work, *work_next;
         struct llist_node *node;
         mm_segment_t oldfs = get_fs();
@@ -351,8 +404,7 @@ static int vhost_worker(void *data)
                         __set_current_state(TASK_RUNNING);
                         break;
                 }
-
-                node = llist_del_all(&dev->work_list);
+                node = llist_del_all(&vq->work_list);
                 if (!node)
                         schedule();
 
@@ -429,25 +481,26 @@ void vhost_dev_init(struct vhost_dev *dev,
         dev->umem = NULL;
         dev->iotlb = NULL;
         dev->mm = NULL;
-        dev->worker = NULL;
-        init_llist_head(&dev->work_list);
         init_waitqueue_head(&dev->wait);
         INIT_LIST_HEAD(&dev->read_list);
         INIT_LIST_HEAD(&dev->pending_list);
         spin_lock_init(&dev->iotlb_lock);
 
-
         for (i = 0; i < dev->nvqs; ++i) {
                 vq = dev->vqs[i];
+                vq->index = i;
                 vq->log = NULL;
                 vq->indirect = NULL;
                 vq->heads = NULL;
                 vq->dev = dev;
+                vq->worker = NULL;
                 mutex_init(&vq->mutex);
                 vhost_vq_reset(dev, vq);
+                init_llist_head(&vq->work_list);
+                vq->worker = NULL;
                 if (vq->handle_kick)
-                        vhost_poll_init(&vq->poll, vq->handle_kick,
-                                        EPOLLIN, dev);
+                        vhost_vq_poll_init(&vq->poll, vq->handle_kick,
+                                           EPOLLIN, vq);
         }
 }
 EXPORT_SYMBOL_GPL(vhost_dev_init);
@@ -506,14 +559,16 @@ long vhost_dev_set_owner(struct vhost_dev *dev)
 
         /* No owner, become one */
         dev->mm = get_task_mm(current);
-        worker = kthread_create(vhost_worker, dev, "vhost-%d", current->pid);
+        dev->pid = current->pid;
+        worker = kthread_create(vhost_vq_worker, dev->vqs[0], "vhost-%d/0",
+                                current->pid);
         if (IS_ERR(worker)) {
                 err = PTR_ERR(worker);
                 goto err_worker;
         }
 
-        dev->worker = worker;
-        wake_up_process(worker);        /* avoid contributing to loadavg */
+        dev->vqs[0]->worker = worker;
+        wake_up_process(worker);        /* avoid contributing to loadavg */
 
         err = vhost_attach_cgroups(dev);
         if (err)
@@ -526,7 +581,7 @@ long vhost_dev_set_owner(struct vhost_dev *dev)
         return 0;
 err_cgroup:
         kthread_stop(worker);
-        dev->worker = NULL;
+        dev->vqs[0]->worker = NULL;
 err_worker:
         if (dev->mm)
                 mmput(dev->mm);
@@ -638,11 +693,15 @@ void vhost_dev_cleanup(struct vhost_dev *dev)
         dev->iotlb = NULL;
         vhost_clear_msg(dev);
         wake_up_interruptible_poll(&dev->wait, EPOLLIN | EPOLLRDNORM);
-        WARN_ON(!llist_empty(&dev->work_list));
-        if (dev->worker) {
-                kthread_stop(dev->worker);
-                dev->worker = NULL;
+
+        for (i = 0; i < dev->nvqs; ++i) {
+                WARN_ON(!llist_empty(&dev->vqs[i]->work_list));
+                if (dev->vqs[i]->worker) {
+                        kthread_stop(dev->vqs[i]->worker);
+                        dev->vqs[i]->worker = NULL;
+                }
         }
+
         if (dev->mm)
                 mmput(dev->mm);
         dev->mm = NULL;
@@ -1564,7 +1623,7 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg
         fput(filep);
 
         if (pollstart && vq->handle_kick)
-                r = vhost_poll_start(&vq->poll, vq->kick);
+                r = vhost_vq_poll_start(vq);
 
         mutex_unlock(&vq->mutex);
 
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index 466ef7542291..c00733fac49f 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -32,6 +32,7 @@ struct vhost_poll {
         struct vhost_work         work;
         __poll_t                  mask;
         struct vhost_dev         *dev;
+        struct vhost_virtqueue   *vq;
 };
 
 void vhost_work_init(struct vhost_work *work, vhost_work_fn_t fn);
@@ -86,6 +87,7 @@ struct vhost_virtqueue {
 
         /* The actual ring of buffers. */
         struct mutex mutex;
+        unsigned int index;
         unsigned int num;
         struct vring_desc __user *desc;
         struct vring_avail __user *avail;
@@ -145,6 +147,10 @@ struct vhost_virtqueue {
         bool user_be;
 #endif
         u32 busyloop_timeout;
+
+        struct llist_head work_list;
+        struct task_struct *worker;
+        struct vhost_work work;
 };
 
 struct vhost_msg_node {
@@ -158,12 +164,11 @@ struct vhost_msg_node {
 
 struct vhost_dev {
         struct mm_struct *mm;
+        pid_t pid;
         struct mutex mutex;
         struct vhost_virtqueue **vqs;
         int nvqs;
         struct eventfd_ctx *log_ctx;
-        struct llist_head work_list;
-        struct task_struct *worker;
         struct vhost_umem *umem;
         struct vhost_umem *iotlb;
         spinlock_t iotlb_lock;
@@ -185,6 +190,8 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg
 bool vhost_vq_access_ok(struct vhost_virtqueue *vq);
 bool vhost_log_access_ok(struct vhost_dev *);
 
+void vhost_vq_work_queue(struct vhost_virtqueue *vq, struct vhost_work *work);
+
 int vhost_get_vq_desc(struct vhost_virtqueue *, struct iovec iov[],
                       unsigned int iov_count, unsigned int *out_num,
                       unsigned int *in_num,
-- 
2.17.1