On Tue, Sep 21, 2021 at 04:52:18PM -0500, Mike Christie wrote: > For vhost workers we use the kthread API which inherits its values from > and checks against the kthreadd thread. This results in cgroups v2 not > working and the wrong RLIMITs being checked. This patch has us use the > kernel_copy_process function which will inherit its values/checks from the > thread that owns the device. > > Signed-off-by: Mike Christie <michael.christie@xxxxxxxxxx> Acked-by: Michael S. Tsirkin <mst@xxxxxxxxxx> Feel free to merge with other bits. > --- > drivers/vhost/vhost.c | 68 ++++++++++++++++--------------------------- > drivers/vhost/vhost.h | 7 ++++- > 2 files changed, 31 insertions(+), 44 deletions(-) > > diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c > index c9a1f706989c..7a5142dcde1b 100644 > --- a/drivers/vhost/vhost.c > +++ b/drivers/vhost/vhost.c > @@ -22,7 +22,6 @@ > #include <linux/slab.h> > #include <linux/vmalloc.h> > #include <linux/kthread.h> > -#include <linux/cgroup.h> > #include <linux/module.h> > #include <linux/sort.h> > #include <linux/sched/mm.h> > @@ -344,17 +343,14 @@ static void vhost_vq_reset(struct vhost_dev *dev, > static int vhost_worker(void *data) > { > struct vhost_worker *worker = data; > - struct vhost_dev *dev = worker->dev; > struct vhost_work *work, *work_next; > struct llist_node *node; > > - kthread_use_mm(dev->mm); > - > for (;;) { > /* mb paired w/ kthread_stop */ > set_current_state(TASK_INTERRUPTIBLE); > > - if (kthread_should_stop()) { > + if (test_bit(VHOST_WORKER_FLAG_STOP, &worker->flags)) { > __set_current_state(TASK_RUNNING); > break; > } > @@ -376,8 +372,9 @@ static int vhost_worker(void *data) > schedule(); > } > } > - kthread_unuse_mm(dev->mm); > - return 0; > + > + complete(worker->exit_done); > + do_exit(0); > } > > static void vhost_vq_free_iovecs(struct vhost_virtqueue *vq) > @@ -517,31 +514,6 @@ long vhost_dev_check_owner(struct vhost_dev *dev) > } > EXPORT_SYMBOL_GPL(vhost_dev_check_owner); > > -struct 
vhost_attach_cgroups_struct { > - struct vhost_work work; > - struct task_struct *owner; > - int ret; > -}; > - > -static void vhost_attach_cgroups_work(struct vhost_work *work) > -{ > - struct vhost_attach_cgroups_struct *s; > - > - s = container_of(work, struct vhost_attach_cgroups_struct, work); > - s->ret = cgroup_attach_task_all(s->owner, current); > -} > - > -static int vhost_attach_cgroups(struct vhost_dev *dev) > -{ > - struct vhost_attach_cgroups_struct attach; > - > - attach.owner = current; > - vhost_work_init(&attach.work, vhost_attach_cgroups_work); > - vhost_work_queue(dev, &attach.work); > - vhost_work_dev_flush(dev); > - return attach.ret; > -} > - > /* Caller should have device mutex */ > bool vhost_dev_has_owner(struct vhost_dev *dev) > { > @@ -579,6 +551,16 @@ static void vhost_detach_mm(struct vhost_dev *dev) > dev->mm = NULL; > } > > +static void vhost_worker_stop(struct vhost_worker *worker) > +{ > + DECLARE_COMPLETION_ONSTACK(exit_done); > + > + worker->exit_done = &exit_done; > + set_bit(VHOST_WORKER_FLAG_STOP, &worker->flags); > + wake_up_process(worker->task); > + wait_for_completion(worker->exit_done); > +} > + > static void vhost_worker_free(struct vhost_dev *dev) > { > struct vhost_worker *worker = dev->worker; > @@ -588,7 +570,7 @@ static void vhost_worker_free(struct vhost_dev *dev) > > dev->worker = NULL; > WARN_ON(!llist_empty(&worker->work_list)); > - kthread_stop(worker->task); > + vhost_worker_stop(worker); > kfree(worker); > } > > @@ -603,27 +585,27 @@ static int vhost_worker_create(struct vhost_dev *dev) > return -ENOMEM; > > dev->worker = worker; > - worker->dev = dev; > worker->kcov_handle = kcov_common_handle(); > init_llist_head(&worker->work_list); > > - task = kthread_create(vhost_worker, worker, "vhost-%d", current->pid); > + /* > + * vhost used to use the kthread API which ignores all signals by > + * default and the drivers expect this behavior. 
So we do not want to > + * inherit the parent's signal handlers and set our worker to ignore > + * everything below. > + */ > + task = kernel_worker(vhost_worker, worker, NUMA_NO_NODE, > + CLONE_FS | CLONE_CLEAR_SIGHAND, > + KERN_WORKER_NO_FILES | KERN_WORKER_NO_SIGS); > if (IS_ERR(task)) { > ret = PTR_ERR(task); > goto free_worker; > } > > worker->task = task; > - wake_up_process(task); /* avoid contributing to loadavg */ > - > - ret = vhost_attach_cgroups(dev); > - if (ret) > - goto stop_worker; > - > + kernel_worker_start(task, "vhost-%d", current->pid); > return 0; > > -stop_worker: > - kthread_stop(worker->task); > free_worker: > kfree(worker); > dev->worker = NULL; > diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h > index 102ce25e4e13..09748694cb66 100644 > --- a/drivers/vhost/vhost.h > +++ b/drivers/vhost/vhost.h > @@ -25,11 +25,16 @@ struct vhost_work { > unsigned long flags; > }; > > +enum { > + VHOST_WORKER_FLAG_STOP, > +}; > + > struct vhost_worker { > struct task_struct *task; > + struct completion *exit_done; > struct llist_head work_list; > - struct vhost_dev *dev; > u64 kcov_handle; > + unsigned long flags; > }; > > /* Poll a file (eventfd or socket) */ > -- > 2.25.1 _______________________________________________ Virtualization mailing list Virtualization@xxxxxxxxxxxxxxxxxxxxxxxxxx https://lists.linuxfoundation.org/mailman/listinfo/virtualization