On Tue, May 17, 2022 at 02:08:51PM -0400, Matthew Rosato wrote: > Rather than relying on a notifier for associating the KVM with > the group, let's assume that the association has already been > made prior to device_open. The first time a device is opened > associate the group KVM with the device. This also fixes a user triggerable oops in gvt > Suggested-by: Jason Gunthorpe <jgg@xxxxxxxxxx> You can add my signed-off-by as well > diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c > index cfcff7764403..c5d421eda275 100644 > +++ b/drivers/vfio/vfio.c > @@ -10,6 +10,7 @@ > * Author: Tom Lyon, pugs@xxxxxxxxx > */ > > +#include "linux/kvm_host.h" This is the wrong format of include (editor automation, sigh) > @@ -1083,6 +1084,13 @@ static struct file *vfio_device_open(struct vfio_device *device) > > mutex_lock(&device->dev_set->lock); > device->open_count++; > + down_write(&device->group->group_rwsem); Read I suppose > + if (device->open_count == 1 && device->group->kvm) { > + device->kvm = device->group->kvm; > + kvm_get_kvm(device->kvm); > + } > + up_write(&device->group->group_rwsem); Yeah, this is OK, not very elegant though I was looking at this - but it could come later too: diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 8e11d9119418be..c5d8dfe8314108 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -10,7 +10,6 @@ * Author: Tom Lyon, pugs@xxxxxxxxx */ -#include "linux/kvm_host.h" #include <linux/cdev.h> #include <linux/compat.h> #include <linux/device.h> @@ -33,6 +32,7 @@ #include <linux/vfio.h> #include <linux/wait.h> #include <linux/sched/signal.h> +#include <linux/kvm_host.h> #include "vfio.h" #define DRIVER_VERSION "0.3" @@ -1059,93 +1059,71 @@ static int vfio_device_assign_container(struct vfio_device *device) static void vfio_device_unassign_container(struct vfio_device *device) { - down_write(&device->group->group_rwsem); + lockdep_assert_held(&device->group->group_rwsem); + WARN_ON(device->group->container_users <= 1); device->group->container_users--; fput(device->group->opened_file); - up_write(&device->group->group_rwsem); } -static struct file *vfio_device_open(struct vfio_device *device) +static int vfio_device_open(struct vfio_device *device) { - struct file *filep; int ret; + lockdep_assert_held(&device->dev_set->lock); + + if (!try_module_get(device->dev->driver->owner)) + return -ENODEV; + down_write(&device->group->group_rwsem); ret = vfio_device_assign_container(device); - if (ret) { - up_write(&device->group->group_rwsem); - return ERR_PTR(ret); - } + if (ret) + goto err_unlock; if (device->ops->flags & VFIO_DEVICE_NEEDS_KVM) { - if (!device->group->kvm) { - up_write(&device->group->group_rwsem); + if (!device->group->kvm) goto err_unassign_container; - } device->kvm = device->group->kvm; kvm_get_kvm(device->kvm); } up_write(&device->group->group_rwsem); - if (!try_module_get(device->dev->driver->owner)) { - ret = -ENODEV; - goto err_put_kvm; - } - - mutex_lock(&device->dev_set->lock); - device->open_count++; - if (device->open_count == 1 && device->ops->open_device) { + if (device->ops->open_device) { ret = device->ops->open_device(device); if (ret) - goto err_undo_count; + goto err_put_kvm; } - mutex_unlock(&device->dev_set->lock); + return 0; - /* - * We can't use anon_inode_getfd() because we need to modify - * the f_mode flags directly to allow more than just ioctls - */ - filep = anon_inode_getfile("[vfio-device]", &vfio_device_fops, - device, O_RDWR); - if (IS_ERR(filep)) { - ret = PTR_ERR(filep); - goto err_close_device; +err_put_kvm: + if (device->kvm) { + kvm_put_kvm(device->kvm); + device->kvm = NULL; } + down_write(&device->group->group_rwsem); +err_unassign_container: + vfio_device_unassign_container(device); +err_unlock: + up_write(&device->group->group_rwsem); + module_put(device->dev->driver->owner); + return ret; +} - /* - * TODO: add an anon_inode interface to do this. - * Appears to be missing by lack of need rather than - * explicitly prevented. Now there's need. - */ - filep->f_mode |= (FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE); - - if (device->group->type == VFIO_NO_IOMMU) - dev_warn(device->dev, "vfio-noiommu device opened by user " - "(%s:%d)\n", current->comm, task_pid_nr(current)); - /* - * On success the ref of device is moved to the file and - * put in vfio_device_fops_release() - */ - return filep; +static void vfio_device_close(struct vfio_device *device) +{ + lockdep_assert_held(&device->dev_set->lock); -err_close_device: - mutex_lock(&device->dev_set->lock); - if (device->open_count == 1 && device->ops->close_device) + if (device->ops->close_device) device->ops->close_device(device); -err_undo_count: - device->open_count--; - mutex_unlock(&device->dev_set->lock); - module_put(device->dev->driver->owner); -err_put_kvm: if (device->kvm) { kvm_put_kvm(device->kvm); device->kvm = NULL; } -err_unassign_container: + down_write(&device->group->group_rwsem); vfio_device_unassign_container(device); - return ERR_PTR(ret); + up_write(&device->group->group_rwsem); + module_put(device->dev->driver->owner); } static int vfio_group_get_device_fd(struct vfio_group *group, char *buf) @@ -1159,23 +1137,61 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf) if (IS_ERR(device)) return PTR_ERR(device); - fdno = get_unused_fd_flags(O_CLOEXEC); - if (fdno < 0) { - ret = fdno; - goto err_put_device; + mutex_lock(&device->dev_set->lock); + device->open_count++; + if (device->open_count == 1) { + ret = vfio_device_open(device); + if (ret) { + device->open_count--; + mutex_unlock(&device->dev_set->lock); + goto err_put_device; + } } + mutex_unlock(&device->dev_set->lock); - filep = vfio_device_open(device); + /* + * We can't use anon_inode_getfd() because we need to modify + * the f_mode flags directly to allow more than just ioctls + */ + filep = anon_inode_getfile("[vfio-device]", &vfio_device_fops, device, + O_RDWR); if (IS_ERR(filep)) { ret = PTR_ERR(filep); - goto err_put_fdno; + goto err_close_device; + } + + /* + * TODO: add an anon_inode interface to do this. + * Appears to be missing by lack of need rather than + * explicitly prevented. Now there's need. + */ + filep->f_mode |= (FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE); + + fdno = get_unused_fd_flags(O_CLOEXEC); + if (fdno < 0) { + ret = fdno; + goto err_put_file; } + if (device->group->type == VFIO_NO_IOMMU) + dev_warn(device->dev, "vfio-noiommu device opened by user " + "(%s:%d)\n", current->comm, task_pid_nr(current)); + + /* + * On success the ref of device is moved to the file and put in + * vfio_device_fops_release(). + */ fd_install(fdno, filep); return fdno; -err_put_fdno: - put_unused_fd(fdno); +err_put_file: + fput(filep); +err_close_device: + mutex_lock(&device->dev_set->lock); + if (device->open_count == 1) + vfio_device_close(device); + device->open_count--; + mutex_unlock(&device->dev_set->lock); err_put_device: vfio_device_put(device); return ret; @@ -1333,19 +1349,11 @@ static int vfio_device_fops_release(struct inode *inode, struct file *filep) mutex_lock(&device->dev_set->lock); vfio_assert_device_open(device); - if (device->open_count == 1 && device->ops->close_device) - device->ops->close_device(device); + if (device->open_count == 1) + vfio_device_close(device); device->open_count--; mutex_unlock(&device->dev_set->lock); - module_put(device->dev->driver->owner); - - if (device->kvm) { - kvm_put_kvm(device->kvm); - device->kvm = NULL; - } - vfio_device_unassign_container(device); - vfio_device_put(device); return 0;