Re: [PATCH v2] vfio: Follow a strict lifetime for struct iommu_group

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Am 05.10.22 um 16:01 schrieb Jason Gunthorpe:
[..]
commit f8b993620af72fa5f15bd4c1515868013c1c173d
Author: Jason Gunthorpe <jgg@xxxxxxxx>
Date:   Tue Oct 4 13:14:37 2022 -0300

     vfio: Make the group FD disassociate from the iommu_group
     Allow the vfio_group struct to exist with a NULL iommu_group pointer. When
     the pointer is NULL the vfio_group users promise not to touch the
     iommu_group. This allows a driver to be hot unplugged while userspace is
     keeping the group FD open.
     SPAPR mode is excluded from this behavior because of how it wrongly hacks
     part of its iommu interface through KVM. Due to this we loose control over
     what it is doing and cannot revoke the iommu_group usage in the IOMMU
     layer via vfio_group_detach_container().
     Thus, for SPAPR the group FDs must still be closed before a device can be
     hot unplugged.
     This fixes a userspace regression where we learned that virtnodedevd
     leaves a group FD open even though the /dev/ node for it has been deleted
     and all the drivers for it unplugged.
     Fixes: ca5f21b25749 ("vfio: Follow a strict lifetime for struct iommu_group")
     Reported-by: Christian Borntraeger <borntraeger@xxxxxxxxxxxxx>
     Signed-off-by: Jason Gunthorpe <jgg@xxxxxxxxxx>

Looks better now (I also did a quick check with vfio-pci on s390)

Tested-by: Christian Borntraeger <borntraeger@xxxxxxxxxx>

Alex I think it would be good to schedule this for vfio-next as well. Do you want Jason
to send this patch as a proper patch outline of this mail thread?



diff --git a/drivers/vfio/pci/vfio_pci_core.c b/drivers/vfio/pci/vfio_pci_core.c
index 59a28251bb0b97..badc9d828cac20 100644
--- a/drivers/vfio/pci/vfio_pci_core.c
+++ b/drivers/vfio/pci/vfio_pci_core.c
@@ -1313,7 +1313,7 @@ static int vfio_pci_ioctl_pci_hot_reset(struct vfio_pci_core_device *vdev,
          }
          /* Ensure the FD is a vfio group FD.*/
-        if (!vfio_file_iommu_group(file)) {
+        if (!vfio_file_is_group(file)) {
              fput(file);
              ret = -EINVAL;
              break;
diff --git a/drivers/vfio/vfio.h b/drivers/vfio/vfio.h
index 4d2de02f2ced6e..4e10a281420e66 100644
--- a/drivers/vfio/vfio.h
+++ b/drivers/vfio/vfio.h
@@ -59,6 +59,7 @@ struct vfio_group {
      struct mutex            group_lock;
      struct kvm            *kvm;
      struct file            *opened_file;
+    bool                preserve_iommu_group;
      struct swait_queue_head        opened_file_wait;
      struct blocking_notifier_head    notifier;
  };
diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
index 9b1e5fd5f7b73c..13d22bd84afc47 100644
--- a/drivers/vfio/vfio_main.c
+++ b/drivers/vfio/vfio_main.c
@@ -133,6 +133,10 @@ __vfio_group_get_from_iommu(struct iommu_group *iommu_group)
  {
      struct vfio_group *group;
+    /*
+     * group->iommu_group from the vfio.group_list cannot be NULL
+     * under the vfio.group_lock.
+     */
      list_for_each_entry(group, &vfio.group_list, vfio_next) {
          if (group->iommu_group == iommu_group) {
              refcount_inc(&group->drivers);
@@ -159,7 +163,7 @@ static void vfio_group_release(struct device *dev)
      mutex_destroy(&group->device_lock);
      mutex_destroy(&group->group_lock);
-    iommu_group_put(group->iommu_group);
+    WARN_ON(group->iommu_group);
      ida_free(&vfio.group_ida, MINOR(group->dev.devt));
      kfree(group);
  }
@@ -248,6 +252,7 @@ static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group,
  static void vfio_device_remove_group(struct vfio_device *device)
  {
      struct vfio_group *group = device->group;
+    struct iommu_group *iommu_group;
      if (group->type == VFIO_NO_IOMMU || group->type == VFIO_EMULATED_IOMMU)
          iommu_group_remove_device(device->dev);
@@ -265,13 +270,36 @@ static void vfio_device_remove_group(struct vfio_device *device)
       */
      cdev_device_del(&group->cdev, &group->dev);
+    mutex_lock(&group->group_lock);
+    /*
+     * These data structures all have paired operations that can only be
+     * undone when the caller holds a live reference on the device. Since
+     * all pairs must be undone these WARN_ON's indicate some caller did not
+     * properly hold the group reference.l.
+     */
+    WARN_ON(!list_empty(&group->device_list));
+    WARN_ON(group->notifier.head);
+
+    /*
+     * Revoke all users of group->iommu_group. At this point we know there
+     * are no devices active because we are unplugging the last one. Setting
+     * iommu_group to NULL blocks all new users.
+     */
+    if (group->container)
+        vfio_group_detach_container(group);
+    iommu_group = group->iommu_group;
+    group->iommu_group = NULL;
+    mutex_unlock(&group->group_lock);
+
      /*
-     * Before we allow the last driver in the group to be unplugged the
-     * group must be sanitized so nothing else is or can reference it. This
-     * is because the group->iommu_group pointer should only be used so long
-     * as a device driver is attached to a device in the group.
+     * Normally we can set the iommu_group to NULL above and that will
+     * prevent any users from touching it. However, the SPAPR kvm path takes
+     * a reference to the iommu_group and keeps using it in arch code out
+     * side our control. So if this path is triggred we have no choice but
+     * to wait for the group FD to be closed to be sure everyone has stopped
+     * touching the group.
       */
-    while (group->opened_file) {
+    while (group->preserve_iommu_group && group->opened_file) {
          mutex_unlock(&vfio.group_lock);
          swait_event_idle_exclusive(group->opened_file_wait,
                         !group->opened_file);
@@ -279,17 +307,7 @@ static void vfio_device_remove_group(struct vfio_device *device)
      }
      mutex_unlock(&vfio.group_lock);
-    /*
-     * These data structures all have paired operations that can only be
-     * undone when the caller holds a live reference on the group. Since all
-     * pairs must be undone these WARN_ON's indicate some caller did not
-     * properly hold the group reference.
-     */
-    WARN_ON(!list_empty(&group->device_list));
-    WARN_ON(group->container || group->container_users);
-    WARN_ON(group->notifier.head);
-    group->iommu_group = NULL;
-
+    iommu_group_put(iommu_group);
      put_device(&group->dev);
  }
@@ -531,6 +549,10 @@ static int __vfio_register_dev(struct vfio_device *device,
      existing_device = vfio_group_get_device(group, device->dev);
      if (existing_device) {
+        /*
+         * group->iommu_group is non-NULL because we hold the drivers
+         * refcount.
+         */
          dev_WARN(device->dev, "Device already exists on group %d\n",
               iommu_group_id(group->iommu_group));
          vfio_device_put_registration(existing_device);
@@ -702,6 +724,11 @@ static int vfio_group_ioctl_set_container(struct vfio_group *group,
          ret = -EINVAL;
          goto out_unlock;
      }
+    if (!group->iommu_group) {
+        ret = -ENODEV;
+        goto out_unlock;
+    }
+
      container = vfio_container_from_file(f.file);
      ret = -EINVAL;
      if (container) {
@@ -862,6 +889,11 @@ static int vfio_group_ioctl_get_status(struct vfio_group *group,
      status.flags = 0;
      mutex_lock(&group->group_lock);
+    if (!group->iommu_group) {
+        mutex_unlock(&group->group_lock);
+        return -ENODEV;
+    }
+
      if (group->container)
          status.flags |= VFIO_GROUP_FLAGS_CONTAINER_SET |
                  VFIO_GROUP_FLAGS_VIABLE;
@@ -938,13 +970,6 @@ static int vfio_group_fops_release(struct inode *inode, struct file *filep)
      filep->private_data = NULL;
      mutex_lock(&group->group_lock);
-    /*
-     * Device FDs hold a group file reference, therefore the group release
-     * is only called when there are no open devices.
-     */
-    WARN_ON(group->notifier.head);
-    if (group->container)
-        vfio_group_detach_container(group);
      group->opened_file = NULL;
      mutex_unlock(&group->group_lock);
      swake_up_one(&group->opened_file_wait);
@@ -1553,17 +1578,41 @@ static const struct file_operations vfio_device_fops = {
   * @file: VFIO group file
   *
   * The returned iommu_group is valid as long as a ref is held on the file.
+ * This function is deprecated, only the SPAPR path in kvm should call it.
   */
  struct iommu_group *vfio_file_iommu_group(struct file *file)
  {
      struct vfio_group *group = file->private_data;
+    struct iommu_group *iommu_group = NULL;
+
+    if (!IS_ENABLED(CONFIG_SPAPR_TCE_IOMMU))
+        return NULL;
      if (file->f_op != &vfio_group_fops)
          return NULL;
-    return group->iommu_group;
+
+    mutex_lock(&vfio.group_lock);
+    mutex_lock(&group->group_lock);
+    if (group->iommu_group) {
+        iommu_group = group->iommu_group;
+        group->preserve_iommu_group = true;
+    }
+    mutex_unlock(&group->group_lock);
+    mutex_unlock(&vfio.group_lock);
+    return iommu_group;
  }
  EXPORT_SYMBOL_GPL(vfio_file_iommu_group);
+/**
+ * vfio_file_is_group - True if the file is usable with VFIO aPIS
+ * @file: VFIO group file
+ */
+bool vfio_file_is_group(struct file *file)
+{
+    return file->f_op == &vfio_group_fops;
+}
+EXPORT_SYMBOL_GPL(vfio_file_is_group);
+
  /**
   * vfio_file_enforced_coherent - True if the DMA associated with the VFIO file
   *        is always CPU cache coherent
diff --git a/include/linux/vfio.h b/include/linux/vfio.h
index 73bcb92179a224..bd9faaab85de18 100644
--- a/include/linux/vfio.h
+++ b/include/linux/vfio.h
@@ -199,6 +199,7 @@ int vfio_mig_get_next_state(struct vfio_device *device,
   * External user API
   */
  struct iommu_group *vfio_file_iommu_group(struct file *file);
+bool vfio_file_is_group(struct file *file);
  bool vfio_file_enforced_coherent(struct file *file);
  void vfio_file_set_kvm(struct file *file, struct kvm *kvm);
  bool vfio_file_has_dev(struct file *file, struct vfio_device *device);
diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
index ce1b01d02c5197..54aec3b0559c70 100644
--- a/virt/kvm/vfio.c
+++ b/virt/kvm/vfio.c
@@ -61,6 +61,23 @@ static bool kvm_vfio_file_enforced_coherent(struct file *file)
      return ret;
  }
+static bool kvm_vfio_file_is_group(struct file *file)
+{
+    bool (*fn)(struct file *file);
+    bool ret;
+
+    fn = symbol_get(vfio_file_is_group);
+    if (!fn)
+        return false;
+
+    ret = fn(file);
+
+    symbol_put(vfio_file_is_group);
+
+    return ret;
+}
+
+#ifdef CONFIG_SPAPR_TCE_IOMMU
  static struct iommu_group *kvm_vfio_file_iommu_group(struct file *file)
  {
      struct iommu_group *(*fn)(struct file *file);
@@ -77,7 +94,6 @@ static struct iommu_group *kvm_vfio_file_iommu_group(struct file *file)
      return ret;
  }
-#ifdef CONFIG_SPAPR_TCE_IOMMU
  static void kvm_spapr_tce_release_vfio_group(struct kvm *kvm,
                           struct kvm_vfio_group *kvg)
  {
@@ -136,7 +152,7 @@ static int kvm_vfio_group_add(struct kvm_device *dev, unsigned int fd)
          return -EBADF;
      /* Ensure the FD is a vfio group FD.*/
-    if (!kvm_vfio_file_iommu_group(filp)) {
+    if (!kvm_vfio_file_is_group(filp)) {
          ret = -EINVAL;
          goto err_fput;
      }



[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux