On 5/23/22 4:41 PM, Alex Williamson wrote: > > Hi Zhi & Zhenyu, > > Please review gvt changes below, I'd prefer to get your ack included. > Thanks! > > Alex > > On Thu, 19 May 2022 14:33:11 -0400 > Matthew Rosato <mjrosato@xxxxxxxxxxxxx> wrote: > >> Rather than relying on a notifier for associating the KVM with >> the group, let's assume that the association has already been >> made prior to device_open. The first time a device is opened >> associate the group KVM with the device. >> >> This fixes a user-triggerable oops in GVT. >> >> Reviewed-by: Tony Krowiak <akrowiak@xxxxxxxxxxxxx> >> Reviewed-by: Kevin Tian <kevin.tian@xxxxxxxxx> >> Reviewed-by: Christoph Hellwig <hch@xxxxxx> >> Signed-off-by: Jason Gunthorpe <jgg@xxxxxxxxxx> >> Signed-off-by: Matthew Rosato <mjrosato@xxxxxxxxxxxxx> >> --- >> drivers/gpu/drm/i915/gvt/gtt.c | 4 +- >> drivers/gpu/drm/i915/gvt/gvt.h | 3 - >> drivers/gpu/drm/i915/gvt/kvmgt.c | 82 ++++++-------------------- >> drivers/s390/crypto/vfio_ap_ops.c | 35 ++--------- >> drivers/s390/crypto/vfio_ap_private.h | 3 - >> drivers/vfio/vfio.c | 83 ++++++++++----------------- >> include/linux/vfio.h | 6 +- >> 7 files changed, 57 insertions(+), 159 deletions(-) >> >> diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c >> index 9c5cc2800975..b4f69364f9a1 100644 >> --- a/drivers/gpu/drm/i915/gvt/gtt.c >> +++ b/drivers/gpu/drm/i915/gvt/gtt.c >> @@ -51,7 +51,7 @@ static int preallocated_oos_pages = 8192; >> >> static bool intel_gvt_is_valid_gfn(struct intel_vgpu *vgpu, unsigned long gfn) >> { >> - struct kvm *kvm = vgpu->kvm; >> + struct kvm *kvm = vgpu->vfio_device.kvm; >> int idx; >> bool ret; >> >> @@ -1185,7 +1185,7 @@ static int is_2MB_gtt_possible(struct intel_vgpu *vgpu, >> >> if (!vgpu->attached) >> return -EINVAL; >> - pfn = gfn_to_pfn(vgpu->kvm, ops->get_pfn(entry)); >> + pfn = gfn_to_pfn(vgpu->vfio_device.kvm, ops->get_pfn(entry)); >> if (is_error_noslot_pfn(pfn)) >> return -EINVAL; >> return PageTransHuge(pfn_to_page(pfn)); >> diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h >> index 2af4c83e733c..aee1a45da74b 100644 >> --- a/drivers/gpu/drm/i915/gvt/gvt.h >> +++ b/drivers/gpu/drm/i915/gvt/gvt.h >> @@ -227,9 +227,6 @@ struct intel_vgpu { >> struct mutex cache_lock; >> >> struct notifier_block iommu_notifier; >> - struct notifier_block group_notifier; >> - struct kvm *kvm; >> - struct work_struct release_work; >> atomic_t released; >> >> struct kvm_page_track_notifier_node track_node; >> diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c >> index 7655ffa97d51..e2f6c56ab342 100644 >> --- a/drivers/gpu/drm/i915/gvt/kvmgt.c >> +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c >> @@ -228,8 +228,6 @@ static void intel_gvt_cleanup_vgpu_type_groups(struct intel_gvt *gvt) >> } >> } >> >> -static void intel_vgpu_release_work(struct work_struct *work); >> - >> static void gvt_unpin_guest_page(struct intel_vgpu *vgpu, unsigned long gfn, >> unsigned long size) >> { >> @@ -761,23 +759,6 @@ static int intel_vgpu_iommu_notifier(struct notifier_block *nb, >> return NOTIFY_OK; >> } >> >> -static int intel_vgpu_group_notifier(struct notifier_block *nb, >> - unsigned long action, void *data) >> -{ >> - struct intel_vgpu *vgpu = >> - container_of(nb, struct intel_vgpu, group_notifier); >> - >> - /* the only action we care about */ >> - if (action == VFIO_GROUP_NOTIFY_SET_KVM) { >> - vgpu->kvm = data; >> - >> - if (!data) >> - schedule_work(&vgpu->release_work); >> - } >> - >> - return NOTIFY_OK; >> -} >> - >> static bool __kvmgt_vgpu_exist(struct intel_vgpu *vgpu) >> { >> struct intel_vgpu *itr; >> @@ -789,7 +770,7 @@ static bool __kvmgt_vgpu_exist(struct intel_vgpu *vgpu) >> if (!itr->attached) >> continue; >> >> - if (vgpu->kvm == itr->kvm) { >> + if (vgpu->vfio_device.kvm == itr->vfio_device.kvm) { >> ret = true; >> goto out; >> } >> @@ -806,7 +787,6 @@ static int intel_vgpu_open_device(struct vfio_device *vfio_dev) >> int ret; >> >> vgpu->iommu_notifier.notifier_call = intel_vgpu_iommu_notifier; >> - vgpu->group_notifier.notifier_call = intel_vgpu_group_notifier; >> >> events = VFIO_IOMMU_NOTIFY_DMA_UNMAP; >> ret = vfio_register_notifier(vfio_dev, VFIO_IOMMU_NOTIFY, &events, >> @@ -817,38 +797,32 @@ static int intel_vgpu_open_device(struct vfio_device *vfio_dev) >> goto out; >> } >> >> - events = VFIO_GROUP_NOTIFY_SET_KVM; >> - ret = vfio_register_notifier(vfio_dev, VFIO_GROUP_NOTIFY, &events, >> - &vgpu->group_notifier); >> - if (ret != 0) { >> - gvt_vgpu_err("vfio_register_notifier for group failed: %d\n", >> - ret); >> - goto undo_iommu; >> - } >> - >> ret = -EEXIST; >> if (vgpu->attached) >> - goto undo_register; >> + goto undo_iommu; >> >> ret = -ESRCH; >> - if (!vgpu->kvm || vgpu->kvm->mm != current->mm) { >> + if (!vgpu->vfio_device.kvm || >> + vgpu->vfio_device.kvm->mm != current->mm) { >> gvt_vgpu_err("KVM is required to use Intel vGPU\n"); >> - goto undo_register; >> + goto undo_iommu; >> } >> >> + kvm_get_kvm(vgpu->vfio_device.kvm); >> + >> ret = -EEXIST; >> if (__kvmgt_vgpu_exist(vgpu)) >> - goto undo_register; >> + goto undo_iommu; >> >> vgpu->attached = true; >> - kvm_get_kvm(vgpu->kvm); >> >> kvmgt_protect_table_init(vgpu); >> gvt_cache_init(vgpu); >> >> vgpu->track_node.track_write = kvmgt_page_track_write; >> vgpu->track_node.track_flush_slot = kvmgt_page_track_flush_slot; >> - kvm_page_track_register_notifier(vgpu->kvm, &vgpu->track_node); >> + kvm_page_track_register_notifier(vgpu->vfio_device.kvm, >> + &vgpu->track_node); >> >> debugfs_create_ulong(KVMGT_DEBUGFS_FILENAME, 0444, vgpu->debugfs, >> &vgpu->nr_cache_entries); >> @@ -858,10 +832,6 @@ static int intel_vgpu_open_device(struct vfio_device *vfio_dev) >> atomic_set(&vgpu->released, 0); >> return 0; >> >> -undo_register: >> - vfio_unregister_notifier(vfio_dev, VFIO_GROUP_NOTIFY, >> - &vgpu->group_notifier); >> - >> undo_iommu: >> vfio_unregister_notifier(vfio_dev, VFIO_IOMMU_NOTIFY, >> &vgpu->iommu_notifier); >> @@ -880,8 +850,9 @@ static void intel_vgpu_release_msi_eventfd_ctx(struct intel_vgpu *vgpu) >> } >> } >> >> -static void __intel_vgpu_release(struct intel_vgpu *vgpu) >> +static void intel_vgpu_close_device(struct vfio_device *vfio_dev) >> { >> + struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev); >> struct drm_i915_private *i915 = vgpu->gvt->gt->i915; >> int ret; >> >> @@ -898,35 +869,19 @@ static void __intel_vgpu_release(struct intel_vgpu *vgpu) >> drm_WARN(&i915->drm, ret, >> "vfio_unregister_notifier for iommu failed: %d\n", ret); >> >> - ret = vfio_unregister_notifier(&vgpu->vfio_device, VFIO_GROUP_NOTIFY, >> - &vgpu->group_notifier); >> - drm_WARN(&i915->drm, ret, >> - "vfio_unregister_notifier for group failed: %d\n", ret); >> - >> debugfs_remove(debugfs_lookup(KVMGT_DEBUGFS_FILENAME, vgpu->debugfs)); >> >> - kvm_page_track_unregister_notifier(vgpu->kvm, &vgpu->track_node); >> - kvm_put_kvm(vgpu->kvm); >> + kvm_page_track_unregister_notifier(vgpu->vfio_device.kvm, >> + &vgpu->track_node); >> kvmgt_protect_table_destroy(vgpu); >> gvt_cache_destroy(vgpu); >> >> intel_vgpu_release_msi_eventfd_ctx(vgpu); >> >> - vgpu->kvm = NULL; >> vgpu->attached = false; >> -} >> - >> -static void intel_vgpu_close_device(struct vfio_device *vfio_dev) >> -{ >> - __intel_vgpu_release(vfio_dev_to_vgpu(vfio_dev)); >> -} >> - >> -static void intel_vgpu_release_work(struct work_struct *work) >> -{ >> - struct intel_vgpu *vgpu = >> - container_of(work, struct intel_vgpu, release_work); >> >> - __intel_vgpu_release(vgpu); >> + if (vgpu->vfio_device.kvm) >> + kvm_put_kvm(vgpu->vfio_device.kvm); >> } >> >> static u64 intel_vgpu_get_bar_addr(struct intel_vgpu *vgpu, int bar) >> @@ -1675,7 +1630,6 @@ static int intel_vgpu_probe(struct mdev_device *mdev) >> return PTR_ERR(vgpu); >> } >> >> - INIT_WORK(&vgpu->release_work, intel_vgpu_release_work); >> vfio_init_group_dev(&vgpu->vfio_device, &mdev->dev, >> &intel_vgpu_dev_ops); >> >> @@ -1713,7 +1667,7 @@ static struct mdev_driver intel_vgpu_mdev_driver = { >> >> int intel_gvt_page_track_add(struct intel_vgpu *info, u64 gfn) >> { >> - struct kvm *kvm = info->kvm; >> + struct kvm *kvm = info->vfio_device.kvm; >> struct kvm_memory_slot *slot; >> int idx; >> >> @@ -1743,7 +1697,7 @@ int intel_gvt_page_track_add(struct intel_vgpu *info, u64 gfn) >> >> int intel_gvt_page_track_remove(struct intel_vgpu *info, u64 gfn) >> { >> - struct kvm *kvm = info->kvm; >> + struct kvm *kvm = info->vfio_device.kvm; >> struct kvm_memory_slot *slot; >> int idx; >> >> diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c >> index e8914024f5b1..a7d2a95796d3 100644 >> --- a/drivers/s390/crypto/vfio_ap_ops.c >> +++ b/drivers/s390/crypto/vfio_ap_ops.c >> @@ -1284,25 +1284,6 @@ static void vfio_ap_mdev_unset_kvm(struct ap_matrix_mdev *matrix_mdev) >> } >> } >> >> -static int vfio_ap_mdev_group_notifier(struct notifier_block *nb, >> - unsigned long action, void *data) >> -{ >> - int notify_rc = NOTIFY_OK; >> - struct ap_matrix_mdev *matrix_mdev; >> - >> - if (action != VFIO_GROUP_NOTIFY_SET_KVM) >> - return NOTIFY_OK; >> - >> - matrix_mdev = container_of(nb, struct ap_matrix_mdev, group_notifier); >> - >> - if (!data) >> - vfio_ap_mdev_unset_kvm(matrix_mdev); >> - else if (vfio_ap_mdev_set_kvm(matrix_mdev, data)) >> - notify_rc = NOTIFY_DONE; >> - >> - return notify_rc; >> -} >> - >> static struct vfio_ap_queue *vfio_ap_find_queue(int apqn) >> { >> struct device *dev; >> @@ -1402,11 +1383,10 @@ static int vfio_ap_mdev_open_device(struct vfio_device *vdev) >> unsigned long events; >> int ret; >> >> - matrix_mdev->group_notifier.notifier_call = vfio_ap_mdev_group_notifier; >> - events = VFIO_GROUP_NOTIFY_SET_KVM; >> + if (!vdev->kvm) >> + return -EINVAL; >> >> - ret = vfio_register_notifier(vdev, VFIO_GROUP_NOTIFY, &events, >> - &matrix_mdev->group_notifier); >> + ret = vfio_ap_mdev_set_kvm(matrix_mdev, vdev->kvm); >> if (ret) >> return ret; >> >> @@ -1415,12 +1395,11 @@ static int vfio_ap_mdev_open_device(struct vfio_device *vdev) >> ret = vfio_register_notifier(vdev, VFIO_IOMMU_NOTIFY, &events, >> &matrix_mdev->iommu_notifier); >> if (ret) >> - goto out_unregister_group; >> + goto err_kvm; >> return 0; >> >> -out_unregister_group: >> - vfio_unregister_notifier(vdev, VFIO_GROUP_NOTIFY, >> - &matrix_mdev->group_notifier); >> +err_kvm: >> + vfio_ap_mdev_unset_kvm(matrix_mdev); >> return ret; >> } >> >> @@ -1431,8 +1410,6 @@ static void vfio_ap_mdev_close_device(struct vfio_device *vdev) >> >> vfio_unregister_notifier(vdev, VFIO_IOMMU_NOTIFY, >> &matrix_mdev->iommu_notifier); >> - vfio_unregister_notifier(vdev, VFIO_GROUP_NOTIFY, >> - &matrix_mdev->group_notifier); >> vfio_ap_mdev_unset_kvm(matrix_mdev); >> } >> >> diff --git a/drivers/s390/crypto/vfio_ap_private.h b/drivers/s390/crypto/vfio_ap_private.h >> index 648fcaf8104a..a26efd804d0d 100644 >> --- a/drivers/s390/crypto/vfio_ap_private.h >> +++ b/drivers/s390/crypto/vfio_ap_private.h >> @@ -81,8 +81,6 @@ struct ap_matrix { >> * @node: allows the ap_matrix_mdev struct to be added to a list >> * @matrix: the adapters, usage domains and control domains assigned to the >> * mediated matrix device. >> - * @group_notifier: notifier block used for specifying callback function for >> - * handling the VFIO_GROUP_NOTIFY_SET_KVM event >> * @iommu_notifier: notifier block used for specifying callback function for >> * handling the VFIO_IOMMU_NOTIFY_DMA_UNMAP even >> * @kvm: the struct holding guest's state >> @@ -94,7 +92,6 @@ struct ap_matrix_mdev { >> struct vfio_device vdev; >> struct list_head node; >> struct ap_matrix matrix; >> - struct notifier_block group_notifier; >> struct notifier_block iommu_notifier; >> struct kvm *kvm; >> crypto_hook pqap_hook; >> diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c >> index cfcff7764403..831fc722e3f8 100644 >> --- a/drivers/vfio/vfio.c >> +++ b/drivers/vfio/vfio.c >> @@ -1083,10 +1083,21 @@ static struct file *vfio_device_open(struct vfio_device *device) >> >> mutex_lock(&device->dev_set->lock); >> device->open_count++; >> - if (device->open_count == 1 && device->ops->open_device) { >> - ret = device->ops->open_device(device); >> - if (ret) >> - goto err_undo_count; >> + if (device->open_count == 1) { >> + /* >> + * Here we pass the KVM pointer with the group under the read >> + * lock. If the device driver will use it, it must obtain a >> + * reference and release it during close_device. >> + */ >> + down_read(&device->group->group_rwsem); >> + device->kvm = device->group->kvm; >> + >> + if (device->ops->open_device) { >> + ret = device->ops->open_device(device); >> + if (ret) >> + goto err_undo_count; >> + } >> + up_read(&device->group->group_rwsem); >> } >> mutex_unlock(&device->dev_set->lock); >> >> @@ -1119,10 +1130,14 @@ static struct file *vfio_device_open(struct vfio_device *device) >> >> err_close_device: >> mutex_lock(&device->dev_set->lock); >> + down_read(&device->group->group_rwsem); >> if (device->open_count == 1 && device->ops->close_device) >> device->ops->close_device(device); >> err_undo_count: >> device->open_count--; >> + if (device->open_count == 0 && device->kvm) >> + device->kvm = NULL; >> + up_read(&device->group->group_rwsem); >> mutex_unlock(&device->dev_set->lock); >> module_put(device->dev->driver->owner); >> err_unassign_container: >> @@ -1315,9 +1330,13 @@ static int vfio_device_fops_release(struct inode *inode, struct file *filep) >> >> mutex_lock(&device->dev_set->lock); >> vfio_assert_device_open(device); >> + down_read(&device->group->group_rwsem); >> if (device->open_count == 1 && device->ops->close_device) >> device->ops->close_device(device); >> + up_read(&device->group->group_rwsem); >> device->open_count--; >> + if (device->open_count == 0) >> + device->kvm = NULL; >> mutex_unlock(&device->dev_set->lock); >> >> module_put(device->dev->driver->owner); >> @@ -1726,8 +1745,8 @@ EXPORT_SYMBOL_GPL(vfio_file_enforced_coherent); >> * @file: VFIO group file >> * @kvm: KVM to link >> * >> - * The kvm pointer will be forwarded to all the vfio_device's attached to the >> - * VFIO file via the VFIO_GROUP_NOTIFY_SET_KVM notifier. >> + * When a VFIO device is first opened the KVM will be available in >> + * device->kvm if one was associated with the group. >> */ >> void vfio_file_set_kvm(struct file *file, struct kvm *kvm) >> { >> @@ -1738,8 +1757,6 @@ void vfio_file_set_kvm(struct file *file, struct kvm *kvm) >> >> down_write(&group->group_rwsem); >> group->kvm = kvm; >> - blocking_notifier_call_chain(&group->notifier, >> - VFIO_GROUP_NOTIFY_SET_KVM, kvm); >> up_write(&group->group_rwsem); >> } >> EXPORT_SYMBOL_GPL(vfio_file_set_kvm); >> @@ -2006,7 +2023,8 @@ static int vfio_register_iommu_notifier(struct vfio_group *group, >> struct vfio_iommu_driver *driver; >> int ret; >> >> - down_read(&group->group_rwsem); >> + lockdep_assert_held_read(&group->group_rwsem); >> + >> container = group->container; >> driver = container->iommu_driver; >> if (likely(driver && driver->ops->register_notifier)) >> @@ -2014,7 +2032,6 @@ static int vfio_register_iommu_notifier(struct vfio_group *group, >> events, nb); >> else >> ret = -ENOTTY; >> - up_read(&group->group_rwsem); >> >> return ret; >> } >> @@ -2026,7 +2043,8 @@ static int vfio_unregister_iommu_notifier(struct vfio_group *group, >> struct vfio_iommu_driver *driver; >> int ret; >> >> - down_read(&group->group_rwsem); >> + lockdep_assert_held_read(&group->group_rwsem); >> + >> container = group->container; >> driver = container->iommu_driver; >> if (likely(driver && driver->ops->unregister_notifier)) >> @@ -2034,47 +2052,10 @@ static int vfio_unregister_iommu_notifier(struct vfio_group *group, >> nb); >> else >> ret = -ENOTTY; >> - up_read(&group->group_rwsem); >> >> return ret; >> } >> >> -static int vfio_register_group_notifier(struct vfio_group *group, >> - unsigned long *events, >> - struct notifier_block *nb) >> -{ >> - int ret; >> - bool set_kvm = false; >> - >> - if (*events & VFIO_GROUP_NOTIFY_SET_KVM) >> - set_kvm = true; >> - >> - /* clear known events */ >> - *events &= ~VFIO_GROUP_NOTIFY_SET_KVM; >> - >> - /* refuse to continue if still events remaining */ >> - if (*events) >> - return -EINVAL; >> - >> - ret = blocking_notifier_chain_register(&group->notifier, nb); >> - if (ret) >> - return ret; >> - >> - /* >> - * The attaching of kvm and vfio_group might already happen, so >> - * here we replay once upon registration. >> - */ >> - if (set_kvm) { >> - down_read(&group->group_rwsem); >> - if (group->kvm) >> - blocking_notifier_call_chain(&group->notifier, >> - VFIO_GROUP_NOTIFY_SET_KVM, >> - group->kvm); >> - up_read(&group->group_rwsem); >> - } >> - return 0; >> -} >> - >> int vfio_register_notifier(struct vfio_device *device, >> enum vfio_notify_type type, unsigned long *events, >> struct notifier_block *nb) >> @@ -2090,9 +2071,6 @@ int vfio_register_notifier(struct vfio_device *device, >> case VFIO_IOMMU_NOTIFY: >> ret = vfio_register_iommu_notifier(group, events, nb); >> break; >> - case VFIO_GROUP_NOTIFY: >> - ret = vfio_register_group_notifier(group, events, nb); >> - break; >> default: >> ret = -EINVAL; >> } >> @@ -2114,9 +2092,6 @@ int vfio_unregister_notifier(struct vfio_device *device, >> case VFIO_IOMMU_NOTIFY: >> ret = vfio_unregister_iommu_notifier(group, nb); >> break; >> - case VFIO_GROUP_NOTIFY: >> - ret = blocking_notifier_chain_unregister(&group->notifier, nb); >> - break; >> default: >> ret = -EINVAL; >> } >> diff --git a/include/linux/vfio.h b/include/linux/vfio.h >> index 45b287826ce6..aa888cc51757 100644 >> --- a/include/linux/vfio.h >> +++ b/include/linux/vfio.h >> @@ -36,6 +36,8 @@ struct vfio_device { >> struct vfio_device_set *dev_set; >> struct list_head dev_set_list; >> unsigned int migration_flags; >> + /* Driver must reference the kvm during open_device or never touch it */ >> + struct kvm *kvm; >> >> /* Members below here are private, not for driver use */ >> refcount_t refcount; >> @@ -155,15 +157,11 @@ extern int vfio_dma_rw(struct vfio_device *device, dma_addr_t user_iova, >> /* each type has independent events */ >> enum vfio_notify_type { >> VFIO_IOMMU_NOTIFY = 0, >> - VFIO_GROUP_NOTIFY = 1, >> }; >> >> /* events for VFIO_IOMMU_NOTIFY */ >> #define VFIO_IOMMU_NOTIFY_DMA_UNMAP BIT(0) >> >> -/* events for VFIO_GROUP_NOTIFY */ >> -#define VFIO_GROUP_NOTIFY_SET_KVM BIT(0) >> - >> extern int vfio_register_notifier(struct vfio_device *device, >> enum vfio_notify_type type, >> unsigned long *required_events, > Acked-by: Zhi Wang <zhi.a.wang@xxxxxxxxx>