The vfio_ap device driver registers for notification when the pointer to the KVM object for a guest is set. Recall that the KVM lock (kvm->lock) mutex must be taken outside of the matrix_dev->lock mutex to prevent the reporting by lockdep of a circular locking dependency (a.k.a., a lockdep splat): * see commit 0cc00c8d4050 ("Fix circular lockdep when setting/clearing crypto masks") * see commit 86956e70761b ("replace open coded locks for VFIO_GROUP_NOTIFY_SET_KVM notification") With the introduction of support for hot plugging/unplugging AP devices passed through to a KVM guest, a new guests_lock mutex is introduced to ensure the proper locking order is maintained: struct ap_matrix_dev { ... struct mutex guests_lock; ... } The matrix_dev->guests_lock controls access to the matrix_mdev instances that hold the state for AP devices that have been passed through to a KVM guest. This lock must be held: 1. To control access to the KVM pointer (matrix_mdev->kvm) while the vfio_ap device driver is using it to plug/unplug AP devices passed through to the KVM guest. 2. To add matrix_mdev instances to or remove them from matrix_dev->mdev_list. This is necessary to ensure the proper locking order when the list is iterated to find an ap_matrix_mdev instance for the purpose of plugging/unplugging AP devices passed through to a KVM guest; for example, when a queue device is bound to (probe callback) or removed from (remove callback) the vfio_ap device driver. For example, when a queue device is removed from the vfio_ap device driver, if the adapter is passed through to a KVM guest, it will have to be unplugged. Since the vfio_ap device driver only knows the APQN of the queue device when it is removed, in order to figure out whether the adapter is passed through, the matrix_mdev object to which the queue is assigned will have to be found. Its KVM pointer (matrix_mdev->kvm) can then be used to determine if the mediated device is passed through (matrix_mdev->kvm != NULL) and if so, to unplug the adapter. In addition to introducing the matrix_mdev->guests_lock mutex, the matrix_dev->lock mutex is being renamed to matrix_dev->mdevs_lock to better reflect its purpose, which is to control access to the state of the mediated devices under the control of the vfio_ap device driver. It is not necessary to take the matrix_dev->guests_lock to access the KVM pointer if the pointer is not used to plug/unplug AP devices; however, in this case, the matrix_dev->mdevs_lock must be held in order to access the KVM pointer since it set and cleared under the protection of the mdevs_lock. A case in point is the function that handles interception of the PQAP(AQIC) instruction sub-function by the host. This handler needs to access the KVM pointer, but only for the purposes of setting or clearing IRQ resources, so only the matrix_dev->mdevs_lock needs to be held. The proper locking order must be maintained whenever plugging/unplugging AP devices passed through to a KVM guest under the control of the vfio_ap device driver: matrix_dev->guests_lock matrix_mdev->kvm->lock matrix_dev->mdevs_lock Signed-off-by: Tony Krowiak <akrowiak@xxxxxxxxxxxxx> --- drivers/s390/crypto/vfio_ap_drv.c | 7 ++- drivers/s390/crypto/vfio_ap_ops.c | 81 +++++++++++++++------------ drivers/s390/crypto/vfio_ap_private.h | 17 ++++-- 3 files changed, 61 insertions(+), 44 deletions(-) diff --git a/drivers/s390/crypto/vfio_ap_drv.c b/drivers/s390/crypto/vfio_ap_drv.c index 5013a5531171..e8f3540ebfda 100644 --- a/drivers/s390/crypto/vfio_ap_drv.c +++ b/drivers/s390/crypto/vfio_ap_drv.c @@ -68,7 +68,7 @@ static ssize_t status_show(struct device *dev, struct ap_matrix_mdev *matrix_mdev; struct ap_device *apdev = to_ap_dev(dev); - mutex_lock(&matrix_dev->lock); + mutex_lock(&matrix_dev->guests_lock); q = dev_get_drvdata(&apdev->device); matrix_mdev = vfio_ap_mdev_for_queue(q); @@ -84,7 +84,7 @@ static ssize_t status_show(struct device *dev, AP_QUEUE_UNASSIGNED); } - mutex_unlock(&matrix_dev->lock); + mutex_unlock(&matrix_dev->guests_lock); return nchars; } @@ -155,8 +155,9 @@ static int vfio_ap_matrix_dev_create(void) goto matrix_alloc_err; } - mutex_init(&matrix_dev->lock); + mutex_init(&matrix_dev->mdevs_lock); INIT_LIST_HEAD(&matrix_dev->mdev_list); + mutex_init(&matrix_dev->guests_lock); dev_set_name(&matrix_dev->device, "%s", VFIO_AP_DEV_NAME); matrix_dev->device.parent = root_device; diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c index f2b98f347f9f..623a4b38676d 100644 --- a/drivers/s390/crypto/vfio_ap_ops.c +++ b/drivers/s390/crypto/vfio_ap_ops.c @@ -278,7 +278,7 @@ static int handle_pqap(struct kvm_vcpu *vcpu) return -EOPNOTSUPP; apqn = vcpu->run->s.regs.gprs[0] & 0xffff; - mutex_lock(&matrix_dev->lock); + mutex_lock(&matrix_dev->mdevs_lock); if (!vcpu->kvm->arch.crypto.pqap_hook) goto out_unlock; @@ -305,7 +305,7 @@ static int handle_pqap(struct kvm_vcpu *vcpu) out_unlock: memcpy(&vcpu->run->s.regs.gprs[1], &qstatus, sizeof(qstatus)); vcpu->run->s.regs.gprs[1] >>= 32; - mutex_unlock(&matrix_dev->lock); + mutex_unlock(&matrix_dev->mdevs_lock); return 0; } @@ -396,9 +396,9 @@ static int vfio_ap_mdev_probe(struct mdev_device *mdev) vfio_ap_matrix_init(&matrix_dev->info, &matrix_mdev->shadow_apcb); hash_init(matrix_mdev->qtable.queues); mdev_set_drvdata(mdev, matrix_mdev); - mutex_lock(&matrix_dev->lock); + mutex_lock(&matrix_dev->guests_lock); list_add(&matrix_mdev->node, &matrix_dev->mdev_list); - mutex_unlock(&matrix_dev->lock); + mutex_unlock(&matrix_dev->guests_lock); ret = vfio_register_emulated_iommu_dev(&matrix_mdev->vdev); if (ret) @@ -407,9 +407,9 @@ static int vfio_ap_mdev_probe(struct mdev_device *mdev) return 0; err_list: - mutex_lock(&matrix_dev->lock); + mutex_lock(&matrix_dev->guests_lock); list_del(&matrix_mdev->node); - mutex_unlock(&matrix_dev->lock); + mutex_unlock(&matrix_dev->guests_lock); vfio_uninit_group_dev(&matrix_mdev->vdev); kfree(matrix_mdev); err_dec_available: @@ -472,11 +472,11 @@ static void vfio_ap_mdev_remove(struct mdev_device *mdev) vfio_unregister_group_dev(&matrix_mdev->vdev); - mutex_lock(&matrix_dev->lock); + mutex_lock(&matrix_dev->mdevs_lock); vfio_ap_mdev_reset_queues(matrix_mdev); vfio_ap_mdev_unlink_fr_queues(matrix_mdev); list_del(&matrix_mdev->node); - mutex_unlock(&matrix_dev->lock); + mutex_unlock(&matrix_dev->mdevs_lock); vfio_uninit_group_dev(&matrix_mdev->vdev); kfree(matrix_mdev); atomic_inc(&matrix_dev->available_instances); @@ -652,7 +652,8 @@ static ssize_t assign_adapter_store(struct device *dev, struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev); - mutex_lock(&matrix_dev->lock); + mutex_lock(&matrix_dev->guests_lock); + mutex_lock(&matrix_dev->mdevs_lock); /* If the KVM guest is running, disallow assignment of adapter */ if (matrix_mdev->kvm) { @@ -683,7 +684,8 @@ static ssize_t assign_adapter_store(struct device *dev, vfio_ap_mdev_filter_matrix(apm, matrix_mdev->matrix.aqm, matrix_mdev); ret = count; done: - mutex_unlock(&matrix_dev->lock); + mutex_unlock(&matrix_dev->mdevs_lock); + mutex_unlock(&matrix_dev->guests_lock); return ret; } @@ -726,7 +728,7 @@ static ssize_t unassign_adapter_store(struct device *dev, unsigned long apid; struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev); - mutex_lock(&matrix_dev->lock); + mutex_lock(&matrix_dev->mdevs_lock); /* If the KVM guest is running, disallow unassignment of adapter */ if (matrix_mdev->kvm) { @@ -751,7 +753,7 @@ static ssize_t unassign_adapter_store(struct device *dev, ret = count; done: - mutex_unlock(&matrix_dev->lock); + mutex_unlock(&matrix_dev->mdevs_lock); return ret; } static DEVICE_ATTR_WO(unassign_adapter); @@ -806,7 +808,8 @@ static ssize_t assign_domain_store(struct device *dev, struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev); unsigned long max_apqi = matrix_mdev->matrix.aqm_max; - mutex_lock(&matrix_dev->lock); + mutex_lock(&matrix_dev->guests_lock); + mutex_lock(&matrix_dev->mdevs_lock); /* If the KVM guest is running, disallow assignment of domain */ if (matrix_mdev->kvm) { @@ -836,7 +839,8 @@ static ssize_t assign_domain_store(struct device *dev, vfio_ap_mdev_filter_matrix(matrix_mdev->matrix.apm, aqm, matrix_mdev); ret = count; done: - mutex_unlock(&matrix_dev->lock); + mutex_unlock(&matrix_dev->mdevs_lock); + mutex_unlock(&matrix_dev->guests_lock); return ret; } @@ -879,7 +883,7 @@ static ssize_t unassign_domain_store(struct device *dev, unsigned long apqi; struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev); - mutex_lock(&matrix_dev->lock); + mutex_lock(&matrix_dev->mdevs_lock); /* If the KVM guest is running, disallow unassignment of domain */ if (matrix_mdev->kvm) { @@ -905,7 +909,7 @@ static ssize_t unassign_domain_store(struct device *dev, ret = count; done: - mutex_unlock(&matrix_dev->lock); + mutex_unlock(&matrix_dev->mdevs_lock); return ret; } static DEVICE_ATTR_WO(unassign_domain); @@ -932,7 +936,7 @@ static ssize_t assign_control_domain_store(struct device *dev, unsigned long id; struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev); - mutex_lock(&matrix_dev->lock); + mutex_lock(&matrix_dev->mdevs_lock); /* If the KVM guest is running, disallow assignment of control domain */ if (matrix_mdev->kvm) { @@ -958,7 +962,7 @@ static ssize_t assign_control_domain_store(struct device *dev, vfio_ap_mdev_filter_cdoms(matrix_mdev); ret = count; done: - mutex_unlock(&matrix_dev->lock); + mutex_unlock(&matrix_dev->mdevs_lock); return ret; } static DEVICE_ATTR_WO(assign_control_domain); @@ -986,7 +990,7 @@ static ssize_t unassign_control_domain_store(struct device *dev, struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev); unsigned long max_domid = matrix_mdev->matrix.adm_max; - mutex_lock(&matrix_dev->lock); + mutex_lock(&matrix_dev->mdevs_lock); /* If a KVM guest is running, disallow unassignment of control domain */ if (matrix_mdev->kvm) { @@ -1009,7 +1013,7 @@ static ssize_t unassign_control_domain_store(struct device *dev, ret = count; done: - mutex_unlock(&matrix_dev->lock); + mutex_unlock(&matrix_dev->mdevs_lock); return ret; } static DEVICE_ATTR_WO(unassign_control_domain); @@ -1025,13 +1029,13 @@ static ssize_t control_domains_show(struct device *dev, struct ap_matrix_mdev *matrix_mdev = dev_get_drvdata(dev); unsigned long max_domid = matrix_mdev->matrix.adm_max; - mutex_lock(&matrix_dev->lock); + mutex_lock(&matrix_dev->mdevs_lock); for_each_set_bit_inv(id, matrix_mdev->matrix.adm, max_domid + 1) { n = sprintf(bufpos, "%04lx\n", id); bufpos += n; nchars += n; } - mutex_unlock(&matrix_dev->lock); + mutex_unlock(&matrix_dev->mdevs_lock); return nchars; } @@ -1054,7 +1058,7 @@ static ssize_t matrix_show(struct device *dev, struct device_attribute *attr, apid1 = find_first_bit_inv(matrix_mdev->matrix.apm, napm_bits); apqi1 = find_first_bit_inv(matrix_mdev->matrix.aqm, naqm_bits); - mutex_lock(&matrix_dev->lock); + mutex_lock(&matrix_dev->mdevs_lock); if ((apid1 < napm_bits) && (apqi1 < naqm_bits)) { for_each_set_bit_inv(apid, matrix_mdev->matrix.apm, napm_bits) { @@ -1080,7 +1084,7 @@ static ssize_t matrix_show(struct device *dev, struct device_attribute *attr, } } - mutex_unlock(&matrix_dev->lock); + mutex_unlock(&matrix_dev->mdevs_lock); return nchars; } @@ -1134,13 +1138,15 @@ static int vfio_ap_mdev_set_kvm(struct ap_matrix_mdev *matrix_mdev, kvm->arch.crypto.pqap_hook = &matrix_mdev->pqap_hook; up_write(&kvm->arch.crypto.pqap_hook_rwsem); + mutex_lock(&matrix_dev->guests_lock); mutex_lock(&kvm->lock); - mutex_lock(&matrix_dev->lock); + mutex_lock(&matrix_dev->mdevs_lock); list_for_each_entry(m, &matrix_dev->mdev_list, node) { if (m != matrix_mdev && m->kvm == kvm) { + mutex_unlock(&matrix_dev->mdevs_lock); mutex_unlock(&kvm->lock); - mutex_unlock(&matrix_dev->lock); + mutex_unlock(&matrix_dev->guests_lock); return -EPERM; } } @@ -1151,8 +1157,9 @@ static int vfio_ap_mdev_set_kvm(struct ap_matrix_mdev *matrix_mdev, matrix_mdev->shadow_apcb.aqm, matrix_mdev->shadow_apcb.adm); + mutex_unlock(&matrix_dev->mdevs_lock); mutex_unlock(&kvm->lock); - mutex_unlock(&matrix_dev->lock); + mutex_unlock(&matrix_dev->guests_lock); } return 0; @@ -1210,16 +1217,18 @@ static void vfio_ap_mdev_unset_kvm(struct ap_matrix_mdev *matrix_mdev, kvm->arch.crypto.pqap_hook = NULL; up_write(&kvm->arch.crypto.pqap_hook_rwsem); + mutex_lock(&matrix_dev->guests_lock); mutex_lock(&kvm->lock); - mutex_lock(&matrix_dev->lock); + mutex_lock(&matrix_dev->mdevs_lock); kvm_arch_crypto_clear_masks(kvm); vfio_ap_mdev_reset_queues(matrix_mdev); kvm_put_kvm(kvm); matrix_mdev->kvm = NULL; + mutex_unlock(&matrix_dev->mdevs_lock); mutex_unlock(&kvm->lock); - mutex_unlock(&matrix_dev->lock); + mutex_unlock(&matrix_dev->guests_lock); } } @@ -1396,7 +1405,7 @@ static ssize_t vfio_ap_mdev_ioctl(struct vfio_device *vdev, container_of(vdev, struct ap_matrix_mdev, vdev); int ret; - mutex_lock(&matrix_dev->lock); + mutex_lock(&matrix_dev->mdevs_lock); switch (cmd) { case VFIO_DEVICE_GET_INFO: ret = vfio_ap_mdev_get_device_info(arg); @@ -1408,7 +1417,7 @@ static ssize_t vfio_ap_mdev_ioctl(struct vfio_device *vdev, ret = -EOPNOTSUPP; break; } - mutex_unlock(&matrix_dev->lock); + mutex_unlock(&matrix_dev->mdevs_lock); return ret; } @@ -1493,7 +1502,8 @@ int vfio_ap_mdev_probe_queue(struct ap_device *apdev) if (!q) return -ENOMEM; - mutex_lock(&matrix_dev->lock); + mutex_lock(&matrix_dev->guests_lock); + mutex_lock(&matrix_dev->mdevs_lock); q->apqn = to_ap_queue(&apdev->device)->qid; q->saved_isc = VFIO_AP_ISC_INVALID; vfio_ap_queue_link_mdev(q); @@ -1504,7 +1514,8 @@ int vfio_ap_mdev_probe_queue(struct ap_device *apdev) q->matrix_mdev); } dev_set_drvdata(&apdev->device, q); - mutex_unlock(&matrix_dev->lock); + mutex_unlock(&matrix_dev->mdevs_lock); + mutex_unlock(&matrix_dev->guests_lock); return 0; } @@ -1514,7 +1525,7 @@ void vfio_ap_mdev_remove_queue(struct ap_device *apdev) unsigned long apid; struct vfio_ap_queue *q; - mutex_lock(&matrix_dev->lock); + mutex_lock(&matrix_dev->mdevs_lock); q = dev_get_drvdata(&apdev->device); if (q->matrix_mdev) { @@ -1528,5 +1539,5 @@ void vfio_ap_mdev_remove_queue(struct ap_device *apdev) vfio_ap_mdev_reset_queue(q, 1); dev_set_drvdata(&apdev->device, NULL); kfree(q); - mutex_unlock(&matrix_dev->lock); + mutex_unlock(&matrix_dev->mdevs_lock); } diff --git a/drivers/s390/crypto/vfio_ap_private.h b/drivers/s390/crypto/vfio_ap_private.h index fa11a7e91e24..7e82a72d2083 100644 --- a/drivers/s390/crypto/vfio_ap_private.h +++ b/drivers/s390/crypto/vfio_ap_private.h @@ -33,20 +33,25 @@ * @available_instances: number of mediated matrix devices that can be created * @info: the struct containing the output from the PQAP(QCI) instruction * @mdev_list: the list of mediated matrix devices created - * @lock: mutex for locking the AP matrix device. This lock will be - * taken every time we fiddle with state managed by the vfio_ap - * driver, be it using @mdev_list or writing the state of a - * single ap_matrix_mdev device. It's quite coarse but we don't - * expect much contention. + * @mdevs_lock: mutex for locking the ap_matrix_mdev devices under the control + * of the vfio_ap device driver. This lock will be taken every time + * we fiddle with state of an ap_matrix_mdev device. It's quite + * coarse but we don't expect much contention. * @vfio_ap_drv: the vfio_ap device driver + * @guests_lock: mutex for controlling access to a guest that is using AP + * devices passed through by the vfio_ap device driver. This lock + * will be taken when the AP devices are plugged into or unplugged + * from a guest, and when an ap_matrix_mdev device is added to or + * removed from @mdev_list or the list is iterated. */ struct ap_matrix_dev { struct device device; atomic_t available_instances; struct ap_config_info info; struct list_head mdev_list; - struct mutex lock; + struct mutex mdevs_lock; struct ap_driver *vfio_ap_drv; + struct mutex guests_lock; }; extern struct ap_matrix_dev *matrix_dev; -- 2.31.1