On Tue, 24 Nov 2020 16:40:11 -0500 Tony Krowiak <akrowiak@xxxxxxxxxxxxx> wrote: > Let's hot plug/unplug adapters, domains and control domains assigned to or > unassigned from an AP matrix mdev device while it is in use by a guest per > the following rules: > > * Assign an adapter to mdev's matrix: > > The adapter will be hot plugged into the guest under the following > conditions: > 1. The adapter is not yet assigned to the guest's matrix > 2. At least one domain is assigned to the guest's matrix > 3. Each APQN derived from the APID of the newly assigned adapter and > the APQIs of the domains already assigned to the guest's > matrix references a queue device bound to the vfio_ap device driver. > > The adapter and each domain assigned to the mdev's matrix will be hot > plugged into the guest under the following conditions: > 1. The adapter is not yet assigned to the guest's matrix > 2. No domains are assigned to the guest's matrix > 3 At least one domain is assigned to the mdev's matrix > 4. Each APQN derived from the APID of the newly assigned adapter and > the APQIs of the domains assigned to the mdev's matrix references a > queue device bound to the vfio_ap device driver. > > * Unassign an adapter from mdev's matrix: > > The adapter will be hot unplugged from the KVM guest if it is > assigned to the guest's matrix. > > * Assign a domain to mdev's matrix: > > The domain will be hot plugged into the guest under the following > conditions: > 1. The domain is not yet assigned to the guest's matrix > 2. At least one adapter is assigned to the guest's matrix > 3. Each APQN derived from the APQI of the newly assigned domain and > the APIDs of the adapters already assigned to the guest's > matrix references a queue device bound to the vfio_ap device driver. > > The domain and each adapter assigned to the mdev's matrix will be hot > plugged into the guest under the following conditions: > 1. The domain is not yet assigned to the guest's matrix > 2. No adapters are assigned to the guest's matrix > 3 At least one adapter is assigned to the mdev's matrix > 4. Each APQN derived from the APQI of the newly assigned domain and > the APIDs of the adapters assigned to the mdev's matrix references a > queue device bound to the vfio_ap device driver. > > * Unassign adapter from mdev's matrix: > > The domain will be hot unplugged from the KVM guest if it is > assigned to the guest's matrix. > > * Assign a control domain: > > The control domain will be hot plugged into the KVM guest if it is not > assigned to the guest's APCB. The AP architecture ensures a guest will > only get access to the control domain if it is in the host's AP > configuration, so there is no risk in hot plugging it; however, it will > become automatically available to the guest when it is added to the host > configuration. > > * Unassign a control domain: > > The control domain will be hot unplugged from the KVM guest if it is > assigned to the guest's APCB. This is where things start getting tricky. E.g. do we need to revise filtering after an unassign? (For example an assign_adapter X didn't change the shadow, because queue XY was missing, but now we unplug domain Y. Should the adapter X pop up? I guess it should.) > > Note: Now that hot plug/unplug is implemented, there is the possibility > that an assignment/unassignment of an adapter, domain or control > domain could be initiated while the guest is starting, so the > matrix device lock will be taken for the group notification callback > that initializes the guest's APCB when the KVM pointer is made > available to the vfio_ap device driver. > > Signed-off-by: Tony Krowiak <akrowiak@xxxxxxxxxxxxx> > --- > drivers/s390/crypto/vfio_ap_ops.c | 190 +++++++++++++++++++++++++----- > 1 file changed, 159 insertions(+), 31 deletions(-) > > diff --git a/drivers/s390/crypto/vfio_ap_ops.c b/drivers/s390/crypto/vfio_ap_ops.c > index 586ec5776693..4f96b7861607 100644 > --- a/drivers/s390/crypto/vfio_ap_ops.c > +++ b/drivers/s390/crypto/vfio_ap_ops.c > @@ -631,6 +631,60 @@ static void vfio_ap_mdev_manage_qlinks(struct ap_matrix_mdev *matrix_mdev, > } > } > > +static bool vfio_ap_assign_apid_to_apcb(struct ap_matrix_mdev *matrix_mdev, > + unsigned long apid) > +{ > + unsigned long apqi, apqn; > + unsigned long *aqm = matrix_mdev->shadow_apcb.aqm; > + > + /* > + * If the APID is already assigned to the guest's shadow APCB, there is > + * no need to assign it. > + */ > + if (test_bit_inv(apid, matrix_mdev->shadow_apcb.apm)) > + return false; > + > + /* > + * If no domains have yet been assigned to the shadow APCB and one or > + * more domains have been assigned to the matrix mdev, then use > + * the domains assigned to the matrix mdev; otherwise, there is nothing > + * to assign to the shadow APCB. > + */ > + if (bitmap_empty(matrix_mdev->shadow_apcb.aqm, AP_DOMAINS)) { > + if (bitmap_empty(matrix_mdev->matrix.aqm, AP_DOMAINS)) > + return false; > + > + aqm = matrix_mdev->matrix.aqm; > + } > + > + /* Make sure all APQNs are bound to the vfio_ap driver */ > + for_each_set_bit_inv(apqi, aqm, AP_DOMAINS) { > + apqn = AP_MKQID(apid, apqi); > + > + if (vfio_ap_mdev_get_queue(matrix_mdev, apqn) == NULL) > + return false; > + } > + > + set_bit_inv(apid, matrix_mdev->shadow_apcb.apm); > + > + /* > + * If we verified APQNs using the domains assigned to the matrix mdev, > + * then copy the APQIs of those domains into the guest's APCB > + */ > + if (bitmap_empty(matrix_mdev->shadow_apcb.aqm, AP_DOMAINS)) > + bitmap_copy(matrix_mdev->shadow_apcb.aqm, > + matrix_mdev->matrix.aqm, AP_DOMAINS); > + > + return true; > +} What is the rationale behind the shadow aqm empty special handling? I.e. why not simply: static bool vfio_ap_assign_apid_to_apcb(struct ap_matrix_mdev *matrix_mdev, unsigned long apid) { unsigned long apqi, apqn; unsigned long *aqm = matrix_mdev->shadow_apcb.aqm; /* * If the APID is already assigned to the guest's shadow APCB, there is * no need to assign it. */ if (test_bit_inv(apid, matrix_mdev->shadow_apcb.apm)) return false; /* Make sure all APQNs are bound to the vfio_ap driver */ for_each_set_bit_inv(apqi, aqm, AP_DOMAINS) { apqn = AP_MKQID(apid, apqi); if (vfio_ap_mdev_get_queue(matrix_mdev, apqn) == NULL) return false; } set_bit_inv(apid, matrix_mdev->shadow_apcb.apm); return true; } Please answer the questions I've asked, and note that I will have to return to this patch, later. Regards, Halil > + > +static void vfio_ap_mdev_hot_plug_adapter(struct ap_matrix_mdev *matrix_mdev, > + unsigned long apid) > +{ > + if (vfio_ap_assign_apid_to_apcb(matrix_mdev, apid)) > + vfio_ap_mdev_commit_shadow_apcb(matrix_mdev); > +} > + > /** > * assign_adapter_store > * > @@ -673,10 +727,6 @@ static ssize_t assign_adapter_store(struct device *dev, > struct mdev_device *mdev = mdev_from_dev(dev); > struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev); > > - /* If the guest is running, disallow assignment of adapter */ > - if (matrix_mdev->kvm) > - return -EBUSY; > - > ret = kstrtoul(buf, 0, &apid); > if (ret) > return ret; > @@ -698,12 +748,22 @@ static ssize_t assign_adapter_store(struct device *dev, > } > set_bit_inv(apid, matrix_mdev->matrix.apm); > vfio_ap_mdev_manage_qlinks(matrix_mdev, LINK_APID, apid); > + vfio_ap_mdev_hot_plug_adapter(matrix_mdev, apid); > mutex_unlock(&matrix_dev->lock); > > return count; > } > static DEVICE_ATTR_WO(assign_adapter); > > +static void vfio_ap_mdev_hot_unplug_adapter(struct ap_matrix_mdev *matrix_mdev, > + unsigned long apid) > +{ > + if (test_bit_inv(apid, matrix_mdev->shadow_apcb.apm)) { > + clear_bit_inv(apid, matrix_mdev->shadow_apcb.apm); > + vfio_ap_mdev_commit_shadow_apcb(matrix_mdev); > + } > +} > + > /** > * unassign_adapter_store > * > @@ -730,10 +790,6 @@ static ssize_t unassign_adapter_store(struct device *dev, > struct mdev_device *mdev = mdev_from_dev(dev); > struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev); > > - /* If the guest is running, disallow un-assignment of adapter */ > - if (matrix_mdev->kvm) > - return -EBUSY; > - > ret = kstrtoul(buf, 0, &apid); > if (ret) > return ret; > @@ -744,12 +800,67 @@ static ssize_t unassign_adapter_store(struct device *dev, > mutex_lock(&matrix_dev->lock); > clear_bit_inv((unsigned long)apid, matrix_mdev->matrix.apm); > vfio_ap_mdev_manage_qlinks(matrix_mdev, UNLINK_APID, apid); > + vfio_ap_mdev_hot_unplug_adapter(matrix_mdev, apid); > mutex_unlock(&matrix_dev->lock); > > return count; > } > static DEVICE_ATTR_WO(unassign_adapter); > > +static bool vfio_ap_assign_apqi_to_apcb(struct ap_matrix_mdev *matrix_mdev, > + unsigned long apqi) > +{ > + unsigned long apid, apqn; > + unsigned long *apm = matrix_mdev->shadow_apcb.apm; > + > + /* > + * If the APQI is already assigned to the guest's shadow APCB, there is > + * no need to assign it. > + */ > + if (test_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm)) > + return false; > + > + /* > + * If no adapters have yet been assigned to the shadow APCB and one or > + * more adapters have been assigned to the matrix mdev, then use > + * the adapters assigned to the matrix mdev; otherwise, there is nothing > + * to assign to the shadow APCB. > + */ > + if (bitmap_empty(matrix_mdev->shadow_apcb.apm, AP_DEVICES)) { > + if (bitmap_empty(matrix_mdev->matrix.apm, AP_DEVICES)) > + return false; > + > + apm = matrix_mdev->matrix.apm; > + } > + > + /* Make sure all APQNs are bound to the vfio_ap driver */ > + for_each_set_bit_inv(apid, apm, AP_DEVICES) { > + apqn = AP_MKQID(apid, apqi); > + > + if (vfio_ap_mdev_get_queue(matrix_mdev, apqn) == NULL) > + return false; > + } > + > + set_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm); > + > + /* > + * If we verified APQNs using the adapters assigned to the matrix mdev, > + * then copy the APIDs of those adapters into the guest's APCB > + */ > + if (bitmap_empty(matrix_mdev->shadow_apcb.apm, AP_DEVICES)) > + bitmap_copy(matrix_mdev->shadow_apcb.apm, > + matrix_mdev->matrix.apm, AP_DEVICES); > + > + return true; > +} > + > +static void vfio_ap_mdev_hot_plug_domain(struct ap_matrix_mdev *matrix_mdev, > + unsigned long apqi) > +{ > + if (vfio_ap_assign_apqi_to_apcb(matrix_mdev, apqi)) > + vfio_ap_mdev_commit_shadow_apcb(matrix_mdev); > +} > + > /** > * assign_domain_store > * > @@ -793,10 +904,6 @@ static ssize_t assign_domain_store(struct device *dev, > struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev); > unsigned long max_apqi = matrix_mdev->matrix.aqm_max; > > - /* If the guest is running, disallow assignment of domain */ > - if (matrix_mdev->kvm) > - return -EBUSY; > - > ret = kstrtoul(buf, 0, &apqi); > if (ret) > return ret; > @@ -817,12 +924,21 @@ static ssize_t assign_domain_store(struct device *dev, > } > set_bit_inv(apqi, matrix_mdev->matrix.aqm); > vfio_ap_mdev_manage_qlinks(matrix_mdev, LINK_APQI, apqi); > + vfio_ap_mdev_hot_plug_domain(matrix_mdev, apqi); > mutex_unlock(&matrix_dev->lock); > > return count; > } > static DEVICE_ATTR_WO(assign_domain); > > +static void vfio_ap_mdev_hot_unplug_domain(struct ap_matrix_mdev *matrix_mdev, > + unsigned long apqi) > +{ > + if (test_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm)) { > + clear_bit_inv(apqi, matrix_mdev->shadow_apcb.aqm); > + vfio_ap_mdev_commit_shadow_apcb(matrix_mdev); > + } > +} > > /** > * unassign_domain_store > @@ -850,10 +966,6 @@ static ssize_t unassign_domain_store(struct device *dev, > struct mdev_device *mdev = mdev_from_dev(dev); > struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev); > > - /* If the guest is running, disallow un-assignment of domain */ > - if (matrix_mdev->kvm) > - return -EBUSY; > - > ret = kstrtoul(buf, 0, &apqi); > if (ret) > return ret; > @@ -864,12 +976,22 @@ static ssize_t unassign_domain_store(struct device *dev, > mutex_lock(&matrix_dev->lock); > clear_bit_inv((unsigned long)apqi, matrix_mdev->matrix.aqm); > vfio_ap_mdev_manage_qlinks(matrix_mdev, UNLINK_APQI, apqi); > + vfio_ap_mdev_hot_unplug_domain(matrix_mdev, apqi); > mutex_unlock(&matrix_dev->lock); > > return count; > } > static DEVICE_ATTR_WO(unassign_domain); > > +static void vfio_ap_mdev_hot_plug_ctl_domain(struct ap_matrix_mdev *matrix_mdev, > + unsigned long domid) > +{ > + if (!test_bit_inv(domid, matrix_mdev->shadow_apcb.adm)) { > + set_bit_inv(domid, matrix_mdev->shadow_apcb.adm); > + vfio_ap_mdev_commit_shadow_apcb(matrix_mdev); > + } > +} > + > /** > * assign_control_domain_store > * > @@ -895,10 +1017,6 @@ static ssize_t assign_control_domain_store(struct device *dev, > struct mdev_device *mdev = mdev_from_dev(dev); > struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev); > > - /* If the guest is running, disallow assignment of control domain */ > - if (matrix_mdev->kvm) > - return -EBUSY; > - > ret = kstrtoul(buf, 0, &id); > if (ret) > return ret; > @@ -914,12 +1032,23 @@ static ssize_t assign_control_domain_store(struct device *dev, > if (!mutex_trylock(&matrix_dev->lock)) > return -EBUSY; > set_bit_inv(id, matrix_mdev->matrix.adm); > + vfio_ap_mdev_hot_plug_ctl_domain(matrix_mdev, id); > mutex_unlock(&matrix_dev->lock); > > return count; > } > static DEVICE_ATTR_WO(assign_control_domain); > > +static void > +vfio_ap_mdev_hot_unplug_ctl_domain(struct ap_matrix_mdev *matrix_mdev, > + unsigned long domid) > +{ > + if (test_bit_inv(domid, matrix_mdev->shadow_apcb.adm)) { > + clear_bit_inv(domid, matrix_mdev->shadow_apcb.adm); > + vfio_ap_mdev_commit_shadow_apcb(matrix_mdev); > + } > +} > + > /** > * unassign_control_domain_store > * > @@ -946,10 +1075,6 @@ static ssize_t unassign_control_domain_store(struct device *dev, > struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev); > unsigned long max_domid = matrix_mdev->matrix.adm_max; > > - /* If the guest is running, disallow un-assignment of control domain */ > - if (matrix_mdev->kvm) > - return -EBUSY; > - > ret = kstrtoul(buf, 0, &domid); > if (ret) > return ret; > @@ -958,6 +1083,7 @@ static ssize_t unassign_control_domain_store(struct device *dev, > > mutex_lock(&matrix_dev->lock); > clear_bit_inv(domid, matrix_mdev->matrix.adm); > + vfio_ap_mdev_hot_unplug_ctl_domain(matrix_mdev, domid); > mutex_unlock(&matrix_dev->lock); > > return count; > @@ -1099,8 +1225,6 @@ static int vfio_ap_mdev_set_kvm(struct ap_matrix_mdev *matrix_mdev, > { > struct ap_matrix_mdev *m; > > - mutex_lock(&matrix_dev->lock); > - > list_for_each_entry(m, &matrix_dev->mdev_list, node) { > if ((m != matrix_mdev) && (m->kvm == kvm)) { > mutex_unlock(&matrix_dev->lock); > @@ -1111,7 +1235,6 @@ static int vfio_ap_mdev_set_kvm(struct ap_matrix_mdev *matrix_mdev, > matrix_mdev->kvm = kvm; > kvm_get_kvm(kvm); > kvm->arch.crypto.pqap_hook = &matrix_mdev->pqap_hook; > - mutex_unlock(&matrix_dev->lock); > > return 0; > } > @@ -1148,7 +1271,7 @@ static int vfio_ap_mdev_iommu_notifier(struct notifier_block *nb, > static int vfio_ap_mdev_group_notifier(struct notifier_block *nb, > unsigned long action, void *data) > { > - int ret; > + int ret = NOTIFY_DONE; > struct ap_matrix_mdev *matrix_mdev; > > if (action != VFIO_GROUP_NOTIFY_SET_KVM) > @@ -1156,23 +1279,28 @@ static int vfio_ap_mdev_group_notifier(struct notifier_block *nb, > > matrix_mdev = container_of(nb, struct ap_matrix_mdev, group_notifier); > > + mutex_lock(&matrix_dev->lock); > + > if (!data) { > if (matrix_mdev->kvm) > kvm_put_kvm(matrix_mdev->kvm); > > matrix_mdev->kvm = NULL; > > - return NOTIFY_OK; > + ret = NOTIFY_OK; > + goto done; > } > > ret = vfio_ap_mdev_set_kvm(matrix_mdev, data); > if (ret) > - return NOTIFY_DONE; > + goto done; > > vfio_ap_mdev_init_apcb(matrix_mdev); > vfio_ap_mdev_commit_shadow_apcb(matrix_mdev); > > - return NOTIFY_OK; > +done: > + mutex_unlock(&matrix_dev->lock); > + return ret; > } > > static int vfio_ap_mdev_reset_queue(unsigned int apid, unsigned int apqi,