On Fri, 22 Feb 2019 16:29:58 +0100 Pierre Morel <pmorel@xxxxxxxxxxxxx> wrote: > We register the AP PQAP instruction hook during the open > of the mediated device. And unregister it on release. > > In the AP PQAP instruction hook, if we receive a demand to > enable IRQs, > - we retrieve the vfio_ap_queue based on the APQN we receive > in REG1, > - we retrieve the page of the guest address, (NIB), from > register REG2 > - we the mediated device to use the VFIO pinning infratrsucture > to pin the page of the guest address, > - we retrieve the pointer to KVM to register the guest ISC > and retrieve the host ISC > - finaly we activate GISA > > If we receive a demand to disable IRQs, > - we deactivate GISA > - unregister from the GIB > - unping the NIB > > Signed-off-by: Pierre Morel <pmorel@xxxxxxxxxxxxx> > --- [..] > + */ > +static void vfio_ap_free_irq(struct vfio_ap_queue *q) > +{ > + if (!q) > + return; > + if (q->g_pfn) > + vfio_unpin_pages(mdev_dev(q->matrix_mdev->mdev), &q->g_pfn, 1); > + if (q->isc) > + kvm_s390_gisc_unregister(q->matrix_mdev->kvm, q->isc); Isn't 0 a perfectly legitimate ISC? If it is, the `if (q->isc)` check above would skip the unregister for ISC 0. > + q->nib = 0; > + q->isc = 0; > + q->g_pfn = 0; > +} > + [..] > @@ -109,10 +131,16 @@ static int vfio_ap_mdev_create(struct kobject *kobj, struct mdev_device *mdev) > static int vfio_ap_mdev_remove(struct mdev_device *mdev) > { > struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev); > + struct vfio_ap_queue *q, *qtmp; > > if (matrix_mdev->kvm) > return -EBUSY; > > + list_for_each_entry_safe(q, qtmp, &matrix_mdev->qlist, list) { > + q->matrix_mdev = NULL; > + vfio_ap_mdev_reset_queue(q); > + list_move(&q->list, &matrix_dev->free_list); How about matrix_dev->lock? I guess you should protect free_list with it. If not, maybe a code comment would help readers not stumble over this. > + } > mutex_lock(&matrix_dev->lock); > list_del(&matrix_mdev->node); > mutex_unlock(&matrix_dev->lock); [..] 
> +/** > + * vfio_ap_setirq: Enable Interruption for a APQN > + * > + * @dev: the device associated with the ap_queue > + * @q: the vfio_ap_queue holding AQIC parameters > + * > + * Pin the NIB saved in *q > + * Register the guest ISC to GIB interface and retrieve the > + * host ISC to issue the host side PQAP/AQIC > + * > + * Response.status may be set to following Response Code in case of error: > + * - AP_RESPONSE_INVALID_ADDRESS: vfio_pin_pages failed > + * - AP_RESPONSE_OTHERWISE_CHANGED: Hypervizor GISA internal error > + * > + * Otherwise return the ap_queue_status returned by the ap_aqic() > + */ > +static struct ap_queue_status vfio_ap_setirq(struct vfio_ap_queue *q) > +{ > + struct ap_qirq_ctrl aqic_gisa = {}; > + struct ap_queue_status status = {}; > + struct kvm_s390_gisa *gisa; > + struct kvm *kvm; > + unsigned long g_pfn, h_nib, h_pfn; > + int ret; > + > + kvm = q->matrix_mdev->kvm; > + gisa = kvm->arch.gisa_int.origin; > + > + g_pfn = q->nib >> PAGE_SHIFT; > + ret = vfio_pin_pages(mdev_dev(q->matrix_mdev->mdev), &g_pfn, 1, > + IOMMU_READ | IOMMU_WRITE, &h_pfn); > + switch (ret) { > + case 1: > + break; > + case -EINVAL: > + case -E2BIG: > + status.response_code = AP_RESPONSE_INVALID_ADDRESS; > + /* Fallthrough */ > + default: > + return status; > + } > + > + h_nib = (h_pfn << PAGE_SHIFT) | (q->nib & ~PAGE_MASK); > + aqic_gisa.gisc = q->isc; > + aqic_gisa.isc = kvm_s390_gisc_register(kvm, q->isc); > + aqic_gisa.ir = 1; > + aqic_gisa.gisa = gisa->next_alert >> 4; > + > + status = ap_aqic(q->apqn, aqic_gisa, (void *)h_nib); > + switch (status.response_code) { > + case AP_RESPONSE_NORMAL: > + if (q->g_pfn) > + vfio_unpin_pages(mdev_dev(q->matrix_mdev->mdev), > + &q->g_pfn, 1); Shouldn't you call kvm_s390_gisc_unregister() here? > + q->g_pfn = g_pfn; > + break; > + case AP_RESPONSE_OTHERWISE_CHANGED: > + vfio_unpin_pages(mdev_dev(q->matrix_mdev->mdev), &g_pfn, 1); And here. 
> + break; > + case AP_RESPONSE_INVALID_GISA: > + status.response_code = AP_RESPONSE_INVALID_ADDRESS; > + default: /* Fall Through */ > + pr_warn("%s: apqn %04x: response: %02x\n", __func__, q->apqn, > + status.response_code); > + vfio_ap_free_irq(q); This won't unpin the local g_pfn that was just pinned, only q->g_pfn if it is non-zero :/ > + break; > + } > + > + return status; > +} > + > +/** > + * handle_pqap: PQAP instruction callback > + * > + * @vcpu: The vcpu on which we received the PQAP instruction > + * > + * Get the general register contents to initialize internal variables. > + * REG[0]: APQN > + * REG[1]: IR and ISC > + * REG[2]: NIB > + * > + * Response.status may be set to following Response Code: > + * - AP_RESPONSE_Q_NOT_AVAIL: if the queue is not available > + * - AP_RESPONSE_DECONFIGURED: if the queue is not configured > + * - AP_RESPONSE_NORMAL (0) : in case of successs > + * Check vfio_ap_setirq() and vfio_ap_clrirq() for other possible > RC. > + * > + * Return 0 if we could handle the request inside KVM. > + * otherwise, returns -EOPNOTSUPP to let QEMU handle the fault. > + */ > +static int handle_pqap(struct kvm_vcpu *vcpu) > +{ > + uint64_t status; > + uint16_t apqn; > + struct vfio_ap_queue *q; > + struct ap_queue_status qstatus = {}; > + struct ap_matrix_mdev *matrix_mdev; > + > + /* If we do not use the AIV facility just go to userland */ > + if (!(vcpu->arch.sie_block->eca & ECA_AIV)) > + return -EOPNOTSUPP; > + > + apqn = vcpu->run->s.regs.gprs[0] & 0xffff; > + matrix_mdev = vcpu->kvm->arch.crypto.vfio_private; > + if (!matrix_mdev) > + return -EOPNOTSUPP; > + q = vfio_ap_get_queue(apqn, &matrix_mdev->qlist); This get is not a 'refcount affecting get' any more... > + if (!q) { > + qstatus.response_code = AP_RESPONSE_Q_NOT_AVAIL; > + goto out; > + } > + > + status = vcpu->run->s.regs.gprs[1]; > + > + /* If IR bit(16) is set we enable the interrupt */ > + if ((status >> (63 - 16)) & 0x01) { > + q->isc = status & 0x07; > + q->nib = vcpu->run->s.regs.gprs[2]; ... 
and I don't see what would prevent a potential use-after-free here. Regards, Halil > + qstatus = vfio_ap_setirq(q); > + if (qstatus.response_code) { > + q->nib = 0; > + q->isc = 0; > + } > + } else > + qstatus = vfio_ap_clrirq(q); > + > +out: > + memcpy(&vcpu->run->s.regs.gprs[1], &qstatus, sizeof(qstatus)); > + return 0; > +} [..]