Hi Weidong, In general, this looks like a good cleanup. With deassign_device() fixed to only require assigned_dev_id, I would be happy to ACK this whole patch. However, it would be much, much easier to review the patch if you had split it into multiple patches e.g. 1) Make init_assigned_device() call free_assigned_device on error 2) Split out assign_device() with no functional changes 3) Split out assign_irq() with no functional changes 4) Add deassign_device() and make init_assigned_device() and assigned_dev_update_irqs() use it 5) Add device hotunplug On Tue, 2009-02-10 at 20:40 +0800, Han, Weidong wrote: > when hot remove a device with iommu, should deassign it from guest, > and free it from qemu. > > assign_dev_update_irqs may not be invoked when hot add a device, > so need to assign irq after assign device in init_assigned_device. > > Signed-off-by: Weidong Han <weidong.han@xxxxxxxxx> > --- > qemu/hw/device-assignment.c | 187 ++++++++++++++++++++++++++++++------------- > qemu/hw/device-assignment.h | 3 +- > qemu/hw/device-hotplug.c | 18 ++++- > 3 files changed, 151 insertions(+), 57 deletions(-) > > diff --git a/qemu/hw/device-assignment.c b/qemu/hw/device-assignment.c > index e6d2352..6362798 100644 > --- a/qemu/hw/device-assignment.c > +++ b/qemu/hw/device-assignment.c > @@ -443,7 +443,7 @@ again: > > static LIST_HEAD(, AssignedDevInfo) adev_head; > > -void free_assigned_device(AssignedDevInfo *adev) > +static void free_assigned_device(AssignedDevInfo *adev) Right, if init_assigned_device() fails, the device gets freed, so nothing outside of this file needs this. > @@ -487,6 +487,116 @@ static uint32_t calc_assigned_dev_id(uint8_t bus, uint8_t devfn) > return (uint32_t)bus << 8 | (uint32_t)devfn; > } > > +static int assign_device(AssignedDevInfo *adev) > +{ > + struct kvm_assigned_pci_dev assigned_dev_data; > + AssignedDevice *dev = adev->assigned_dev; > + int r; > + > + memset(&assigned_dev_data, 0, sizeof(assigned_dev_data)); > + assigned_dev_data.assigned_dev_id = > + calc_assigned_dev_id(dev->h_busnr, dev->h_devfn); > + assigned_dev_data.busnr = dev->h_busnr; > + assigned_dev_data.devfn = dev->h_devfn; > + > +#ifdef KVM_CAP_IOMMU > + /* We always enable the IOMMU if present > + * (or when not disabled on the command line) > + */ > + r = kvm_check_extension(kvm_context, KVM_CAP_IOMMU); > + if (r && !adev->disable_iommu) > + assigned_dev_data.flags |= KVM_DEV_ASSIGN_ENABLE_IOMMU; > +#endif > + > + r = kvm_assign_pci_device(kvm_context, &assigned_dev_data); > + if (r < 0) > + fprintf(stderr, "Failed to assign device \"%s\" : %s\n", > + adev->name, strerror(-r)); > + return r; > +} Just a copy of the code from init_assigned_device() with no functional changes. > +static void deassign_device(AssignedDevInfo *adev) > +{ > + struct kvm_assigned_pci_dev assigned_dev_data; > + AssignedDevice *dev = adev->assigned_dev; > + int r; > + > + memset(&assigned_dev_data, 0, sizeof(assigned_dev_data)); > + assigned_dev_data.assigned_dev_id = > + calc_assigned_dev_id(dev->h_busnr, dev->h_devfn); We should only need to set assigned_dev_id for deassignment, so these lines should not be needed: > + assigned_dev_data.busnr = dev->h_busnr; > + assigned_dev_data.devfn = dev->h_devfn; > + > +#ifdef KVM_CAP_IOMMU > + /* We always enable the IOMMU if present > + * (or when not disabled on the command line) > + */ > + r = kvm_check_extension(kvm_context, KVM_CAP_IOMMU); > + if (r && !adev->disable_iommu) > + assigned_dev_data.flags |= KVM_DEV_ASSIGN_ENABLE_IOMMU; > +#endif I think the kernel side needs this fix: - if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) + if (match->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) kvm_deassign_device(kvm, match); > + if (kvm_deassign_pci_device(kvm_context, &assigned_dev_data)) > + fprintf(stderr, "Could not notify kernel about " > + "deassigned device (%x:%x.%x)\n", > + dev->h_busnr, PCI_SLOT(dev->h_devfn), PCI_FUNC(dev->h_devfn)); > +} > + > +static int assign_irq(AssignedDevInfo *adev) > +{ > + struct kvm_assigned_irq assigned_irq_data; > + AssignedDevice *dev = adev->assigned_dev; > + int irq, r = 0; > + > + irq = pci_map_irq(&dev->dev, dev->intpin); > + irq = piix_get_irq(irq); > + > +#ifdef TARGET_IA64 > + irq = ipf_map_irq(&dev->dev, irq); > +#endif > + > + if (dev->girq == irq) > + return r; > + > + memset(&assigned_irq_data, 0, sizeof(assigned_irq_data)); > + assigned_irq_data.assigned_dev_id = > + calc_assigned_dev_id(dev->h_busnr, dev->h_devfn); > + assigned_irq_data.guest_irq = irq; > + assigned_irq_data.host_irq = dev->real_device.irq; > + r = kvm_assign_irq(kvm_context, &assigned_irq_data); > + if (r < 0) { > + fprintf(stderr, "Failed to assign irq for \"%s\": %s\n", > + adev->name, strerror(-r)); > + fprintf(stderr, "Perhaps you are assigning a device " > + "that shares an IRQ with another device?\n"); > + return r; > + } > + > + dev->girq = irq; > + return r; > +} Just a copy of the code in assigned_dev_update_irqs(), no functional changes. > +void remove_assigned_device(AssignedDevInfo *adev) > +{ > + deassign_device(adev); > + free_assigned_device(adev); > +} Okay, this is what the irq assignment code uses in its error handling and what the device hotunplug uses. > +AssignedDevInfo *get_assigned_device(int pcibus, int slot) > +{ > + AssignedDevice *assigned_dev = NULL; > + AssignedDevInfo *adev = NULL; > + > + LIST_FOREACH(adev, &adev_head, next) { > + assigned_dev = adev->assigned_dev; > + if (pci_bus_num(assigned_dev->dev.bus) == pcibus && > + PCI_SLOT(assigned_dev->dev.devfn) == slot) > + return adev; > + } > + > + return NULL; > +} Fine. > /* The pci config space got updated. Check if irq numbers have changed > * for our devices > */ > @@ -496,38 +606,12 @@ void assigned_dev_update_irqs() > > adev = LIST_FIRST(&adev_head); > while (adev) { > - AssignedDevInfo *next = LIST_NEXT(adev, next); > + struct AssignedDevInfo *next = LIST_NEXT(adev, next); This is a spurious change, we don't need it. > - AssignedDevice *assigned_dev = adev->assigned_dev; > - int irq, r; > + int r; > > - irq = pci_map_irq(&assigned_dev->dev, assigned_dev->intpin); > - irq = piix_get_irq(irq); > - > -#ifdef TARGET_IA64 > - irq = ipf_map_irq(&assigned_dev->dev, irq); > -#endif > - > - if (irq != assigned_dev->girq) { > - struct kvm_assigned_irq assigned_irq_data; > - > - memset(&assigned_irq_data, 0, sizeof(assigned_irq_data)); > - assigned_irq_data.assigned_dev_id = > - calc_assigned_dev_id(assigned_dev->h_busnr, > - (uint8_t) assigned_dev->h_devfn); > - assigned_irq_data.guest_irq = irq; > - assigned_irq_data.host_irq = assigned_dev->real_device.irq; > - r = kvm_assign_irq(kvm_context, &assigned_irq_data); > - if (r < 0) { > - fprintf(stderr, "Failed to assign irq for \"%s\": %s\n", > - adev->name, strerror(-r)); > - fprintf(stderr, "Perhaps you are assigning a device " > - "that shares an IRQ with another device?\n"); > - free_assigned_device(adev); > - adev = next; > - continue; > - } > - assigned_dev->girq = irq; > - } > + r = assign_irq(adev); > + if (r < 0) > + remove_assigned_device(adev); Okay, the addition of deassignment is the only functional change. > @@ -576,29 +659,23 @@ struct PCIDevice *init_assigned_device(AssignedDevInfo *adev, PCIBus *bus) > dev->h_busnr = adev->bus; > dev->h_devfn = PCI_DEVFN(adev->dev, adev->func); > > - memset(&assigned_dev_data, 0, sizeof(assigned_dev_data)); > - assigned_dev_data.assigned_dev_id = > - calc_assigned_dev_id(dev->h_busnr, (uint32_t)dev->h_devfn); > - assigned_dev_data.busnr = dev->h_busnr; > - assigned_dev_data.devfn = dev->h_devfn; > - > -#ifdef KVM_CAP_IOMMU > - /* We always enable the IOMMU if present > - * (or when not disabled on the command line) > - */ > - r = kvm_check_extension(kvm_context, KVM_CAP_IOMMU); > - if (r && !adev->disable_iommu) > - assigned_dev_data.flags |= KVM_DEV_ASSIGN_ENABLE_IOMMU; > -#endif > + /* assign device */ > + r = assign_device(adev); > + if (r < 0) > + goto assigned_out; > > - r = kvm_assign_pci_device(kvm_context, &assigned_dev_data); > - if (r < 0) { > - fprintf(stderr, "Failed to assign device \"%s\" : %s\n", > - adev->name, strerror(-r)); > - return NULL; > - } > + /* assign irq for the device */ > + r = assign_irq(adev); > + if (r < 0) > + goto assigned_out; > > return &dev->dev; > + > +assigned_out: > + deassign_device(adev); > +out: > + free_assigned_device(adev); > + return NULL; > } The addition of deassignment and freeing are the only functional changes. Cheers, Mark. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html