when hot remove a device with iommu, should deassign it from guest, and free it from qemu. assign_dev_update_irqs may not be invoked when hot add a device, so need to assign irq after assign device in init_assigned_device. Signed-off-by: Weidong Han <weidong.han@xxxxxxxxx> --- qemu/hw/device-assignment.c | 187 ++++++++++++++++++++++++++++++------------- qemu/hw/device-assignment.h | 3 +- qemu/hw/device-hotplug.c | 18 ++++- 3 files changed, 151 insertions(+), 57 deletions(-) diff --git a/qemu/hw/device-assignment.c b/qemu/hw/device-assignment.c index e6d2352..6362798 100644 --- a/qemu/hw/device-assignment.c +++ b/qemu/hw/device-assignment.c @@ -443,7 +443,7 @@ again: static LIST_HEAD(, AssignedDevInfo) adev_head; -void free_assigned_device(AssignedDevInfo *adev) +static void free_assigned_device(AssignedDevInfo *adev) { AssignedDevice *dev = adev->assigned_dev; @@ -487,6 +487,116 @@ static uint32_t calc_assigned_dev_id(uint8_t bus, uint8_t devfn) return (uint32_t)bus << 8 | (uint32_t)devfn; } +static int assign_device(AssignedDevInfo *adev) +{ + struct kvm_assigned_pci_dev assigned_dev_data; + AssignedDevice *dev = adev->assigned_dev; + int r; + + memset(&assigned_dev_data, 0, sizeof(assigned_dev_data)); + assigned_dev_data.assigned_dev_id = + calc_assigned_dev_id(dev->h_busnr, dev->h_devfn); + assigned_dev_data.busnr = dev->h_busnr; + assigned_dev_data.devfn = dev->h_devfn; + +#ifdef KVM_CAP_IOMMU + /* We always enable the IOMMU if present + * (or when not disabled on the command line) + */ + r = kvm_check_extension(kvm_context, KVM_CAP_IOMMU); + if (r && !adev->disable_iommu) + assigned_dev_data.flags |= KVM_DEV_ASSIGN_ENABLE_IOMMU; +#endif + + r = kvm_assign_pci_device(kvm_context, &assigned_dev_data); + if (r < 0) + fprintf(stderr, "Failed to assign device \"%s\" : %s\n", + adev->name, strerror(-r)); + return r; +} + +static void deassign_device(AssignedDevInfo *adev) +{ + struct kvm_assigned_pci_dev assigned_dev_data; + AssignedDevice *dev = adev->assigned_dev; + int r; + + memset(&assigned_dev_data, 0, sizeof(assigned_dev_data)); + assigned_dev_data.assigned_dev_id = + calc_assigned_dev_id(dev->h_busnr, dev->h_devfn); + assigned_dev_data.busnr = dev->h_busnr; + assigned_dev_data.devfn = dev->h_devfn; + +#ifdef KVM_CAP_IOMMU + /* We always enable the IOMMU if present + * (or when not disabled on the command line) + */ + r = kvm_check_extension(kvm_context, KVM_CAP_IOMMU); + if (r && !adev->disable_iommu) + assigned_dev_data.flags |= KVM_DEV_ASSIGN_ENABLE_IOMMU; +#endif + + if (kvm_deassign_pci_device(kvm_context, &assigned_dev_data)) + fprintf(stderr, "Could not notify kernel about " + "deassigned device (%x:%x.%x)\n", + dev->h_busnr, PCI_SLOT(dev->h_devfn), PCI_FUNC(dev->h_devfn)); +} + +static int assign_irq(AssignedDevInfo *adev) +{ + struct kvm_assigned_irq assigned_irq_data; + AssignedDevice *dev = adev->assigned_dev; + int irq, r = 0; + + irq = pci_map_irq(&dev->dev, dev->intpin); + irq = piix_get_irq(irq); + +#ifdef TARGET_IA64 + irq = ipf_map_irq(&dev->dev, irq); +#endif + + if (dev->girq == irq) + return r; + + memset(&assigned_irq_data, 0, sizeof(assigned_irq_data)); + assigned_irq_data.assigned_dev_id = + calc_assigned_dev_id(dev->h_busnr, dev->h_devfn); + assigned_irq_data.guest_irq = irq; + assigned_irq_data.host_irq = dev->real_device.irq; + r = kvm_assign_irq(kvm_context, &assigned_irq_data); + if (r < 0) { + fprintf(stderr, "Failed to assign irq for \"%s\": %s\n", + adev->name, strerror(-r)); + fprintf(stderr, "Perhaps you are assigning a device " + "that shares an IRQ with another device?\n"); + return r; + } + + dev->girq = irq; + return r; +} + +void remove_assigned_device(AssignedDevInfo *adev) +{ + deassign_device(adev); + free_assigned_device(adev); +} + +AssignedDevInfo *get_assigned_device(int pcibus, int slot) +{ + AssignedDevice *assigned_dev = NULL; + AssignedDevInfo *adev = NULL; + + LIST_FOREACH(adev, &adev_head, next) { + assigned_dev = adev->assigned_dev; + if (pci_bus_num(assigned_dev->dev.bus) == pcibus && + PCI_SLOT(assigned_dev->dev.devfn) == slot) + return adev; + } + + return NULL; +} + /* The pci config space got updated. Check if irq numbers have changed * for our devices */ @@ -496,38 +606,12 @@ void assigned_dev_update_irqs() adev = LIST_FIRST(&adev_head); while (adev) { - AssignedDevInfo *next = LIST_NEXT(adev, next); - AssignedDevice *assigned_dev = adev->assigned_dev; - int irq, r; + int r; + struct AssignedDevInfo *next = LIST_NEXT(adev, next); - irq = pci_map_irq(&assigned_dev->dev, assigned_dev->intpin); - irq = piix_get_irq(irq); - -#ifdef TARGET_IA64 - irq = ipf_map_irq(&assigned_dev->dev, irq); -#endif - - if (irq != assigned_dev->girq) { - struct kvm_assigned_irq assigned_irq_data; - - memset(&assigned_irq_data, 0, sizeof(assigned_irq_data)); - assigned_irq_data.assigned_dev_id = - calc_assigned_dev_id(assigned_dev->h_busnr, - (uint8_t) assigned_dev->h_devfn); - assigned_irq_data.guest_irq = irq; - assigned_irq_data.host_irq = assigned_dev->real_device.irq; - r = kvm_assign_irq(kvm_context, &assigned_irq_data); - if (r < 0) { - fprintf(stderr, "Failed to assign irq for \"%s\": %s\n", - adev->name, strerror(-r)); - fprintf(stderr, "Perhaps you are assigning a device " - "that shares an IRQ with another device?\n"); - free_assigned_device(adev); - adev = next; - continue; - } - assigned_dev->girq = irq; - } + r = assign_irq(adev); + if (r < 0) + remove_assigned_device(adev); adev = next; } @@ -538,7 +622,6 @@ struct PCIDevice *init_assigned_device(AssignedDevInfo *adev, PCIBus *bus) int r; AssignedDevice *dev; uint8_t e_device, e_intx; - struct kvm_assigned_pci_dev assigned_dev_data; DEBUG("Registering real physical device %s (bus=%x dev=%x func=%x)\n", adev->name, adev->bus, adev->dev, adev->func); @@ -558,14 +641,14 @@ struct PCIDevice *init_assigned_device(AssignedDevInfo *adev, PCIBus *bus) if (get_real_device(dev, adev->bus, adev->dev, adev->func)) { fprintf(stderr, "%s: Error: Couldn't get real device (%s)!\n", __func__, adev->name); - return NULL; + goto out; } /* handle real device's MMIO/PIO BARs */ if (assigned_dev_register_regions(dev->real_device.regions, dev->real_device.region_number, dev)) - return NULL; + goto out; /* handle interrupt routing */ e_device = (dev->dev.devfn >> 3) & 0x1f; @@ -576,29 +659,23 @@ struct PCIDevice *init_assigned_device(AssignedDevInfo *adev, PCIBus *bus) dev->h_busnr = adev->bus; dev->h_devfn = PCI_DEVFN(adev->dev, adev->func); - memset(&assigned_dev_data, 0, sizeof(assigned_dev_data)); - assigned_dev_data.assigned_dev_id = - calc_assigned_dev_id(dev->h_busnr, (uint32_t)dev->h_devfn); - assigned_dev_data.busnr = dev->h_busnr; - assigned_dev_data.devfn = dev->h_devfn; - -#ifdef KVM_CAP_IOMMU - /* We always enable the IOMMU if present - * (or when not disabled on the command line) - */ - r = kvm_check_extension(kvm_context, KVM_CAP_IOMMU); - if (r && !adev->disable_iommu) - assigned_dev_data.flags |= KVM_DEV_ASSIGN_ENABLE_IOMMU; -#endif + /* assign device */ + r = assign_device(adev); + if (r < 0) + goto assigned_out; - r = kvm_assign_pci_device(kvm_context, &assigned_dev_data); - if (r < 0) { - fprintf(stderr, "Failed to assign device \"%s\" : %s\n", - adev->name, strerror(-r)); - return NULL; - } + /* assign irq for the device */ + r = assign_irq(adev); + if (r < 0) + goto assigned_out; return &dev->dev; + +assigned_out: + deassign_device(adev); +out: + free_assigned_device(adev); + return NULL; } /* diff --git a/qemu/hw/device-assignment.h b/qemu/hw/device-assignment.h index f216bb0..da775d7 100644 --- a/qemu/hw/device-assignment.h +++ b/qemu/hw/device-assignment.h @@ -94,10 +94,11 @@ struct AssignedDevInfo { int disable_iommu; }; -void free_assigned_device(AssignedDevInfo *adev); PCIDevice *init_assigned_device(AssignedDevInfo *adev, PCIBus *bus); AssignedDevInfo *add_assigned_device(const char *arg); void add_assigned_devices(PCIBus *bus, const char **devices, int n_devices); +void remove_assigned_device(AssignedDevInfo *adev); +AssignedDevInfo *get_assigned_device(int pcibus, int slot); ram_addr_t assigned_dev_load_option_roms(ram_addr_t rom_base_offset); void assigned_dev_update_irqs(void); diff --git a/qemu/hw/device-hotplug.c b/qemu/hw/device-hotplug.c index d8f0fcc..9b213ad 100644 --- a/qemu/hw/device-hotplug.c +++ b/qemu/hw/device-hotplug.c @@ -51,7 +51,6 @@ static PCIDevice *qemu_system_hot_assign_device(const char *opts, int bus_nr) ret = init_assigned_device(adev, pci_bus); if (ret == NULL) { term_printf("Failed to assign device\n"); - free_assigned_device(adev); return NULL; } @@ -64,6 +63,14 @@ static PCIDevice *qemu_system_hot_assign_device(const char *opts, int bus_nr) return ret; } +static void qemu_system_hot_deassign_device(AssignedDevInfo *adev) +{ + remove_assigned_device(adev); + + term_printf("Unregistered host PCI device %02x:%02x.%1x " + "(\"%s\") from guest\n", + adev->bus, adev->dev, adev->func, adev->name); +} #endif /* USE_KVM_DEVICE_ASSIGNMENT */ static int add_init_drive(const char *opts) @@ -241,12 +248,21 @@ void device_hot_remove_success(int pcibus, int slot) { PCIDevice *d = pci_find_device(pcibus, slot); int class_code; + AssignedDevInfo *adev; if (!d) { term_printf("invalid slot %d\n", slot); return; } +#ifdef USE_KVM_DEVICE_ASSIGNMENT + adev = get_assigned_device(pcibus, slot); + if (adev) { + qemu_system_hot_deassign_device(adev); + return; + } +#endif /* USE_KVM_DEVICE_ASSIGNMENT */ + class_code = d->config_read(d, PCI_CLASS_DEVICE+1, 1); pci_unregister_device(d); -- 1.6.0.4
Attachment:
0004-kvm-qemu-fix-hot-remove-device.patch
Description: 0004-kvm-qemu-fix-hot-remove-device.patch