[PATCH 4/4] kvm: qemu: fix hot remove device

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



when hot remove a device with iommu, should deassign it from guest,
and free it from qemu.

assign_dev_update_irqs may not be invoked when hot add a device,
so need to assign irq after assign device in init_assigned_device.

Signed-off-by: Weidong Han <weidong.han@xxxxxxxxx>
---
 qemu/hw/device-assignment.c |  187 ++++++++++++++++++++++++++++++-------------
 qemu/hw/device-assignment.h |    3 +-
 qemu/hw/device-hotplug.c    |   18 ++++-
 3 files changed, 151 insertions(+), 57 deletions(-)

diff --git a/qemu/hw/device-assignment.c b/qemu/hw/device-assignment.c
index e6d2352..6362798 100644
--- a/qemu/hw/device-assignment.c
+++ b/qemu/hw/device-assignment.c
@@ -443,7 +443,7 @@ again:
 
 static LIST_HEAD(, AssignedDevInfo) adev_head;
 
-void free_assigned_device(AssignedDevInfo *adev)
+static void free_assigned_device(AssignedDevInfo *adev)
 {
     AssignedDevice *dev = adev->assigned_dev;
 
@@ -487,6 +487,116 @@ static uint32_t calc_assigned_dev_id(uint8_t bus, uint8_t devfn)
     return (uint32_t)bus << 8 | (uint32_t)devfn;
 }
 
+static int assign_device(AssignedDevInfo *adev)
+{
+    struct kvm_assigned_pci_dev assigned_dev_data;
+    AssignedDevice *dev = adev->assigned_dev;
+    int r;
+
+    memset(&assigned_dev_data, 0, sizeof(assigned_dev_data));
+    assigned_dev_data.assigned_dev_id  =
+	calc_assigned_dev_id(dev->h_busnr, dev->h_devfn);
+    assigned_dev_data.busnr = dev->h_busnr;
+    assigned_dev_data.devfn = dev->h_devfn;
+
+#ifdef KVM_CAP_IOMMU
+    /* We always enable the IOMMU if present
+     * (or when not disabled on the command line)
+     */
+    r = kvm_check_extension(kvm_context, KVM_CAP_IOMMU);
+    if (r && !adev->disable_iommu)
+	assigned_dev_data.flags |= KVM_DEV_ASSIGN_ENABLE_IOMMU;
+#endif
+ 
+    r = kvm_assign_pci_device(kvm_context, &assigned_dev_data);
+    if (r < 0)
+	fprintf(stderr, "Failed to assign device \"%s\" : %s\n",
+                adev->name, strerror(-r));
+    return r;
+}
+
+static void deassign_device(AssignedDevInfo *adev)
+{
+    struct kvm_assigned_pci_dev assigned_dev_data;
+    AssignedDevice *dev = adev->assigned_dev;
+    int r;
+
+    memset(&assigned_dev_data, 0, sizeof(assigned_dev_data));
+    assigned_dev_data.assigned_dev_id  =
+	calc_assigned_dev_id(dev->h_busnr, dev->h_devfn);
+    assigned_dev_data.busnr = dev->h_busnr;
+    assigned_dev_data.devfn = dev->h_devfn;
+
+#ifdef KVM_CAP_IOMMU
+    /* We always enable the IOMMU if present
+     * (or when not disabled on the command line)
+     */
+    r = kvm_check_extension(kvm_context, KVM_CAP_IOMMU);
+    if (r && !adev->disable_iommu)
+	assigned_dev_data.flags |= KVM_DEV_ASSIGN_ENABLE_IOMMU;
+#endif
+
+    if (kvm_deassign_pci_device(kvm_context, &assigned_dev_data))
+	fprintf(stderr, "Could not notify kernel about "
+                "deassigned device (%x:%x.%x)\n",
+                dev->h_busnr, PCI_SLOT(dev->h_devfn), PCI_FUNC(dev->h_devfn));
+}
+
+static int assign_irq(AssignedDevInfo *adev)
+{
+    struct kvm_assigned_irq assigned_irq_data;
+    AssignedDevice *dev = adev->assigned_dev;
+    int irq, r = 0;
+
+    irq = pci_map_irq(&dev->dev, dev->intpin);
+    irq = piix_get_irq(irq);
+
+#ifdef TARGET_IA64
+    irq = ipf_map_irq(&dev->dev, irq);
+#endif
+
+    if (dev->girq == irq)
+        return r;
+
+    memset(&assigned_irq_data, 0, sizeof(assigned_irq_data));
+    assigned_irq_data.assigned_dev_id =
+        calc_assigned_dev_id(dev->h_busnr, dev->h_devfn);
+    assigned_irq_data.guest_irq = irq;
+    assigned_irq_data.host_irq = dev->real_device.irq;
+    r = kvm_assign_irq(kvm_context, &assigned_irq_data);
+    if (r < 0) {
+        fprintf(stderr, "Failed to assign irq for \"%s\": %s\n",
+                adev->name, strerror(-r));
+        fprintf(stderr, "Perhaps you are assigning a device "
+                "that shares an IRQ with another device?\n");
+        return r;
+    }
+
+    dev->girq = irq;
+    return r;
+}
+
+void remove_assigned_device(AssignedDevInfo *adev)
+{
+    deassign_device(adev);
+    free_assigned_device(adev);
+}
+
+AssignedDevInfo *get_assigned_device(int pcibus, int slot)
+{
+    AssignedDevice *assigned_dev = NULL;
+    AssignedDevInfo *adev = NULL;
+
+    LIST_FOREACH(adev, &adev_head, next) {
+        assigned_dev = adev->assigned_dev;
+        if (pci_bus_num(assigned_dev->dev.bus) == pcibus &&
+            PCI_SLOT(assigned_dev->dev.devfn) == slot)
+            return adev;
+    }
+
+    return NULL;
+}
+
 /* The pci config space got updated. Check if irq numbers have changed
  * for our devices
  */
@@ -496,38 +606,12 @@ void assigned_dev_update_irqs()
 
     adev = LIST_FIRST(&adev_head);
     while (adev) {
-        AssignedDevInfo *next = LIST_NEXT(adev, next);
-        AssignedDevice *assigned_dev = adev->assigned_dev;
-        int irq, r;
+        int r;
+        struct AssignedDevInfo *next = LIST_NEXT(adev, next);
 
-        irq = pci_map_irq(&assigned_dev->dev, assigned_dev->intpin);
-        irq = piix_get_irq(irq);
-
-#ifdef TARGET_IA64
-	irq = ipf_map_irq(&assigned_dev->dev, irq);
-#endif
-
-        if (irq != assigned_dev->girq) {
-            struct kvm_assigned_irq assigned_irq_data;
-
-            memset(&assigned_irq_data, 0, sizeof(assigned_irq_data));
-            assigned_irq_data.assigned_dev_id  =
-                calc_assigned_dev_id(assigned_dev->h_busnr,
-                                     (uint8_t) assigned_dev->h_devfn);
-            assigned_irq_data.guest_irq = irq;
-            assigned_irq_data.host_irq = assigned_dev->real_device.irq;
-            r = kvm_assign_irq(kvm_context, &assigned_irq_data);
-            if (r < 0) {
-                fprintf(stderr, "Failed to assign irq for \"%s\": %s\n",
-                        adev->name, strerror(-r));
-                fprintf(stderr, "Perhaps you are assigning a device "
-                        "that shares an IRQ with another device?\n");
-                free_assigned_device(adev);
-                adev = next;
-                continue;
-            }
-            assigned_dev->girq = irq;
-        }
+        r = assign_irq(adev);
+        if (r < 0)
+            remove_assigned_device(adev);
 
         adev = next;
     }
@@ -538,7 +622,6 @@ struct PCIDevice *init_assigned_device(AssignedDevInfo *adev, PCIBus *bus)
     int r;
     AssignedDevice *dev;
     uint8_t e_device, e_intx;
-    struct kvm_assigned_pci_dev assigned_dev_data;
 
     DEBUG("Registering real physical device %s (bus=%x dev=%x func=%x)\n",
           adev->name, adev->bus, adev->dev, adev->func);
@@ -558,14 +641,14 @@ struct PCIDevice *init_assigned_device(AssignedDevInfo *adev, PCIBus *bus)
     if (get_real_device(dev, adev->bus, adev->dev, adev->func)) {
         fprintf(stderr, "%s: Error: Couldn't get real device (%s)!\n",
                 __func__, adev->name);
-        return NULL;
+        goto out;
     }
 
     /* handle real device's MMIO/PIO BARs */
     if (assigned_dev_register_regions(dev->real_device.regions,
                                       dev->real_device.region_number,
                                       dev))
-        return NULL;
+        goto out;
 
     /* handle interrupt routing */
     e_device = (dev->dev.devfn >> 3) & 0x1f;
@@ -576,29 +659,23 @@ struct PCIDevice *init_assigned_device(AssignedDevInfo *adev, PCIBus *bus)
     dev->h_busnr = adev->bus;
     dev->h_devfn = PCI_DEVFN(adev->dev, adev->func);
 
-    memset(&assigned_dev_data, 0, sizeof(assigned_dev_data));
-    assigned_dev_data.assigned_dev_id  =
-	calc_assigned_dev_id(dev->h_busnr, (uint32_t)dev->h_devfn);
-    assigned_dev_data.busnr = dev->h_busnr;
-    assigned_dev_data.devfn = dev->h_devfn;
-
-#ifdef KVM_CAP_IOMMU
-    /* We always enable the IOMMU if present
-     * (or when not disabled on the command line)
-     */
-    r = kvm_check_extension(kvm_context, KVM_CAP_IOMMU);
-    if (r && !adev->disable_iommu)
-	assigned_dev_data.flags |= KVM_DEV_ASSIGN_ENABLE_IOMMU;
-#endif
+    /* assign device */
+    r = assign_device(adev);
+    if (r < 0)
+        goto assigned_out;
 
-    r = kvm_assign_pci_device(kvm_context, &assigned_dev_data);
-    if (r < 0) {
-	fprintf(stderr, "Failed to assign device \"%s\" : %s\n",
-                adev->name, strerror(-r));
-	return NULL;
-    }
+    /* assign irq for the device */
+    r = assign_irq(adev);
+    if (r < 0)
+        goto assigned_out;
 
     return &dev->dev;
+
+assigned_out:
+    deassign_device(adev);
+out:
+    free_assigned_device(adev);
+    return NULL;
 }
 
 /*
diff --git a/qemu/hw/device-assignment.h b/qemu/hw/device-assignment.h
index f216bb0..da775d7 100644
--- a/qemu/hw/device-assignment.h
+++ b/qemu/hw/device-assignment.h
@@ -94,10 +94,11 @@ struct AssignedDevInfo {
     int disable_iommu;
 };
 
-void free_assigned_device(AssignedDevInfo *adev);
 PCIDevice *init_assigned_device(AssignedDevInfo *adev, PCIBus *bus);
 AssignedDevInfo *add_assigned_device(const char *arg);
 void add_assigned_devices(PCIBus *bus, const char **devices, int n_devices);
+void remove_assigned_device(AssignedDevInfo *adev);
+AssignedDevInfo *get_assigned_device(int pcibus, int slot);
 ram_addr_t assigned_dev_load_option_roms(ram_addr_t rom_base_offset);
 void assigned_dev_update_irqs(void);
 
diff --git a/qemu/hw/device-hotplug.c b/qemu/hw/device-hotplug.c
index d8f0fcc..9b213ad 100644
--- a/qemu/hw/device-hotplug.c
+++ b/qemu/hw/device-hotplug.c
@@ -51,7 +51,6 @@ static PCIDevice *qemu_system_hot_assign_device(const char *opts, int bus_nr)
     ret = init_assigned_device(adev, pci_bus);
     if (ret == NULL) {
         term_printf("Failed to assign device\n");
-        free_assigned_device(adev);
         return NULL;
     }
 
@@ -64,6 +63,14 @@ static PCIDevice *qemu_system_hot_assign_device(const char *opts, int bus_nr)
     return ret;
 }
 
+static void qemu_system_hot_deassign_device(AssignedDevInfo *adev)
+{
+    remove_assigned_device(adev);
+
+    term_printf("Unregistered host PCI device %02x:%02x.%1x "
+		"(\"%s\") from guest\n", 
+		adev->bus, adev->dev, adev->func, adev->name);
+}
 #endif /* USE_KVM_DEVICE_ASSIGNMENT */
 
 static int add_init_drive(const char *opts)
@@ -241,12 +248,21 @@ void device_hot_remove_success(int pcibus, int slot)
 {
     PCIDevice *d = pci_find_device(pcibus, slot);
     int class_code;
+    AssignedDevInfo *adev;
 
     if (!d) {
         term_printf("invalid slot %d\n", slot);
         return;
     }
 
+#ifdef USE_KVM_DEVICE_ASSIGNMENT
+    adev = get_assigned_device(pcibus, slot);
+    if (adev) {
+        qemu_system_hot_deassign_device(adev);
+        return;
+    }
+#endif /* USE_KVM_DEVICE_ASSIGNMENT */
+
     class_code = d->config_read(d, PCI_CLASS_DEVICE+1, 1);
 
     pci_unregister_device(d);
-- 
1.6.0.4

Attachment: 0004-kvm-qemu-fix-hot-remove-device.patch
Description: 0004-kvm-qemu-fix-hot-remove-device.patch


[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux