[RFC PATCH V2 09/10] Qemu/VFIO: Add SRIOV VF migration support

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This patch adds SR-IOV VF migration support.
It creates a new device type, "vfio-sriov", and adds an emulated (fake)
PCI migration capability to devices of that type.

The new capability serves three purposes:
1) Sync the migration status with the VF driver in the VM.
2) Get the mailbox IRQ vector used to notify the VF driver during migration.
3) Provide a way to control whether the IRQ is injected or not.

QEMU migrates the PCI configuration space registers and the MSI-X
configuration of the VF. At the last stage of migration it injects the
mailbox IRQ to notify the VF driver of the migration event, then waits
until the VF driver is ready for migration. The VF driver signals this to
QEMU by writing the PCI config register PCI_VF_MIGRATION_VF_STATUS in the
new capability.

Signed-off-by: Lan Tianyu <tianyu.lan@xxxxxxxxx>
---
 hw/vfio/Makefile.objs |   2 +-
 hw/vfio/pci.c         |   6 ++
 hw/vfio/pci.h         |   4 ++
 hw/vfio/sriov.c       | 178 ++++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 189 insertions(+), 1 deletion(-)
 create mode 100644 hw/vfio/sriov.c

diff --git a/hw/vfio/Makefile.objs b/hw/vfio/Makefile.objs
index d540c9d..9cf0178 100644
--- a/hw/vfio/Makefile.objs
+++ b/hw/vfio/Makefile.objs
@@ -1,6 +1,6 @@
 ifeq ($(CONFIG_LINUX), y)
 obj-$(CONFIG_SOFTMMU) += common.o
-obj-$(CONFIG_PCI) += pci.o
+obj-$(CONFIG_PCI) += pci.o sriov.o
 obj-$(CONFIG_SOFTMMU) += platform.o
 obj-$(CONFIG_SOFTMMU) += calxeda-xgmac.o
 endif
diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 7c43fc1..e7583b5 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -2013,6 +2013,11 @@ void vfio_pci_write_config(PCIDevice *pdev, uint32_t addr,
         } else if (was_enabled && !is_enabled) {
             vfio_disable_msix(vdev);
         }
+    } else if (vdev->migration_cap &&
+        ranges_overlap(addr, len, vdev->migration_cap, 0x10)) {
+        /* Write everything to QEMU to keep emulated bits correct */
+        pci_default_write_config(pdev, addr, val, len);
+        vfio_migration_cap_handle(pdev, addr, val, len);
     } else {
         /* Write everything to QEMU to keep emulated bits correct */
         pci_default_write_config(pdev, addr, val, len);
@@ -3517,6 +3522,7 @@ static int vfio_initfn(PCIDevice *pdev)
     vfio_register_err_notifier(vdev);
     vfio_register_req_notifier(vdev);
     vfio_setup_resetfn(vdev);
+    vfio_add_migration_capability(vdev);
 
     return 0;
 
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index 6c00575..ee6ca5e 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -134,6 +134,7 @@ typedef struct VFIOPCIDevice {
     PCIHostDeviceAddress host;
     EventNotifier err_notifier;
     EventNotifier req_notifier;
+    uint16_t    migration_cap;
     int (*resetfn)(struct VFIOPCIDevice *);
     uint32_t features;
 #define VFIO_FEATURE_ENABLE_VGA_BIT 0
@@ -162,3 +163,6 @@ uint32_t vfio_pci_read_config(PCIDevice *pdev, uint32_t addr, int len);
 void vfio_pci_write_config(PCIDevice *pdev, uint32_t addr,
                            uint32_t val, int len);
 void vfio_enable_msix(VFIOPCIDevice *vdev);
+void vfio_add_migration_capability(VFIOPCIDevice *vdev);
+void vfio_migration_cap_handle(PCIDevice *pdev, uint32_t addr,
+                               uint32_t val, int len);
diff --git a/hw/vfio/sriov.c b/hw/vfio/sriov.c
new file mode 100644
index 0000000..3109538
--- /dev/null
+++ b/hw/vfio/sriov.c
@@ -0,0 +1,178 @@
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/io.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <glob.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+
+#include "hw/hw.h"
+#include "hw/vfio/pci.h"
+#include "hw/vfio/vfio.h"
+#include "hw/vfio/vfio-common.h"
+
+#define TYPE_VFIO_SRIOV "vfio-sriov"
+/* Stream markers written by vfio_dev_setup() / vfio_dev_save_complete(). */
+#define SRIOV_LM_SETUP 0x01
+#define SRIOV_LM_COMPLETE 0x02
+/* Only used in this file, hence static; one event is shared by all devices. */
+static QemuEvent migration_event;
+
+/* After load: inject the vector from PCI_VF_MIGRATION_IRQ if cap is enabled. */
+static void vfio_dev_post_load(void *opaque)
+{
+    PCIDevice *pdev = opaque;
+    VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
+    uint16_t cap = vdev->migration_cap;
+    int vector;
+
+    /* cap == 0 means the capability was never added; nothing to notify. */
+    if (!cap || vfio_pci_read_config(pdev, cap + PCI_VF_MIGRATION_CAP, 1)
+                != PCI_VF_MIGRATION_ENABLE) {
+        return;
+    }
+
+    vector = vfio_pci_read_config(pdev, cap + PCI_VF_MIGRATION_IRQ, 1);
+    /* Send the MSI-X message currently programmed for that vector. */
+    kvm_irqchip_send_msi(kvm_state, msix_get_message(pdev, vector));
+}
+
+/* Load handler: restore PCI/MSI-X state, then mark the migration as ended. */
+static int vfio_dev_load(QEMUFile *f, void *opaque, int version_id)
+{
+    PCIDevice *pdev = opaque;
+    VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
+    uint16_t cap = vdev->migration_cap;
+    int ret;
+
+    if (qemu_get_byte(f) != SRIOV_LM_COMPLETE) {
+        return 0;    /* e.g. SRIOV_LM_SETUP: marker only, no payload */
+    }
+    ret = pci_device_load(pdev, f);
+    if (ret) {
+        error_report("Failed to load PCI config space.");
+        return ret;
+    }
+    if (msix_enabled(pdev)) {
+        vfio_enable_msix(vdev);
+        msix_load(pdev, f);
+    }
+    vfio_pci_write_config(pdev, cap + PCI_VF_MIGRATION_VMM_STATUS,
+                          VMM_MIGRATION_END, 1);
+    vfio_pci_write_config(pdev, cap + PCI_VF_MIGRATION_VF_STATUS,
+                          PCI_VF_WAIT_FOR_MIGRATION, 1);
+    return 0;
+}
+
+/* Final save stage: SRIOV_LM_COMPLETE marker, PCI state, then MSI-X if on. */
+static int vfio_dev_save_complete(QEMUFile *f, void *opaque)
+{
+    PCIDevice *dev = opaque;
+
+    qemu_put_byte(f, SRIOV_LM_COMPLETE);
+    pci_device_save(dev, f);
+    if (!msix_enabled(dev)) {
+        return 0;
+    }
+    msix_save(dev, f);
+    return 0;
+}
+
+static int vfio_dev_setup(QEMUFile *f, void *opaque)
+{
+    qemu_put_byte(f, SRIOV_LM_SETUP);   /* marker byte only, no device state */
+    return 0;
+}
+
+/* save_before_stop hook: flag migration start and wait for the VF driver. */
+static void vfio_dev_save_before_stop(QEMUFile *f, void *opaque)
+{
+    PCIDevice *pdev = opaque;
+    VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
+    uint16_t cap = vdev->migration_cap;
+    int vector;
+
+    /* cap == 0: capability was never added; do not touch config offset 0+. */
+    if (!cap) {
+        return;
+    }
+    vfio_pci_write_config(pdev, cap + PCI_VF_MIGRATION_VMM_STATUS,
+                          VMM_MIGRATION_START, 1);
+    if (vfio_pci_read_config(pdev, cap + PCI_VF_MIGRATION_CAP, 1)
+            != PCI_VF_MIGRATION_ENABLE) {
+        return;
+    }
+    vector = vfio_pci_read_config(pdev, cap + PCI_VF_MIGRATION_IRQ, 1);
+
+    /* Reset before injecting so a reply arriving early is not cleared. */
+    qemu_event_reset(&migration_event);
+    kvm_irqchip_send_msi(kvm_state, msix_get_message(pdev, vector));
+    qemu_event_wait(&migration_event);
+}
+
+static SaveVMHandlers savevm_pt_handlers = {
+    .save_live_setup = vfio_dev_setup,              /* save: marker byte */
+    .save_before_stop = vfio_dev_save_before_stop,  /* save: notify and wait */
+    .save_live_complete = vfio_dev_save_complete,   /* save: device state */
+    .load_state = vfio_dev_load,                    /* load: device state */
+    .post_load_state = vfio_dev_post_load,          /* load: notify */
+};
+
+/* Add the emulated migration capability; no-op unless type is "vfio-sriov". */
+void vfio_add_migration_capability(VFIOPCIDevice *vdev)
+{
+    PCIDevice *pdev = &vdev->pdev;
+    int pos;
+    if (strcmp(object_get_typename(OBJECT(vdev)), TYPE_VFIO_SRIOV) != 0) {
+        return;
+    }
+    pos = vfio_find_free_cfg_reg(vdev, pdev->config[PCI_CAPABILITY_LIST],
+                                 PCI_VF_MIGRATION_CAP_SIZE);
+    if (!pos) {
+        error_report("vfio: Fail to find free PCI config space regs.");
+        return;
+    }
+    if (pci_add_capability(pdev, PCI_CAP_ID_MIGRATION, pos,
+                           PCI_VF_MIGRATION_CAP_SIZE) < 0) {
+        return;    /* migration_cap stays 0, so the write hook stays off */
+    }
+    vdev->migration_cap = pos;
+    memset(vdev->emulated_config_bits + pos, 0xff, PCI_VF_MIGRATION_CAP_SIZE);
+    memset(pdev->wmask + pos, 0xff, PCI_VF_MIGRATION_CAP_SIZE);
+}
+
+/* Set migration_event when PCI_VF_READY_FOR_MIGRATION is written to VF_STATUS. */
+void vfio_migration_cap_handle(PCIDevice *pdev, uint32_t addr,
+                               uint32_t val, int len)
+{
+    VFIOPCIDevice *vdev = DO_UPCAST(VFIOPCIDevice, pdev, pdev);
+    uint32_t status_reg = vdev->migration_cap + PCI_VF_MIGRATION_VF_STATUS;
+    if (val == PCI_VF_READY_FOR_MIGRATION && addr == status_reg) {
+        qemu_event_set(&migration_event);
+    }
+}
+
+/* Instance init for "vfio-sriov": register the live-migration handlers. */
+static void vfio_sriov_instance_init(Object *obj)
+{
+    PCIDevice *dev = PCI_DEVICE(obj);
+
+    /* The handlers receive this PCIDevice back as their opaque pointer. */
+    register_savevm_live(NULL, "vfio-sriov", 1, 1, &savevm_pt_handlers, dev);
+    /* migration_event starts out unset. */
+    qemu_event_init(&migration_event, false);
+}
+
+static const TypeInfo vfio_sriov_type_info = {
+    .parent = "vfio-pci",    /* derives from the existing VFIO PCI type */
+    .name = TYPE_VFIO_SRIOV,
+    .instance_init = vfio_sriov_instance_init,
+};
+
+static void sriov_register_types(void)
+{
+    type_register_static(&vfio_sriov_type_info);    /* registers "vfio-sriov" */
+}
+type_init(sriov_register_types)
-- 
1.9.3

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux