[RFC PATCH 2/2] device-assignment: Count required kvm memory slots

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Each MMIO PCI BAR of an assigned device is directly mapped via a KVM
memory slot to avoid bouncing reads and writes through qemu.  KVM only
provides a (small) fixed number of these slots and attempting to
exceed the unadvertised limit results in an abort.  We can't reserve
slots, but let's at least try to make an attempt to check whether
there are enough available before adding a device.

The non-hotplug case is troublesome here because we have no visibility
as to what else might make use of these slots, but hasn't yet been
mapped.  We used to limit the number of devices that could be specified
on the commandline using the -pcidevice option.  The heuristic here
seems to work and provides a similar limit.

We can also avoid using these memory slots by allowing devices to
bounce mmio access through qemu.  This is trivially accomplished by
adding a force_slow=on option to pci-assign.

Signed-off-by: Alex Williamson <alex.williamson@xxxxxxxxxx>
---

 hw/device-assignment.c |   59 +++++++++++++++++++++++++++++++++++++++++++++++-
 hw/device-assignment.h |    3 ++
 2 files changed, 61 insertions(+), 1 deletions(-)

diff --git a/hw/device-assignment.c b/hw/device-assignment.c
index e97f565..0063a11 100644
--- a/hw/device-assignment.c
+++ b/hw/device-assignment.c
@@ -546,7 +546,9 @@ static int assigned_dev_register_regions(PCIRegion *io_regions,
                 ? PCI_BASE_ADDRESS_MEM_PREFETCH
                 : PCI_BASE_ADDRESS_SPACE_MEMORY;
 
-            if (cur_region->size & 0xFFF) {
+            if (pci_dev->features & ASSIGNED_DEVICE_FORCE_SLOW_MASK) {
+                slow_map = 1;
+            } else if (cur_region->size & 0xFFF) {
                 fprintf(stderr, "PCI region %d at address 0x%llx "
                         "has size 0x%x, which is not a multiple of 4K. "
                         "You might experience some performance hit "
@@ -556,6 +558,10 @@ static int assigned_dev_register_regions(PCIRegion *io_regions,
                 slow_map = 1;
             }
 
+            if (!slow_map) {
+                pci_dev->slots_needed++;
+            }
+
             /* map physical memory */
             pci_dev->v_addrs[i].e_physbase = cur_region->base_addr;
             pci_dev->v_addrs[i].u.r_virtbase = mmap(NULL, cur_region->size,
@@ -1666,6 +1672,30 @@ static CPUReadMemoryFunc *msix_mmio_read[] = {
 
 static int assigned_dev_register_msix_mmio(AssignedDevice *dev)
 {
+    int i;
+    PCIRegion *pci_region = dev->real_device.regions;
+
+    /* Determine if the MSI-X table splits a BAR, requiring the use of
+     * two memory slots, one to map each remaining part. */
+    if (!(dev->features & ASSIGNED_DEVICE_FORCE_SLOW_MASK)) {
+        for (i = 0; i < dev->real_device.region_number; i++, pci_region++) {
+            if (!pci_region->valid) {
+                continue;
+            }
+
+            if (ranges_overlap(pci_region->base_addr, pci_region->size,
+                               dev->msix_table_addr, 0x1000)) {
+                target_phys_addr_t offset;
+
+                offset = dev->msix_table_addr - pci_region->base_addr;
+                if (offset && pci_region->size > offset + 0x1000) {
+                    dev->slots_needed++;
+                }
+                break;
+            }
+        }
+    }
+
     dev->msix_table_page = mmap(NULL, 0x1000,
                                 PROT_READ|PROT_WRITE,
                                 MAP_ANONYMOUS|MAP_PRIVATE, 0, 0);
@@ -1768,6 +1798,31 @@ static int assigned_initfn(struct PCIDevice *pci_dev)
         if (assigned_dev_register_msix_mmio(dev))
             goto assigned_out;
 
+    if (!(dev->features & ASSIGNED_DEVICE_FORCE_SLOW_MASK)) {
+        int free_slots = kvm_free_slots();
+        int total_slots = dev->slots_needed;
+
+        if (!dev->dev.qdev.hotplugged) {
+            AssignedDevice *adev;
+
+            QLIST_FOREACH(adev, &devs, next) {
+                total_slots += adev->slots_needed;
+            }
+
+            /* This seems to work, but it's completely heuristically
+             * determined.  Any number of things might make use of kvm
+             * memory slots before the guest starts mapping memory BARs.
+             * This is really just a guess. */
+            free_slots -= 13;
+        }
+
+        if (total_slots > free_slots) {
+            error_report("pci-assign: Out of memory slots, need %d, have %d\n",
+                         total_slots, free_slots);
+            goto assigned_out;
+        }
+    }
+
     assigned_dev_load_option_rom(dev);
     QLIST_INSERT_HEAD(&devs, dev, next);
 
@@ -1837,6 +1892,8 @@ static PCIDeviceInfo assign_info = {
                         ASSIGNED_DEVICE_USE_IOMMU_BIT, true),
         DEFINE_PROP_BIT("prefer_msi", AssignedDevice, features,
                         ASSIGNED_DEVICE_PREFER_MSI_BIT, true),
+        DEFINE_PROP_BIT("force_slow", AssignedDevice, features,
+                        ASSIGNED_DEVICE_FORCE_SLOW_BIT, false),
         DEFINE_PROP_STRING("configfd", AssignedDevice, configfd_name),
         DEFINE_PROP_END_OF_LIST(),
     },
diff --git a/hw/device-assignment.h b/hw/device-assignment.h
index c94a730..ad3cc80 100644
--- a/hw/device-assignment.h
+++ b/hw/device-assignment.h
@@ -76,9 +76,11 @@ typedef struct {
 
 #define ASSIGNED_DEVICE_USE_IOMMU_BIT	0
 #define ASSIGNED_DEVICE_PREFER_MSI_BIT	1
+#define ASSIGNED_DEVICE_FORCE_SLOW_BIT	2
 
 #define ASSIGNED_DEVICE_USE_IOMMU_MASK	(1 << ASSIGNED_DEVICE_USE_IOMMU_BIT)
 #define ASSIGNED_DEVICE_PREFER_MSI_MASK	(1 << ASSIGNED_DEVICE_PREFER_MSI_BIT)
+#define ASSIGNED_DEVICE_FORCE_SLOW_MASK	(1 << ASSIGNED_DEVICE_FORCE_SLOW_BIT)
 
 typedef struct AssignedDevice {
     PCIDevice dev;
@@ -111,6 +113,7 @@ typedef struct AssignedDevice {
     int mmio_index;
     int need_emulate_cmd;
     char *configfd_name;
+    int slots_needed;
     QLIST_ENTRY(AssignedDevice) next;
 } AssignedDevice;
 

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux