[PATCH 12/13] pci-assign: Generic config space access management

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Jan Kiszka <jan.kiszka@xxxxxxxxxxx>

This drastically simplifies config space access management: Instead of
coding various range checks and merging bits, set up two access control
bitmaps. One defines, which bits can be directly read from the device,
the other allows direct write to the device, also with bit-granularity.

The setup of those bitmaps, specifically the corner cases like the
capability list status bit, is way more readable than the existing code.
And the generic config access functions just need one additional logic
to catch writes to the MSI/MSI-X control flags and call the related
update handlers.

Signed-off-by: Jan Kiszka <jan.kiszka@xxxxxxxxxxx>
---
 hw/device-assignment.c |  325 ++++++++++++++++-------------------------------
 hw/device-assignment.h |    3 +-
 2 files changed, 113 insertions(+), 215 deletions(-)

diff --git a/hw/device-assignment.c b/hw/device-assignment.c
index 6a2ca8c..be199d2 100644
--- a/hw/device-assignment.c
+++ b/hw/device-assignment.c
@@ -63,35 +63,6 @@ static void assigned_dev_load_option_rom(AssignedDevice *dev);
 
 static void assigned_dev_unregister_msix_mmio(AssignedDevice *dev);
 
-static void assigned_device_pci_cap_write_config(PCIDevice *pci_dev,
-                                                 uint32_t address,
-                                                 uint32_t val, int len);
-
-static uint32_t assigned_device_pci_cap_read_config(PCIDevice *pci_dev,
-                                                    uint32_t address, int len);
-
-/* Merge the bits set in mask from mval into val.  Both val and mval are
- * at the same addr offset, pos is the starting offset of the mask. */
-static uint32_t merge_bits(uint32_t val, uint32_t mval, uint8_t addr,
-                           int len, uint8_t pos, uint32_t mask)
-{
-    if (!ranges_overlap(addr, len, pos, 4)) {
-        return val;
-    }
-
-    if (addr >= pos) {
-        mask >>= (addr - pos) * 8;
-    } else {
-        mask <<= (pos - addr) * 8;
-    }
-    mask &= 0xffffffffU >> (4 - len) * 8;
-
-    val &= ~mask;
-    val |= (mval & mask);
-
-    return val;
-}
-
 static uint32_t assigned_dev_ioport_rw(AssignedDevRegion *dev_region,
                                        uint32_t addr, int len, uint32_t *val)
 {
@@ -404,143 +375,6 @@ static uint8_t pci_find_cap_offset(PCIDevice *d, uint8_t cap, uint8_t start)
     return 0;
 }
 
-static void assigned_dev_pci_write_config(PCIDevice *d, uint32_t address,
-                                          uint32_t val, int len)
-{
-    int fd;
-    ssize_t ret;
-    AssignedDevice *pci_dev = DO_UPCAST(AssignedDevice, dev, d);
-
-    DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n",
-          ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7),
-          (uint16_t) address, val, len);
-
-    if (address >= PCI_CONFIG_HEADER_SIZE && d->config_map[address]) {
-        return assigned_device_pci_cap_write_config(d, address, val, len);
-    }
-
-    if (ranges_overlap(address, len, PCI_COMMAND, 2)) {
-        pci_default_write_config(d, address, val, len);
-        /* Continue to program the card */
-    }
-
-    /*
-     * Catch access to
-     *  - base address registers
-     *  - ROM base address & capability pointer
-     *  - interrupt line & pin
-     */
-    if (ranges_overlap(address, len, PCI_BASE_ADDRESS_0, 24) ||
-        ranges_overlap(address, len, PCI_ROM_ADDRESS, 4)) {
-        pci_default_write_config(d, address, val, len);
-        return;
-    } else if (ranges_overlap(address, len, PCI_CAPABILITY_LIST, 1) ||
-               ranges_overlap(address, len, PCI_INTERRUPT_LINE, 2)) {
-        uint32_t real_val;
-
-        pci_default_write_config(d, address, val, len);
-
-        /* Ensure that writes to overlapping areas we don't virtualize still
-         * hit the device. */
-        real_val = assigned_dev_pci_read(d, address, len);
-        val = merge_bits(val, real_val, address, len,
-                         PCI_CAPABILITY_LIST, 0xff);
-        val = merge_bits(val, real_val, address, len,
-                         PCI_INTERRUPT_LINE, 0xffff);
-    }
-
-    DEBUG("NON BAR (%x.%x): address=%04x val=0x%08x len=%d\n",
-          ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7),
-          (uint16_t) address, val, len);
-
-    fd = pci_dev->real_device.config_fd;
-
-again:
-    ret = pwrite(fd, &val, len, address);
-    if (ret != len) {
-	if ((ret < 0) && (errno == EINTR || errno == EAGAIN))
-	    goto again;
-
-	fprintf(stderr, "%s: pwrite failed, ret = %zd errno = %d\n",
-		__func__, ret, errno);
-
-	exit(1);
-    }
-}
-
-static uint32_t assigned_dev_pci_read_config(PCIDevice *d, uint32_t address,
-                                             int len)
-{
-    uint32_t val = 0, virt_val;
-    int fd;
-    ssize_t ret;
-    AssignedDevice *pci_dev = DO_UPCAST(AssignedDevice, dev, d);
-
-    if (address >= PCI_CONFIG_HEADER_SIZE && d->config_map[address]) {
-        val = assigned_device_pci_cap_read_config(d, address, len);
-        DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n",
-              (d->devfn >> 3) & 0x1F, (d->devfn & 0x7), address, val, len);
-        return val;
-    }
-
-    /*
-     * Catch access to
-     *  - vendor & device ID
-     *  - base address registers
-     *  - ROM base address
-     */
-    if (ranges_overlap(address, len, PCI_VENDOR_ID, 4) ||
-        ranges_overlap(address, len, PCI_BASE_ADDRESS_0, 24) ||
-        ranges_overlap(address, len, PCI_ROM_ADDRESS, 4)) {
-        val = pci_default_read_config(d, address, len);
-        DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n",
-              (d->devfn >> 3) & 0x1F, (d->devfn & 0x7), address, val, len);
-        return val;
-    }
-
-    fd = pci_dev->real_device.config_fd;
-
-again:
-    ret = pread(fd, &val, len, address);
-    if (ret != len) {
-	if ((ret < 0) && (errno == EINTR || errno == EAGAIN))
-	    goto again;
-
-	fprintf(stderr, "%s: pread failed, ret = %zd errno = %d\n",
-		__func__, ret, errno);
-
-	exit(1);
-    }
-
-    DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n",
-          (d->devfn >> 3) & 0x1F, (d->devfn & 0x7), address, val, len);
-
-    if (pci_dev->emulate_cmd_mask) {
-        val = merge_bits(val, pci_default_read_config(d, address, len),
-                         address, len, PCI_COMMAND, pci_dev->emulate_cmd_mask);
-    }
-
-    /*
-     * Merge bits from virtualized
-     *  - capability pointer
-     *  - interrupt line & pin
-     */
-    virt_val = pci_default_read_config(d, address, len);
-    val = merge_bits(val, virt_val, address, len, PCI_CAPABILITY_LIST, 0xff);
-    val = merge_bits(val, virt_val, address, len, PCI_INTERRUPT_LINE, 0xffff);
-
-    if (!pci_dev->cap.available) {
-        /* kill the special capabilities */
-        if (address == PCI_COMMAND && len == 4) {
-            val &= ~(PCI_STATUS_CAP_LIST << 16);
-        } else if (address == PCI_STATUS) {
-            val &= ~PCI_STATUS_CAP_LIST;
-        }
-    }
-
-    return val;
-}
-
 static int assigned_dev_register_regions(PCIRegion *io_regions,
                                          unsigned long regions_num,
                                          AssignedDevice *pci_dev)
@@ -790,7 +624,8 @@ again:
     /* dealing with virtual function device */
     snprintf(name, sizeof(name), "%sphysfn/", dir);
     if (!stat(name, &statbuf)) {
-        pci_dev->emulate_cmd_mask = 0xffff;
+        /* always provide the written value on readout */
+        memset(pci_dev->direct_config_read + PCI_COMMAND, 0xff, 2);
     }
 
     dev->region_number = r;
@@ -1268,73 +1103,81 @@ static void assigned_dev_update_msix(PCIDevice *pci_dev)
     }
 }
 
-/* There can be multiple VNDR capabilities per device, we need to find the
- * one that starts closet to the given address without going over. */
-static uint8_t find_vndr_start(PCIDevice *pci_dev, uint32_t address)
+static uint32_t assigned_dev_pci_read_config(PCIDevice *pci_dev,
+                                             uint32_t address, int len)
 {
-    uint8_t cap, pos;
+    AssignedDevice *assigned_dev = DO_UPCAST(AssignedDevice, dev, pci_dev);
+    uint32_t virt_val = pci_default_read_config(pci_dev, address, len);
+    uint32_t real_val, direct_mask;
 
-    for (cap = pos = 0;
-         (pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_VNDR, pos));
-         pos += PCI_CAP_LIST_NEXT) {
-        if (pos <= address) {
-            cap = MAX(pos, cap);
-        }
+    switch (len) {
+    case 4:
+        direct_mask =
+            pci_get_long(assigned_dev->direct_config_read + address);
+        break;
+    case 2:
+        direct_mask =
+            pci_get_word(assigned_dev->direct_config_read + address);
+        break;
+    case 1:
+        direct_mask =
+            pci_get_byte(assigned_dev->direct_config_read + address);
+        break;
+    default:
+        abort();
     }
-    return cap;
-}
-
-static uint32_t assigned_device_pci_cap_read_config(PCIDevice *pci_dev,
-                                                    uint32_t address, int len)
-{
-    uint8_t cap, cap_id = pci_dev->config_map[address];
-    uint32_t val;
-
-    switch (cap_id) {
-
-    case PCI_CAP_ID_VPD:
-        cap = pci_find_capability(pci_dev, cap_id);
-        val = assigned_dev_pci_read(pci_dev, address, len);
-        return merge_bits(val, pci_get_long(pci_dev->config + address),
-                          address, len, cap + PCI_CAP_LIST_NEXT, 0xff);
-
-    case PCI_CAP_ID_VNDR:
-        cap = find_vndr_start(pci_dev, address);
-        val = assigned_dev_pci_read(pci_dev, address, len);
-        return merge_bits(val, pci_get_long(pci_dev->config + address),
-                          address, len, cap + PCI_CAP_LIST_NEXT, 0xff);
+    if (direct_mask) {
+        real_val = assigned_dev_pci_read(pci_dev, address, len);
+        return (virt_val & ~direct_mask) | (real_val & direct_mask);
+    } else {
+        return virt_val;
     }
-
-    return pci_default_read_config(pci_dev, address, len);
 }
 
-static void assigned_device_pci_cap_write_config(PCIDevice *pci_dev,
-                                                 uint32_t address,
-                                                 uint32_t val, int len)
+static void assigned_dev_pci_write_config(PCIDevice *pci_dev, uint32_t address,
+                                          uint32_t val, int len)
 {
-    uint8_t cap_id = pci_dev->config_map[address];
+    AssignedDevice *assigned_dev = DO_UPCAST(AssignedDevice, dev, pci_dev);
+    uint32_t direct_mask, full_access;
 
     pci_default_write_config(pci_dev, address, val, len);
-    switch (cap_id) {
-    case PCI_CAP_ID_MSI:
+
+    if (assigned_dev->cap.available & ASSIGNED_DEVICE_CAP_MSI) {
         if (range_covers_byte(address, len,
                               pci_dev->msi_cap + PCI_MSI_FLAGS)) {
             assigned_dev_update_msi(pci_dev);
         }
-        break;
-
-    case PCI_CAP_ID_MSIX:
+    } else if (assigned_dev->cap.available & ASSIGNED_DEVICE_CAP_MSIX) {
         if (range_covers_byte(address, len,
                               pci_dev->msix_cap + PCI_MSIX_FLAGS + 1)) {
             assigned_dev_update_msix(pci_dev);
         }
-        break;
+    }
 
-    case PCI_CAP_ID_VPD:
-    case PCI_CAP_ID_VNDR:
-        assigned_dev_pci_write(pci_dev, address, val, len);
+    switch (len) {
+    case 4:
+        direct_mask =
+            pci_get_long(assigned_dev->direct_config_write + address);
+        full_access = 0xffffffff;
+        break;
+    case 2:
+        direct_mask =
+            pci_get_word(assigned_dev->direct_config_write + address);
+        full_access = 0xffff;
         break;
+    case 1:
+        direct_mask =
+            pci_get_byte(assigned_dev->direct_config_write + address);
+        full_access = 0xff;
+        break;
+    default:
+        abort();
+    }
+    if (direct_mask != full_access) {
+        val &= direct_mask;
+        val |= assigned_dev_pci_read(pci_dev, address, len) & ~direct_mask;
     }
+    assigned_dev_pci_write(pci_dev, address, val, len);
 }
 
 static int assigned_device_pci_cap_init(PCIDevice *pci_dev)
@@ -1404,6 +1247,10 @@ static int assigned_device_pci_cap_init(PCIDevice *pci_dev)
             return ret;
         }
 
+        /* direct read except for next cap pointer */
+        memset(dev->direct_config_read + pos, 0xff, PCI_PM_SIZEOF);
+        dev->direct_config_read[pos + PCI_CAP_LIST_NEXT] = 0;
+
         pmc = pci_get_word(pci_dev->config + pos + PCI_CAP_FLAGS);
         pmc &= (PCI_PM_CAP_VER_MASK | PCI_PM_CAP_DSI);
         pci_set_word(pci_dev->config + pos + PCI_CAP_FLAGS, pmc);
@@ -1438,6 +1285,10 @@ static int assigned_device_pci_cap_init(PCIDevice *pci_dev)
             return ret;
         }
 
+        /* direct read except for next cap pointer */
+        memset(dev->direct_config_read + pos, 0xff, size);
+        dev->direct_config_read[pos + PCI_CAP_LIST_NEXT] = 0;
+
         type = pci_get_word(pci_dev->config + pos + PCI_EXP_FLAGS);
         type = (type & PCI_EXP_FLAGS_TYPE) >> 8;
         if (type != PCI_EXP_TYPE_ENDPOINT &&
@@ -1513,6 +1364,10 @@ static int assigned_device_pci_cap_init(PCIDevice *pci_dev)
             return ret;
         }
 
+        /* direct read except for next cap pointer */
+        memset(dev->direct_config_read + pos, 0xff, 8);
+        dev->direct_config_read[pos + PCI_CAP_LIST_NEXT] = 0;
+
         /* Command register, clear upper bits, including extended modes */
         cmd = pci_get_word(pci_dev->config + pos + PCI_X_CMD);
         cmd &= (PCI_X_CMD_DPERR_E | PCI_X_CMD_ERO | PCI_X_CMD_MAX_READ |
@@ -1534,6 +1389,13 @@ static int assigned_device_pci_cap_init(PCIDevice *pci_dev)
         if ((ret = pci_add_capability(pci_dev, PCI_CAP_ID_VPD, pos, 8)) < 0) {
             return ret;
         }
+
+        /* direct read except for next cap pointer */
+        memset(dev->direct_config_read + pos, 0xff, 8);
+        dev->direct_config_read[pos + PCI_CAP_LIST_NEXT] = 0;
+
+        /* direct write for cap content */
+        memset(dev->direct_config_write + pos + 2, 0xff, 6);
     }
 
     /* Devices can have multiple vendor capabilities, get them all */
@@ -1545,6 +1407,21 @@ static int assigned_device_pci_cap_init(PCIDevice *pci_dev)
                                       pos, len)) < 0) {
             return ret;
         }
+
+        /* direct read except for next cap pointer */
+        memset(dev->direct_config_read + pos, 0xff, len);
+        dev->direct_config_read[pos + PCI_CAP_LIST_NEXT] = 0;
+
+        /* direct write for cap content */
+        memset(dev->direct_config_write + pos + 2, 0xff, len - 2);
+    }
+
+    /* If real and virtual capability list status bits differ, virtualize the
+     * access. */
+    if ((pci_get_word(pci_dev->config + PCI_STATUS) & PCI_STATUS_CAP_LIST) !=
+        (assigned_dev_pci_read_byte(pci_dev, PCI_STATUS) &
+         PCI_STATUS_CAP_LIST)) {
+        dev->direct_config_read[PCI_STATUS] &= ~PCI_STATUS_CAP_LIST;
     }
 
     return 0;
@@ -1694,6 +1571,26 @@ static int assigned_initfn(struct PCIDevice *pci_dev)
         return -1;
     }
 
+    /*
+     * Set up basic config space access control. Will be further refined during
+     * device initialization.
+     *
+     * Direct read/write access is grangted for:
+     *  - status & command registers, revision ID & class code, cacheline size,
+     *    latency timer, header type, BIST
+     *  - cardbus CIS pointer, subsystem vendor & ID
+     *  - reserved fields
+     *  - Min_Gnt & Max_Lat
+     */
+    memset(dev->direct_config_read + PCI_COMMAND, 0xff, 12);
+    memset(dev->direct_config_read + PCI_CARDBUS_CIS, 0xff, 8);
+    memset(dev->direct_config_read + PCI_CAPABILITY_LIST + 1, 0xff, 7);
+    memset(dev->direct_config_read + PCI_MIN_GNT, 0xff, 2);
+    memset(dev->direct_config_write + PCI_COMMAND, 0xff, 12);
+    memset(dev->direct_config_write + PCI_CARDBUS_CIS, 0xff, 8);
+    memset(dev->direct_config_write + PCI_CAPABILITY_LIST + 1, 0xff, 7);
+    memset(dev->direct_config_write + PCI_MIN_GNT, 0xff, 2);
+
     if (get_real_device(dev, dev->host.seg, dev->host.bus,
                         dev->host.dev, dev->host.func)) {
         error_report("pci-assign: Error: Couldn't get real device (%s)!",
diff --git a/hw/device-assignment.h b/hw/device-assignment.h
index 3d20207..ed5defb 100644
--- a/hw/device-assignment.h
+++ b/hw/device-assignment.h
@@ -104,12 +104,13 @@ typedef struct AssignedDevice {
 #define ASSIGNED_DEVICE_MSIX_MASKED (1 << 2)
         uint32_t state;
     } cap;
+    uint8_t direct_config_read[PCI_CONFIG_SPACE_SIZE];
+    uint8_t direct_config_write[PCI_CONFIG_SPACE_SIZE];
     int irq_entries_nr;
     struct kvm_irq_routing_entry *entry;
     void *msix_table_page;
     target_phys_addr_t msix_table_addr;
     int mmio_index;
-    uint32_t emulate_cmd_mask;
     char *configfd_name;
     int32_t bootindex;
     QLIST_ENTRY(AssignedDevice) next;
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux