This drastically simplifies config space access management: Instead of coding various range checks and merging bits, set up two access control bitmaps. One defines, which bits can be directly read from the device, the other allows direct write to the device, also with bit-granularity. The setup of those bitmaps, specifically the corner cases like the capability list status bit, is way more readable than the existing code. And the generic config access functions just need one additional logic to catch writes to the MSI/MSI-X control flags and call the related update handlers. Signed-off-by: Jan Kiszka <jan.kiszka@xxxxxxxxxxx> --- hw/device-assignment.c | 320 ++++++++++++++++-------------------------------- hw/device-assignment.h | 3 +- 2 files changed, 109 insertions(+), 214 deletions(-) diff --git a/hw/device-assignment.c b/hw/device-assignment.c index 32e9d2c..bfa0edf 100644 --- a/hw/device-assignment.c +++ b/hw/device-assignment.c @@ -65,35 +65,6 @@ static void assigned_dev_load_option_rom(AssignedDevice *dev); static void assigned_dev_unregister_msix_mmio(AssignedDevice *dev); -static void assigned_device_pci_cap_write_config(PCIDevice *pci_dev, - uint32_t address, - uint32_t val, int len); - -static uint32_t assigned_device_pci_cap_read_config(PCIDevice *pci_dev, - uint32_t address, int len); - -/* Merge the bits set in mask from mval into val. Both val and mval are - * at the same addr offset, pos is the starting offset of the mask. */ -static uint32_t merge_bits(uint32_t val, uint32_t mval, uint8_t addr, - int len, uint8_t pos, uint32_t mask) -{ - if (!ranges_overlap(addr, len, pos, 4)) { - return val; - } - - if (addr >= pos) { - mask >>= (addr - pos) * 8; - } else { - mask <<= (pos - addr) * 8; - } - mask &= 0xffffffffU >> (4 - len) * 8; - - val &= ~mask; - val |= (mval & mask); - - return val; -} - static uint32_t assigned_dev_ioport_rw(AssignedDevRegion *dev_region, uint32_t addr, int len, uint32_t *val) { @@ -374,6 +345,24 @@ again: return; } +static void assigned_dev_emulate_config_read(AssignedDevice *dev, + uint32_t offset, uint32_t len) +{ + memset(dev->emulate_config_read + offset, 0xff, len); +} + +static void assigned_dev_direct_config_read(AssignedDevice *dev, + uint32_t offset, uint32_t len) +{ + memset(dev->emulate_config_read + offset, 0, len); +} + +static void assigned_dev_direct_config_write(AssignedDevice *dev, + uint32_t offset, uint32_t len) +{ + memset(dev->emulate_config_write + offset, 0, len); +} + static uint8_t pci_find_cap_offset(PCIDevice *d, uint8_t cap, uint8_t start) { int id; @@ -403,143 +392,6 @@ static uint8_t pci_find_cap_offset(PCIDevice *d, uint8_t cap, uint8_t start) return 0; } -static void assigned_dev_pci_write_config(PCIDevice *d, uint32_t address, - uint32_t val, int len) -{ - int fd; - ssize_t ret; - AssignedDevice *pci_dev = DO_UPCAST(AssignedDevice, dev, d); - - DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n", - ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7), - (uint16_t) address, val, len); - - if (address >= PCI_CONFIG_HEADER_SIZE && d->config_map[address]) { - return assigned_device_pci_cap_write_config(d, address, val, len); - } - - if (ranges_overlap(address, len, PCI_COMMAND, 2)) { - pci_default_write_config(d, address, val, len); - /* Continue to program the card */ - } - - /* - * Catch access to - * - base address registers - * - ROM base address & capability pointer - * - interrupt line & pin - */ - if (ranges_overlap(address, len, PCI_BASE_ADDRESS_0, 24) || - ranges_overlap(address, len, PCI_ROM_ADDRESS, 4)) { - pci_default_write_config(d, address, val, len); - return; - } else if (ranges_overlap(address, len, PCI_CAPABILITY_LIST, 1) || - ranges_overlap(address, len, PCI_INTERRUPT_LINE, 2)) { - uint32_t real_val; - - pci_default_write_config(d, address, val, len); - - /* Ensure that writes to overlapping areas we don't virtualize still - * hit the device. */ - real_val = assigned_dev_pci_read(d, address, len); - val = merge_bits(val, real_val, address, len, - PCI_CAPABILITY_LIST, 0xff); - val = merge_bits(val, real_val, address, len, - PCI_INTERRUPT_LINE, 0xffff); - } - - DEBUG("NON BAR (%x.%x): address=%04x val=0x%08x len=%d\n", - ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7), - (uint16_t) address, val, len); - - fd = pci_dev->real_device.config_fd; - -again: - ret = pwrite(fd, &val, len, address); - if (ret != len) { - if ((ret < 0) && (errno == EINTR || errno == EAGAIN)) - goto again; - - fprintf(stderr, "%s: pwrite failed, ret = %zd errno = %d\n", - __func__, ret, errno); - - exit(1); - } -} - -static uint32_t assigned_dev_pci_read_config(PCIDevice *d, uint32_t address, - int len) -{ - uint32_t val = 0, virt_val; - int fd; - ssize_t ret; - AssignedDevice *pci_dev = DO_UPCAST(AssignedDevice, dev, d); - - if (address >= PCI_CONFIG_HEADER_SIZE && d->config_map[address]) { - val = assigned_device_pci_cap_read_config(d, address, len); - DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n", - (d->devfn >> 3) & 0x1F, (d->devfn & 0x7), address, val, len); - return val; - } - - /* - * Catch access to - * - vendor & device ID - * - base address registers - * - ROM base address - */ - if (ranges_overlap(address, len, PCI_VENDOR_ID, 4) || - ranges_overlap(address, len, PCI_BASE_ADDRESS_0, 24) || - ranges_overlap(address, len, PCI_ROM_ADDRESS, 4)) { - val = pci_default_read_config(d, address, len); - DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n", - (d->devfn >> 3) & 0x1F, (d->devfn & 0x7), address, val, len); - return val; - } - - fd = pci_dev->real_device.config_fd; - -again: - ret = pread(fd, &val, len, address); - if (ret != len) { - if ((ret < 0) && (errno == EINTR || errno == EAGAIN)) - goto again; - - fprintf(stderr, "%s: pread failed, ret = %zd errno = %d\n", - __func__, ret, errno); - - exit(1); - } - - DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n", - (d->devfn >> 3) & 0x1F, (d->devfn & 0x7), address, val, len); - - if (pci_dev->emulate_cmd_mask) { - val = merge_bits(val, pci_default_read_config(d, address, len), - address, len, PCI_COMMAND, pci_dev->emulate_cmd_mask); - } - - /* - * Merge bits from virtualized - * - capability pointer - * - interrupt line & pin - */ - virt_val = pci_default_read_config(d, address, len); - val = merge_bits(val, virt_val, address, len, PCI_CAPABILITY_LIST, 0xff); - val = merge_bits(val, virt_val, address, len, PCI_INTERRUPT_LINE, 0xffff); - - if (!pci_dev->cap.available) { - /* kill the special capabilities */ - if (address == PCI_COMMAND && len == 4) { - val &= ~(PCI_STATUS_CAP_LIST << 16); - } else if (address == PCI_STATUS) { - val &= ~PCI_STATUS_CAP_LIST; - } - } - - return val; -} - static int assigned_dev_register_regions(PCIRegion *io_regions, unsigned long regions_num, AssignedDevice *pci_dev) @@ -785,7 +637,8 @@ again: /* dealing with virtual function device */ snprintf(name, sizeof(name), "%sphysfn/", dir); if (!stat(name, &statbuf)) { - pci_dev->emulate_cmd_mask = 0xffff; + /* always provide the written value on readout */ + assigned_dev_emulate_config_read(pci_dev, PCI_COMMAND, 2); } dev->region_number = r; @@ -1258,75 +1111,70 @@ static void assigned_dev_update_msix(PCIDevice *pci_dev) } } -/* There can be multiple VNDR capabilities per device, we need to find the - * one that starts closet to the given address without going over. */ -static uint8_t find_vndr_start(PCIDevice *pci_dev, uint32_t address) -{ - uint8_t cap, pos; - - for (cap = pos = 0; - (pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_VNDR, pos)); - pos += PCI_CAP_LIST_NEXT) { - if (pos <= address) { - cap = MAX(pos, cap); - } - } - return cap; -} - -static uint32_t assigned_device_pci_cap_read_config(PCIDevice *pci_dev, - uint32_t address, int len) +static uint32_t assigned_dev_pci_read_config(PCIDevice *pci_dev, + uint32_t address, int len) { - uint8_t cap, cap_id = pci_dev->config_map[address]; - uint32_t val; + AssignedDevice *assigned_dev = DO_UPCAST(AssignedDevice, dev, pci_dev); + uint32_t virt_val = pci_default_read_config(pci_dev, address, len); + uint32_t real_val, emulate_mask, full_emulation_mask; - switch (cap_id) { + emulate_mask = 0; + memcpy(&emulate_mask, assigned_dev->emulate_config_read + address, len); + emulate_mask = le32_to_cpu(emulate_mask); - case PCI_CAP_ID_VPD: - cap = pci_find_capability(pci_dev, cap_id); - val = assigned_dev_pci_read(pci_dev, address, len); - return merge_bits(val, pci_get_long(pci_dev->config + address), - address, len, cap + PCI_CAP_LIST_NEXT, 0xff); + full_emulation_mask = 0xffffffff >> (32 - len * 8); - case PCI_CAP_ID_VNDR: - cap = find_vndr_start(pci_dev, address); - val = assigned_dev_pci_read(pci_dev, address, len); - return merge_bits(val, pci_get_long(pci_dev->config + address), - address, len, cap + PCI_CAP_LIST_NEXT, 0xff); + if (emulate_mask != full_emulation_mask) { + real_val = assigned_dev_pci_read(pci_dev, address, len); + return (virt_val & emulate_mask) | (real_val & ~emulate_mask); + } else { + return virt_val; } - - return pci_default_read_config(pci_dev, address, len); } -static void assigned_device_pci_cap_write_config(PCIDevice *pci_dev, - uint32_t address, - uint32_t val, int len) +static void assigned_dev_pci_write_config(PCIDevice *pci_dev, uint32_t address, + uint32_t val, int len) { - uint8_t cap_id = pci_dev->config_map[address]; + AssignedDevice *assigned_dev = DO_UPCAST(AssignedDevice, dev, pci_dev); + uint32_t emulate_mask, full_emulation_mask; pci_default_write_config(pci_dev, address, val, len); - switch (cap_id) { - case PCI_CAP_ID_MSI: + + if (assigned_dev->cap.available & ASSIGNED_DEVICE_CAP_MSI) { if (range_covers_byte(address, len, pci_dev->msi_cap + PCI_MSI_FLAGS)) { assigned_dev_update_msi(pci_dev); } - break; - - case PCI_CAP_ID_MSIX: + } + if (assigned_dev->cap.available & ASSIGNED_DEVICE_CAP_MSIX) { if (range_covers_byte(address, len, pci_dev->msix_cap + PCI_MSIX_FLAGS + 1)) { assigned_dev_update_msix(pci_dev); } - break; + } + + emulate_mask = 0; + memcpy(&emulate_mask, assigned_dev->emulate_config_write + address, len); + emulate_mask = le32_to_cpu(emulate_mask); - case PCI_CAP_ID_VPD: - case PCI_CAP_ID_VNDR: + full_emulation_mask = 0xffffffff >> (32 - len * 8); + + if (emulate_mask != full_emulation_mask) { + if (emulate_mask) { + val &= ~emulate_mask; + val |= assigned_dev_pci_read(pci_dev, address, len) & emulate_mask; + } assigned_dev_pci_write(pci_dev, address, val, len); - break; } } +static void assigned_dev_setup_cap_read(AssignedDevice *dev, uint32_t offset, + uint32_t len) +{ + assigned_dev_direct_config_read(dev, offset, len); + assigned_dev_emulate_config_read(dev, offset + PCI_CAP_LIST_NEXT, 1); +} + static int assigned_device_pci_cap_init(PCIDevice *pci_dev) { AssignedDevice *dev = DO_UPCAST(AssignedDevice, dev, pci_dev); @@ -1396,6 +1244,8 @@ static int assigned_device_pci_cap_init(PCIDevice *pci_dev) return ret; } + assigned_dev_setup_cap_read(dev, pos, PCI_PM_SIZEOF); + pmc = pci_get_word(pci_dev->config + pos + PCI_CAP_FLAGS); pmc &= (PCI_PM_CAP_VER_MASK | PCI_PM_CAP_DSI); pci_set_word(pci_dev->config + pos + PCI_CAP_FLAGS, pmc); @@ -1449,6 +1299,8 @@ static int assigned_device_pci_cap_init(PCIDevice *pci_dev) return ret; } + assigned_dev_setup_cap_read(dev, pos, size); + type = pci_get_word(pci_dev->config + pos + PCI_EXP_FLAGS); type = (type & PCI_EXP_FLAGS_TYPE) >> 8; if (type != PCI_EXP_TYPE_ENDPOINT && @@ -1524,6 +1376,8 @@ static int assigned_device_pci_cap_init(PCIDevice *pci_dev) return ret; } + assigned_dev_setup_cap_read(dev, pos, 8); + /* Command register, clear upper bits, including extended modes */ cmd = pci_get_word(pci_dev->config + pos + PCI_X_CMD); cmd &= (PCI_X_CMD_DPERR_E | PCI_X_CMD_ERO | PCI_X_CMD_MAX_READ | @@ -1545,6 +1399,11 @@ static int assigned_device_pci_cap_init(PCIDevice *pci_dev) if ((ret = pci_add_capability(pci_dev, PCI_CAP_ID_VPD, pos, 8)) < 0) { return ret; } + + assigned_dev_setup_cap_read(dev, pos, 8); + + /* direct write for cap content */ + assigned_dev_direct_config_write(dev, pos + 2, 6); } /* Devices can have multiple vendor capabilities, get them all */ @@ -1556,6 +1415,19 @@ static int assigned_device_pci_cap_init(PCIDevice *pci_dev) pos, len)) < 0) { return ret; } + + assigned_dev_setup_cap_read(dev, pos, len); + + /* direct write for cap content */ + assigned_dev_direct_config_write(dev, pos + 2, len - 2); + } + + /* If real and virtual capability list status bits differ, virtualize the + * access. */ + if ((pci_get_word(pci_dev->config + PCI_STATUS) & PCI_STATUS_CAP_LIST) != + (assigned_dev_pci_read_byte(pci_dev, PCI_STATUS) & + PCI_STATUS_CAP_LIST)) { + dev->emulate_config_read[PCI_STATUS] |= PCI_STATUS_CAP_LIST; } return 0; @@ -1704,6 +1576,28 @@ static int assigned_initfn(struct PCIDevice *pci_dev) return -1; } + /* + * Set up basic config space access control. Will be further refined during + * device initialization. + */ + assigned_dev_emulate_config_read(dev, 0, PCI_CONFIG_SPACE_SIZE); + assigned_dev_direct_config_read(dev, PCI_COMMAND, 2); + assigned_dev_direct_config_read(dev, PCI_STATUS, 2); + assigned_dev_direct_config_read(dev, PCI_REVISION_ID, 1); + assigned_dev_direct_config_read(dev, PCI_CLASS_PROG, 3); + assigned_dev_direct_config_read(dev, PCI_CACHE_LINE_SIZE, 1); + assigned_dev_direct_config_read(dev, PCI_LATENCY_TIMER, 1); + assigned_dev_direct_config_read(dev, PCI_HEADER_TYPE, 1); + assigned_dev_direct_config_read(dev, PCI_BIST, 1); + assigned_dev_direct_config_read(dev, PCI_CARDBUS_CIS, 4); + assigned_dev_direct_config_read(dev, PCI_SUBSYSTEM_VENDOR_ID, 2); + assigned_dev_direct_config_read(dev, PCI_SUBSYSTEM_ID, 2); + assigned_dev_direct_config_read(dev, PCI_CAPABILITY_LIST + 1, 7); + assigned_dev_direct_config_read(dev, PCI_MIN_GNT, 1); + assigned_dev_direct_config_read(dev, PCI_MAX_LAT, 1); + memcpy(dev->emulate_config_write, dev->emulate_config_read, + sizeof(dev->emulate_config_read)); + if (get_real_device(dev, dev->host.seg, dev->host.bus, dev->host.dev, dev->host.func)) { error_report("pci-assign: Error: Couldn't get real device (%s)!", diff --git a/hw/device-assignment.h b/hw/device-assignment.h index 06a1fb3..1b4aecc 100644 --- a/hw/device-assignment.h +++ b/hw/device-assignment.h @@ -104,12 +104,13 @@ typedef struct AssignedDevice { #define ASSIGNED_DEVICE_MSIX_MASKED (1 << 2) uint32_t state; } cap; + uint8_t emulate_config_read[PCI_CONFIG_SPACE_SIZE]; + uint8_t emulate_config_write[PCI_CONFIG_SPACE_SIZE]; int irq_entries_nr; struct kvm_irq_routing_entry *entry; void *msix_table_page; target_phys_addr_t msix_table_addr; MemoryRegion mmio; - uint32_t emulate_cmd_mask; char *configfd_name; int32_t bootindex; QLIST_ENTRY(AssignedDevice) next; -- 1.7.3.4 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html