On Thu, 2011-07-21 at 20:14 +0200, Jan Kiszka wrote: > This drastically simplifies config space access management: Instead of > coding various range checks and merging bits, set up two access control > bitmaps. One defines, which bits can be directly read from the device, > the other allows direct write to the device, also with bit-granularity. > > The setup of those bitmaps, specifically the corner cases like the > capability list status bit, is way more readable than the existing code. > And the generic config access functions just need one additional logic > to catch writes to the MSI/MSI-X control flags and call the related > update handlers. > > Signed-off-by: Jan Kiszka <jan.kiszka@xxxxxxxxxxx> > --- > hw/device-assignment.c | 320 ++++++++++++++++-------------------------------- > hw/device-assignment.h | 3 +- > 2 files changed, 109 insertions(+), 214 deletions(-) > > diff --git a/hw/device-assignment.c b/hw/device-assignment.c > index 6a2ca8c..5c24c78 100644 > --- a/hw/device-assignment.c > +++ b/hw/device-assignment.c > @@ -63,35 +63,6 @@ static void assigned_dev_load_option_rom(AssignedDevice *dev); > > static void assigned_dev_unregister_msix_mmio(AssignedDevice *dev); > > -static void assigned_device_pci_cap_write_config(PCIDevice *pci_dev, > - uint32_t address, > - uint32_t val, int len); > - > -static uint32_t assigned_device_pci_cap_read_config(PCIDevice *pci_dev, > - uint32_t address, int len); > - > -/* Merge the bits set in mask from mval into val. Both val and mval are > - * at the same addr offset, pos is the starting offset of the mask. */ > -static uint32_t merge_bits(uint32_t val, uint32_t mval, uint8_t addr, > - int len, uint8_t pos, uint32_t mask) > -{ > - if (!ranges_overlap(addr, len, pos, 4)) { > - return val; > - } > - > - if (addr >= pos) { > - mask >>= (addr - pos) * 8; > - } else { > - mask <<= (pos - addr) * 8; > - } > - mask &= 0xffffffffU >> (4 - len) * 8; > - > - val &= ~mask; > - val |= (mval & mask); > - > - return val; > -} > - > static uint32_t assigned_dev_ioport_rw(AssignedDevRegion *dev_region, > uint32_t addr, int len, uint32_t *val) > { > @@ -375,6 +346,24 @@ again: > return; > } > > +static void assigned_dev_emulate_config_read(AssignedDevice *dev, > + uint32_t offset, uint32_t len) > +{ > + memset(dev->emulate_config_read + offset, 0xff, len); > +} > + > +static void assigned_dev_direct_config_read(AssignedDevice *dev, > + uint32_t offset, uint32_t len) > +{ > + memset(dev->emulate_config_read + offset, 0, len); > +} > + > +static void assigned_dev_direct_config_write(AssignedDevice *dev, > + uint32_t offset, uint32_t len) > +{ > + memset(dev->emulate_config_write + offset, 0, len); > +} > + > static uint8_t pci_find_cap_offset(PCIDevice *d, uint8_t cap, uint8_t start) > { > int id; > @@ -404,143 +393,6 @@ static uint8_t pci_find_cap_offset(PCIDevice *d, uint8_t cap, uint8_t start) > return 0; > } > > -static void assigned_dev_pci_write_config(PCIDevice *d, uint32_t address, > - uint32_t val, int len) > -{ > - int fd; > - ssize_t ret; > - AssignedDevice *pci_dev = DO_UPCAST(AssignedDevice, dev, d); > - > - DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n", > - ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7), > - (uint16_t) address, val, len); > - > - if (address >= PCI_CONFIG_HEADER_SIZE && d->config_map[address]) { > - return assigned_device_pci_cap_write_config(d, address, val, len); > - } > - > - if (ranges_overlap(address, len, PCI_COMMAND, 2)) { > - pci_default_write_config(d, address, val, len); > - /* Continue to program the card */ > - } > - > - /* > - * Catch access to > - * - base address registers > - * - ROM base address & capability pointer > - * - interrupt line & pin > - */ > - if (ranges_overlap(address, len, PCI_BASE_ADDRESS_0, 24) || > - ranges_overlap(address, len, PCI_ROM_ADDRESS, 4)) { > - pci_default_write_config(d, address, val, len); > - return; > - } else if (ranges_overlap(address, len, PCI_CAPABILITY_LIST, 1) || > - ranges_overlap(address, len, PCI_INTERRUPT_LINE, 2)) { > - uint32_t real_val; > - > - pci_default_write_config(d, address, val, len); > - > - /* Ensure that writes to overlapping areas we don't virtualize still > - * hit the device. */ > - real_val = assigned_dev_pci_read(d, address, len); > - val = merge_bits(val, real_val, address, len, > - PCI_CAPABILITY_LIST, 0xff); > - val = merge_bits(val, real_val, address, len, > - PCI_INTERRUPT_LINE, 0xffff); > - } > - > - DEBUG("NON BAR (%x.%x): address=%04x val=0x%08x len=%d\n", > - ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7), > - (uint16_t) address, val, len); > - > - fd = pci_dev->real_device.config_fd; > - > -again: > - ret = pwrite(fd, &val, len, address); > - if (ret != len) { > - if ((ret < 0) && (errno == EINTR || errno == EAGAIN)) > - goto again; > - > - fprintf(stderr, "%s: pwrite failed, ret = %zd errno = %d\n", > - __func__, ret, errno); > - > - exit(1); > - } > -} > - > -static uint32_t assigned_dev_pci_read_config(PCIDevice *d, uint32_t address, > - int len) > -{ > - uint32_t val = 0, virt_val; > - int fd; > - ssize_t ret; > - AssignedDevice *pci_dev = DO_UPCAST(AssignedDevice, dev, d); > - > - if (address >= PCI_CONFIG_HEADER_SIZE && d->config_map[address]) { > - val = assigned_device_pci_cap_read_config(d, address, len); > - DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n", > - (d->devfn >> 3) & 0x1F, (d->devfn & 0x7), address, val, len); > - return val; > - } > - > - /* > - * Catch access to > - * - vendor & device ID > - * - base address registers > - * - ROM base address > - */ > - if (ranges_overlap(address, len, PCI_VENDOR_ID, 4) || > - ranges_overlap(address, len, PCI_BASE_ADDRESS_0, 24) || > - ranges_overlap(address, len, PCI_ROM_ADDRESS, 4)) { > - val = pci_default_read_config(d, address, len); > - DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n", > - (d->devfn >> 3) & 0x1F, (d->devfn & 0x7), address, val, len); > - return val; > - } > - > - fd = pci_dev->real_device.config_fd; > - > -again: > - ret = pread(fd, &val, len, address); > - if (ret != len) { > - if ((ret < 0) && (errno == EINTR || errno == EAGAIN)) > - goto again; > - > - fprintf(stderr, "%s: pread failed, ret = %zd errno = %d\n", > - __func__, ret, errno); > - > - exit(1); > - } > - > - DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n", > - (d->devfn >> 3) & 0x1F, (d->devfn & 0x7), address, val, len); > - > - if (pci_dev->emulate_cmd_mask) { > - val = merge_bits(val, pci_default_read_config(d, address, len), > - address, len, PCI_COMMAND, pci_dev->emulate_cmd_mask); > - } > - > - /* > - * Merge bits from virtualized > - * - capability pointer > - * - interrupt line & pin > - */ > - virt_val = pci_default_read_config(d, address, len); > - val = merge_bits(val, virt_val, address, len, PCI_CAPABILITY_LIST, 0xff); > - val = merge_bits(val, virt_val, address, len, PCI_INTERRUPT_LINE, 0xffff); > - > - if (!pci_dev->cap.available) { > - /* kill the special capabilities */ > - if (address == PCI_COMMAND && len == 4) { > - val &= ~(PCI_STATUS_CAP_LIST << 16); > - } else if (address == PCI_STATUS) { > - val &= ~PCI_STATUS_CAP_LIST; > - } > - } > - > - return val; > -} > - > static int assigned_dev_register_regions(PCIRegion *io_regions, > unsigned long regions_num, > AssignedDevice *pci_dev) > @@ -790,7 +642,8 @@ again: > /* dealing with virtual function device */ > snprintf(name, sizeof(name), "%sphysfn/", dir); > if (!stat(name, &statbuf)) { > - pci_dev->emulate_cmd_mask = 0xffff; > + /* always provide the written value on readout */ > + assigned_dev_emulate_config_read(pci_dev, PCI_COMMAND, 2); > } > > dev->region_number = r; > @@ -1268,75 +1121,70 @@ static void assigned_dev_update_msix(PCIDevice *pci_dev) > } > } > > -/* There can be multiple VNDR capabilities per device, we need to find the > - * one that starts closet to the given address without going over. */ > -static uint8_t find_vndr_start(PCIDevice *pci_dev, uint32_t address) > -{ > - uint8_t cap, pos; > - > - for (cap = pos = 0; > - (pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_VNDR, pos)); > - pos += PCI_CAP_LIST_NEXT) { > - if (pos <= address) { > - cap = MAX(pos, cap); > - } > - } > - return cap; > -} > - > -static uint32_t assigned_device_pci_cap_read_config(PCIDevice *pci_dev, > - uint32_t address, int len) > +static uint32_t assigned_dev_pci_read_config(PCIDevice *pci_dev, > + uint32_t address, int len) > { > - uint8_t cap, cap_id = pci_dev->config_map[address]; > - uint32_t val; > + AssignedDevice *assigned_dev = DO_UPCAST(AssignedDevice, dev, pci_dev); > + uint32_t virt_val = pci_default_read_config(pci_dev, address, len); > + uint32_t real_val, emulate_mask, full_emulation; > > - switch (cap_id) { > + emulate_mask = 0; > + memcpy(&emulate_mask, assigned_dev->emulate_config_read + address, len); > + emulate_mask = le32_to_cpu(emulate_mask); > > - case PCI_CAP_ID_VPD: > - cap = pci_find_capability(pci_dev, cap_id); > - val = assigned_dev_pci_read(pci_dev, address, len); > - return merge_bits(val, pci_get_long(pci_dev->config + address), > - address, len, cap + PCI_CAP_LIST_NEXT, 0xff); > + full_emulation = 0xffffffff >> (4 - len * 8); > > - case PCI_CAP_ID_VNDR: > - cap = find_vndr_start(pci_dev, address); > - val = assigned_dev_pci_read(pci_dev, address, len); > - return merge_bits(val, pci_get_long(pci_dev->config + address), > - address, len, cap + PCI_CAP_LIST_NEXT, 0xff); > + if (emulate_mask != full_emulation) { > + real_val = assigned_dev_pci_read(pci_dev, address, len); > + return (virt_val & emulate_mask) | (real_val & ~emulate_mask); > + } else { > + return virt_val; > } > - > - return pci_default_read_config(pci_dev, address, len); > } > > -static void assigned_device_pci_cap_write_config(PCIDevice *pci_dev, > - uint32_t address, > - uint32_t val, int len) > +static void assigned_dev_pci_write_config(PCIDevice *pci_dev, uint32_t address, > + uint32_t val, int len) > { > - uint8_t cap_id = pci_dev->config_map[address]; > + AssignedDevice *assigned_dev = DO_UPCAST(AssignedDevice, dev, pci_dev); > + uint32_t emulate_mask, full_emulation; > > pci_default_write_config(pci_dev, address, val, len); > - switch (cap_id) { > - case PCI_CAP_ID_MSI: > + > + if (assigned_dev->cap.available & ASSIGNED_DEVICE_CAP_MSI) { > if (range_covers_byte(address, len, > pci_dev->msi_cap + PCI_MSI_FLAGS)) { > assigned_dev_update_msi(pci_dev); > } > - break; > - > - case PCI_CAP_ID_MSIX: > + } > + if (assigned_dev->cap.available & ASSIGNED_DEVICE_CAP_MSIX) { > if (range_covers_byte(address, len, > pci_dev->msix_cap + PCI_MSIX_FLAGS + 1)) { > assigned_dev_update_msix(pci_dev); > } > - break; > + } > + > + emulate_mask = 0; > + memcpy(&emulate_mask, assigned_dev->emulate_config_write + address, len); > + emulate_mask = le32_to_cpu(emulate_mask); > > - case PCI_CAP_ID_VPD: > - case PCI_CAP_ID_VNDR: > + full_emulation = 0xffffffff >> (4 - len * 8); ^^^^^^^^^^^^ Missing parens? This comes out negative for all values of len that would be used here. Naming this full_emulation_mask might help readability. Same for write below. Thanks, Alex > + > + if (emulate_mask != full_emulation) { > + if (emulate_mask) { > + val &= ~emulate_mask; > + val |= assigned_dev_pci_read(pci_dev, address, len) & emulate_mask; > + } > assigned_dev_pci_write(pci_dev, address, val, len); > - break; > } > } > > +static void assigned_dev_setup_cap_read(AssignedDevice *dev, uint32_t offset, > + uint32_t len) > +{ > + assigned_dev_direct_config_read(dev, offset, len); > + assigned_dev_emulate_config_read(dev, offset + PCI_CAP_LIST_NEXT, 1); > +} > + > static int assigned_device_pci_cap_init(PCIDevice *pci_dev) > { > AssignedDevice *dev = DO_UPCAST(AssignedDevice, dev, pci_dev); > @@ -1404,6 +1252,8 @@ static int assigned_device_pci_cap_init(PCIDevice *pci_dev) > return ret; > } > > + assigned_dev_setup_cap_read(dev, pos, PCI_PM_SIZEOF); > + > pmc = pci_get_word(pci_dev->config + pos + PCI_CAP_FLAGS); > pmc &= (PCI_PM_CAP_VER_MASK | PCI_PM_CAP_DSI); > pci_set_word(pci_dev->config + pos + PCI_CAP_FLAGS, pmc); > @@ -1438,6 +1288,8 @@ static int assigned_device_pci_cap_init(PCIDevice *pci_dev) > return ret; > } > > + assigned_dev_setup_cap_read(dev, pos, size); > + > type = pci_get_word(pci_dev->config + pos + PCI_EXP_FLAGS); > type = (type & PCI_EXP_FLAGS_TYPE) >> 8; > if (type != PCI_EXP_TYPE_ENDPOINT && > @@ -1513,6 +1365,8 @@ static int assigned_device_pci_cap_init(PCIDevice *pci_dev) > return ret; > } > > + assigned_dev_setup_cap_read(dev, pos, 8); > + > /* Command register, clear upper bits, including extended modes */ > cmd = pci_get_word(pci_dev->config + pos + PCI_X_CMD); > cmd &= (PCI_X_CMD_DPERR_E | PCI_X_CMD_ERO | PCI_X_CMD_MAX_READ | > @@ -1534,6 +1388,11 @@ static int assigned_device_pci_cap_init(PCIDevice *pci_dev) > if ((ret = pci_add_capability(pci_dev, PCI_CAP_ID_VPD, pos, 8)) < 0) { > return ret; > } > + > + assigned_dev_setup_cap_read(dev, pos, 8); > + > + /* direct write for cap content */ > + assigned_dev_direct_config_write(dev, pos + 2, 6); > } > > /* Devices can have multiple vendor capabilities, get them all */ > @@ -1545,6 +1404,19 @@ static int assigned_device_pci_cap_init(PCIDevice *pci_dev) > pos, len)) < 0) { > return ret; > } > + > + assigned_dev_setup_cap_read(dev, pos, len); > + > + /* direct write for cap content */ > + assigned_dev_direct_config_write(dev, pos + 2, len - 2); > + } > + > + /* If real and virtual capability list status bits differ, virtualize the > + * access. */ > + if ((pci_get_word(pci_dev->config + PCI_STATUS) & PCI_STATUS_CAP_LIST) != > + (assigned_dev_pci_read_byte(pci_dev, PCI_STATUS) & > + PCI_STATUS_CAP_LIST)) { > + dev->emulate_config_read[PCI_STATUS] |= PCI_STATUS_CAP_LIST; > } > > return 0; > @@ -1694,6 +1566,28 @@ static int assigned_initfn(struct PCIDevice *pci_dev) > return -1; > } > > + /* > + * Set up basic config space access control. Will be further refined during > + * device initialization. > + */ > + assigned_dev_emulate_config_read(dev, 0, PCI_CONFIG_SPACE_SIZE); > + assigned_dev_direct_config_read(dev, PCI_COMMAND, 2); > + assigned_dev_direct_config_read(dev, PCI_STATUS, 2); > + assigned_dev_direct_config_read(dev, PCI_REVISION_ID, 1); > + assigned_dev_direct_config_read(dev, PCI_CLASS_PROG, 3); > + assigned_dev_direct_config_read(dev, PCI_CACHE_LINE_SIZE, 1); > + assigned_dev_direct_config_read(dev, PCI_LATENCY_TIMER, 1); > + assigned_dev_direct_config_read(dev, PCI_HEADER_TYPE, 1); > + assigned_dev_direct_config_read(dev, PCI_BIST, 1); > + assigned_dev_direct_config_read(dev, PCI_CARDBUS_CIS, 4); > + assigned_dev_direct_config_read(dev, PCI_SUBSYSTEM_VENDOR_ID, 2); > + assigned_dev_direct_config_read(dev, PCI_SUBSYSTEM_ID, 2); > + assigned_dev_direct_config_read(dev, PCI_CAPABILITY_LIST + 1, 7); > + assigned_dev_direct_config_read(dev, PCI_MIN_GNT, 1); > + assigned_dev_direct_config_read(dev, PCI_MAX_LAT, 1); > + memcpy(dev->emulate_config_write, dev->emulate_config_read, > + sizeof(dev->emulate_config_read)); > + > if (get_real_device(dev, dev->host.seg, dev->host.bus, > dev->host.dev, dev->host.func)) { > error_report("pci-assign: Error: Couldn't get real device (%s)!", > diff --git a/hw/device-assignment.h b/hw/device-assignment.h > index 3d20207..231d9ee 100644 > --- a/hw/device-assignment.h > +++ b/hw/device-assignment.h > @@ -104,12 +104,13 @@ typedef struct AssignedDevice { > #define ASSIGNED_DEVICE_MSIX_MASKED (1 << 2) > uint32_t state; > } cap; > + uint8_t emulate_config_read[PCI_CONFIG_SPACE_SIZE]; > + uint8_t emulate_config_write[PCI_CONFIG_SPACE_SIZE]; > int irq_entries_nr; > struct kvm_irq_routing_entry *entry; > void *msix_table_page; > target_phys_addr_t msix_table_addr; > int mmio_index; > - uint32_t emulate_cmd_mask; > char *configfd_name; > int32_t bootindex; > QLIST_ENTRY(AssignedDevice) next; -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html