On 2011-06-28 10:10, Michael S. Tsirkin wrote: > On Mon, Jun 27, 2011 at 08:19:55PM +0200, Jan Kiszka wrote: >> From: Jan Kiszka <jan.kiszka@xxxxxxxxxxx> >> >> This drastically simplifies config space access management: Instead of >> coding various range checks and merging bits, set up two access control >> bitmaps. One defines, which bits can be directly read from the device, >> the other allows direct write to the device, also with bit-granularity. >> >> The setup of those bitmaps, specifically the corner cases like the >> capability list status bit, is way more readable than the existing code. >> And the generic config access functions just need one additional logic >> to catch writes to the MSI/MSI-X control flags and call the related >> update handlers. >> >> Signed-off-by: Jan Kiszka <jan.kiszka@xxxxxxxxxxx> >> --- >> hw/device-assignment.c | 325 ++++++++++++++++------------------------------- >> hw/device-assignment.h | 3 +- >> 2 files changed, 113 insertions(+), 215 deletions(-) >> >> diff --git a/hw/device-assignment.c b/hw/device-assignment.c >> index 6a2ca8c..be199d2 100644 >> --- a/hw/device-assignment.c >> +++ b/hw/device-assignment.c >> @@ -63,35 +63,6 @@ static void assigned_dev_load_option_rom(AssignedDevice *dev); >> >> static void assigned_dev_unregister_msix_mmio(AssignedDevice *dev); >> >> -static void assigned_device_pci_cap_write_config(PCIDevice *pci_dev, >> - uint32_t address, >> - uint32_t val, int len); >> - >> -static uint32_t assigned_device_pci_cap_read_config(PCIDevice *pci_dev, >> - uint32_t address, int len); >> - >> -/* Merge the bits set in mask from mval into val. Both val and mval are >> - * at the same addr offset, pos is the starting offset of the mask. */ >> -static uint32_t merge_bits(uint32_t val, uint32_t mval, uint8_t addr, >> - int len, uint8_t pos, uint32_t mask) >> -{ >> - if (!ranges_overlap(addr, len, pos, 4)) { >> - return val; >> - } >> - >> - if (addr >= pos) { >> - mask >>= (addr - pos) * 8; >> - } else { >> - mask <<= (pos - addr) * 8; >> - } >> - mask &= 0xffffffffU >> (4 - len) * 8; >> - >> - val &= ~mask; >> - val |= (mval & mask); >> - >> - return val; >> -} >> - >> static uint32_t assigned_dev_ioport_rw(AssignedDevRegion *dev_region, >> uint32_t addr, int len, uint32_t *val) >> { >> @@ -404,143 +375,6 @@ static uint8_t pci_find_cap_offset(PCIDevice *d, uint8_t cap, uint8_t start) >> return 0; >> } >> >> -static void assigned_dev_pci_write_config(PCIDevice *d, uint32_t address, >> - uint32_t val, int len) >> -{ >> - int fd; >> - ssize_t ret; >> - AssignedDevice *pci_dev = DO_UPCAST(AssignedDevice, dev, d); >> - >> - DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n", >> - ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7), >> - (uint16_t) address, val, len); >> - >> - if (address >= PCI_CONFIG_HEADER_SIZE && d->config_map[address]) { >> - return assigned_device_pci_cap_write_config(d, address, val, len); >> - } >> - >> - if (ranges_overlap(address, len, PCI_COMMAND, 2)) { >> - pci_default_write_config(d, address, val, len); >> - /* Continue to program the card */ >> - } >> - >> - /* >> - * Catch access to >> - * - base address registers >> - * - ROM base address & capability pointer >> - * - interrupt line & pin >> - */ >> - if (ranges_overlap(address, len, PCI_BASE_ADDRESS_0, 24) || >> - ranges_overlap(address, len, PCI_ROM_ADDRESS, 4)) { >> - pci_default_write_config(d, address, val, len); >> - return; >> - } else if (ranges_overlap(address, len, PCI_CAPABILITY_LIST, 1) || >> - ranges_overlap(address, len, PCI_INTERRUPT_LINE, 2)) { >> - uint32_t real_val; >> - >> - pci_default_write_config(d, address, val, len); >> - >> - /* Ensure that writes to overlapping areas we don't virtualize still >> - * hit the device. */ >> - real_val = assigned_dev_pci_read(d, address, len); >> - val = merge_bits(val, real_val, address, len, >> - PCI_CAPABILITY_LIST, 0xff); >> - val = merge_bits(val, real_val, address, len, >> - PCI_INTERRUPT_LINE, 0xffff); >> - } >> - >> - DEBUG("NON BAR (%x.%x): address=%04x val=0x%08x len=%d\n", >> - ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7), >> - (uint16_t) address, val, len); >> - >> - fd = pci_dev->real_device.config_fd; >> - >> -again: >> - ret = pwrite(fd, &val, len, address); >> - if (ret != len) { >> - if ((ret < 0) && (errno == EINTR || errno == EAGAIN)) >> - goto again; >> - >> - fprintf(stderr, "%s: pwrite failed, ret = %zd errno = %d\n", >> - __func__, ret, errno); >> - >> - exit(1); >> - } >> -} >> - >> -static uint32_t assigned_dev_pci_read_config(PCIDevice *d, uint32_t address, >> - int len) >> -{ >> - uint32_t val = 0, virt_val; >> - int fd; >> - ssize_t ret; >> - AssignedDevice *pci_dev = DO_UPCAST(AssignedDevice, dev, d); >> - >> - if (address >= PCI_CONFIG_HEADER_SIZE && d->config_map[address]) { >> - val = assigned_device_pci_cap_read_config(d, address, len); >> - DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n", >> - (d->devfn >> 3) & 0x1F, (d->devfn & 0x7), address, val, len); >> - return val; >> - } >> - >> - /* >> - * Catch access to >> - * - vendor & device ID >> - * - base address registers >> - * - ROM base address >> - */ >> - if (ranges_overlap(address, len, PCI_VENDOR_ID, 4) || >> - ranges_overlap(address, len, PCI_BASE_ADDRESS_0, 24) || >> - ranges_overlap(address, len, PCI_ROM_ADDRESS, 4)) { >> - val = pci_default_read_config(d, address, len); >> - DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n", >> - (d->devfn >> 3) & 0x1F, (d->devfn & 0x7), address, val, len); >> - return val; >> - } >> - >> - fd = pci_dev->real_device.config_fd; >> - >> -again: >> - ret = pread(fd, &val, len, address); >> - if (ret != len) { >> - if ((ret < 0) && (errno == EINTR || errno == EAGAIN)) >> - goto again; >> - >> - fprintf(stderr, "%s: pread failed, ret = %zd errno = %d\n", >> - __func__, ret, errno); >> - >> - exit(1); >> - } >> - >> - DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n", >> - (d->devfn >> 3) & 0x1F, (d->devfn & 0x7), address, val, len); >> - >> - if (pci_dev->emulate_cmd_mask) { >> - val = merge_bits(val, pci_default_read_config(d, address, len), >> - address, len, PCI_COMMAND, pci_dev->emulate_cmd_mask); >> - } >> - >> - /* >> - * Merge bits from virtualized >> - * - capability pointer >> - * - interrupt line & pin >> - */ >> - virt_val = pci_default_read_config(d, address, len); >> - val = merge_bits(val, virt_val, address, len, PCI_CAPABILITY_LIST, 0xff); >> - val = merge_bits(val, virt_val, address, len, PCI_INTERRUPT_LINE, 0xffff); >> - >> - if (!pci_dev->cap.available) { >> - /* kill the special capabilities */ >> - if (address == PCI_COMMAND && len == 4) { >> - val &= ~(PCI_STATUS_CAP_LIST << 16); >> - } else if (address == PCI_STATUS) { >> - val &= ~PCI_STATUS_CAP_LIST; >> - } >> - } >> - >> - return val; >> -} >> - >> static int assigned_dev_register_regions(PCIRegion *io_regions, >> unsigned long regions_num, >> AssignedDevice *pci_dev) >> @@ -790,7 +624,8 @@ again: >> /* dealing with virtual function device */ >> snprintf(name, sizeof(name), "%sphysfn/", dir); >> if (!stat(name, &statbuf)) { >> - pci_dev->emulate_cmd_mask = 0xffff; >> + /* always provide the written value on readout */ >> + memset(pci_dev->direct_config_read + PCI_COMMAND, 0xff, 2); >> } >> >> dev->region_number = r; >> @@ -1268,73 +1103,81 @@ static void assigned_dev_update_msix(PCIDevice *pci_dev) >> } >> } >> >> -/* There can be multiple VNDR capabilities per device, we need to find the >> - * one that starts closet to the given address without going over. */ >> -static uint8_t find_vndr_start(PCIDevice *pci_dev, uint32_t address) >> +static uint32_t assigned_dev_pci_read_config(PCIDevice *pci_dev, >> + uint32_t address, int len) >> { >> - uint8_t cap, pos; >> + AssignedDevice *assigned_dev = DO_UPCAST(AssignedDevice, dev, pci_dev); >> + uint32_t virt_val = pci_default_read_config(pci_dev, address, len); >> + uint32_t real_val, direct_mask; >> >> - for (cap = pos = 0; >> - (pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_VNDR, pos)); >> - pos += PCI_CAP_LIST_NEXT) { >> - if (pos <= address) { >> - cap = MAX(pos, cap); >> - } >> + switch (len) { >> + case 4: >> + direct_mask = >> + pci_get_long(assigned_dev->direct_config_read + address); >> + break; >> + case 2: >> + direct_mask = >> + pci_get_word(assigned_dev->direct_config_read + address); >> + break; >> + case 1: >> + direct_mask = >> + pci_get_byte(assigned_dev->direct_config_read + address); >> + break; >> + default: >> + abort(); >> } >> - return cap; >> -} >> - >> -static uint32_t assigned_device_pci_cap_read_config(PCIDevice *pci_dev, >> - uint32_t address, int len) >> -{ >> - uint8_t cap, cap_id = pci_dev->config_map[address]; >> - uint32_t val; >> - >> - switch (cap_id) { >> - >> - case PCI_CAP_ID_VPD: >> - cap = pci_find_capability(pci_dev, cap_id); >> - val = assigned_dev_pci_read(pci_dev, address, len); >> - return merge_bits(val, pci_get_long(pci_dev->config + address), >> - address, len, cap + PCI_CAP_LIST_NEXT, 0xff); >> - >> - case PCI_CAP_ID_VNDR: >> - cap = find_vndr_start(pci_dev, address); >> - val = assigned_dev_pci_read(pci_dev, address, len); >> - return merge_bits(val, pci_get_long(pci_dev->config + address), >> - address, len, cap + PCI_CAP_LIST_NEXT, 0xff); >> + if (direct_mask) { >> + real_val = assigned_dev_pci_read(pci_dev, address, len); >> + return (virt_val & ~direct_mask) | (real_val & direct_mask); >> + } else { >> + return virt_val; >> } >> - >> - return pci_default_read_config(pci_dev, address, len); >> } >> >> -static void assigned_device_pci_cap_write_config(PCIDevice *pci_dev, >> - uint32_t address, >> - uint32_t val, int len) >> +static void assigned_dev_pci_write_config(PCIDevice *pci_dev, uint32_t address, >> + uint32_t val, int len) >> { >> - uint8_t cap_id = pci_dev->config_map[address]; >> + AssignedDevice *assigned_dev = DO_UPCAST(AssignedDevice, dev, pci_dev); >> + uint32_t direct_mask, full_access; >> >> pci_default_write_config(pci_dev, address, val, len); >> - switch (cap_id) { >> - case PCI_CAP_ID_MSI: >> + >> + if (assigned_dev->cap.available & ASSIGNED_DEVICE_CAP_MSI) { >> if (range_covers_byte(address, len, >> pci_dev->msi_cap + PCI_MSI_FLAGS)) { >> assigned_dev_update_msi(pci_dev); >> } >> - break; >> - >> - case PCI_CAP_ID_MSIX: >> + } else if (assigned_dev->cap.available & ASSIGNED_DEVICE_CAP_MSIX) { >> if (range_covers_byte(address, len, >> pci_dev->msix_cap + PCI_MSIX_FLAGS + 1)) { >> assigned_dev_update_msix(pci_dev); >> } >> - break; >> + } >> >> - case PCI_CAP_ID_VPD: >> - case PCI_CAP_ID_VNDR: >> - assigned_dev_pci_write(pci_dev, address, val, len); >> + switch (len) { >> + case 4: >> + direct_mask = >> + pci_get_long(assigned_dev->direct_config_write + address); >> + full_access = 0xffffffff; >> + break; >> + case 2: >> + direct_mask = >> + pci_get_word(assigned_dev->direct_config_write + address); >> + full_access = 0xffff; >> break; >> + case 1: >> + direct_mask = >> + pci_get_byte(assigned_dev->direct_config_write + address); >> + full_access = 0xff; >> + break; >> + default: >> + abort(); >> + } >> + if (direct_mask != full_access) { >> + val &= direct_mask; >> + val |= assigned_dev_pci_read(pci_dev, address, len) & ~direct_mask; >> } >> + assigned_dev_pci_write(pci_dev, address, val, len); > > I think we should not write at all for fully virtualized fields > (direct_mask == 0). E.g. PCI BARs. Yes, Alex caught that as well. Fixed already. > >> } >> >> static int assigned_device_pci_cap_init(PCIDevice *pci_dev) >> @@ -1404,6 +1247,10 @@ static int assigned_device_pci_cap_init(PCIDevice *pci_dev) >> return ret; >> } >> >> + /* direct read except for next cap pointer */ >> + memset(dev->direct_config_read + pos, 0xff, PCI_PM_SIZEOF); >> + dev->direct_config_read[pos + PCI_CAP_LIST_NEXT] = 0; >> + >> pmc = pci_get_word(pci_dev->config + pos + PCI_CAP_FLAGS); >> pmc &= (PCI_PM_CAP_VER_MASK | PCI_PM_CAP_DSI); >> pci_set_word(pci_dev->config + pos + PCI_CAP_FLAGS, pmc); >> @@ -1438,6 +1285,10 @@ static int assigned_device_pci_cap_init(PCIDevice *pci_dev) >> return ret; >> } >> >> + /* direct read except for next cap pointer */ >> + memset(dev->direct_config_read + pos, 0xff, size); >> + dev->direct_config_read[pos + PCI_CAP_LIST_NEXT] = 0; >> + >> type = pci_get_word(pci_dev->config + pos + PCI_EXP_FLAGS); >> type = (type & PCI_EXP_FLAGS_TYPE) >> 8; >> if (type != PCI_EXP_TYPE_ENDPOINT && >> @@ -1513,6 +1364,10 @@ static int assigned_device_pci_cap_init(PCIDevice *pci_dev) >> return ret; >> } >> >> + /* direct read except for next cap pointer */ >> + memset(dev->direct_config_read + pos, 0xff, 8); >> + dev->direct_config_read[pos + PCI_CAP_LIST_NEXT] = 0; >> + >> /* Command register, clear upper bits, including extended modes */ >> cmd = pci_get_word(pci_dev->config + pos + PCI_X_CMD); >> cmd &= (PCI_X_CMD_DPERR_E | PCI_X_CMD_ERO | PCI_X_CMD_MAX_READ | >> @@ -1534,6 +1389,13 @@ static int assigned_device_pci_cap_init(PCIDevice *pci_dev) >> if ((ret = pci_add_capability(pci_dev, PCI_CAP_ID_VPD, pos, 8)) < 0) { >> return ret; >> } >> + >> + /* direct read except for next cap pointer */ >> + memset(dev->direct_config_read + pos, 0xff, 8); >> + dev->direct_config_read[pos + PCI_CAP_LIST_NEXT] = 0; >> + >> + /* direct write for cap content */ >> + memset(dev->direct_config_write + pos + 2, 0xff, 6); >> } >> >> /* Devices can have multiple vendor capabilities, get them all */ >> @@ -1545,6 +1407,21 @@ static int assigned_device_pci_cap_init(PCIDevice *pci_dev) >> pos, len)) < 0) { >> return ret; >> } >> + >> + /* direct read except for next cap pointer */ >> + memset(dev->direct_config_read + pos, 0xff, len); >> + dev->direct_config_read[pos + PCI_CAP_LIST_NEXT] = 0; > > Add a function for the above 3 lines? > >> + >> + /* direct write for cap content */ >> + memset(dev->direct_config_write + pos + 2, 0xff, len - 2); >> + } >> + >> + /* If real and virtual capability list status bits differ, virtualize the >> + * access. */ >> + if ((pci_get_word(pci_dev->config + PCI_STATUS) & PCI_STATUS_CAP_LIST) != >> + (assigned_dev_pci_read_byte(pci_dev, PCI_STATUS) & >> + PCI_STATUS_CAP_LIST)) { >> + dev->direct_config_read[PCI_STATUS] &= ~PCI_STATUS_CAP_LIST; >> } > > Is this an optimization? What harm does it do to virtualize always? Just replicating the old behavior, but I can virtualize it unconditionally. > >> >> return 0; >> @@ -1694,6 +1571,26 @@ static int assigned_initfn(struct PCIDevice *pci_dev) >> return -1; >> } >> >> + /* >> + * Set up basic config space access control. Will be further refined during >> + * device initialization. >> + * >> + * Direct read/write access is grangted for: > > typo > >> + * - status & command registers, revision ID & class code, cacheline size, >> + * latency timer, header type, BIST >> + * - cardbus CIS pointer, subsystem vendor & ID >> + * - reserved fields >> + * - Min_Gnt & Max_Lat >> + */ >> + memset(dev->direct_config_read + PCI_COMMAND, 0xff, 12); >> + memset(dev->direct_config_read + PCI_CARDBUS_CIS, 0xff, 8); >> + memset(dev->direct_config_read + PCI_CAPABILITY_LIST + 1, 0xff, 7); >> + memset(dev->direct_config_read + PCI_MIN_GNT, 0xff, 2); >> + memset(dev->direct_config_write + PCI_COMMAND, 0xff, 12); >> + memset(dev->direct_config_write + PCI_CARDBUS_CIS, 0xff, 8); >> + memset(dev->direct_config_write + PCI_CAPABILITY_LIST + 1, 0xff, 7); >> + memset(dev->direct_config_write + PCI_MIN_GNT, 0xff, 2); >> + >> if (get_real_device(dev, dev->host.seg, dev->host.bus, >> dev->host.dev, dev->host.func)) { >> error_report("pci-assign: Error: Couldn't get real device (%s)!", >> diff --git a/hw/device-assignment.h b/hw/device-assignment.h >> index 3d20207..ed5defb 100644 >> --- a/hw/device-assignment.h >> +++ b/hw/device-assignment.h >> @@ -104,12 +104,13 @@ typedef struct AssignedDevice { >> #define ASSIGNED_DEVICE_MSIX_MASKED (1 << 2) >> uint32_t state; >> } cap; >> + uint8_t direct_config_read[PCI_CONFIG_SPACE_SIZE]; >> + uint8_t direct_config_write[PCI_CONFIG_SPACE_SIZE]; > > What direct means isn't obvious. Add a comment? Ok. > > We could revert the logic, have bits for > emulate_config_read/emulate_config_write. I think that most of the > registers can safely be read/written directly, so protecting relevant > bits will be less work. Is that true? I intentionally chose whitelisting to avoid accidentally granting access to forgotten fields. I don't think it's a good idea to switch to blacklisting. Jan
Attachment:
signature.asc
Description: OpenPGP digital signature