Some drivers depend on finding capabilities like power management, PCI express/X, vital product data, or vendor specific fields. Now that we have better capability support, we can pass more of these tables through to the guest. Note that VPD and VNDR are direct pass through capabilies, the rest are mostly empty shells with a few writable bits where necessary. It may be possible to consolidate dummy capabilities into common files for other drivers to use, but I prefer to leave them here for now as we figure out what bits to handle directly with hardware and what bits are purely emulated. Signed-off-by: Alex Williamson <alex.williamson@xxxxxxxxxx> --- hw/device-assignment.c | 213 +++++++++++++++++++++++++++++++++++++++++++++--- 1 files changed, 199 insertions(+), 14 deletions(-) diff --git a/hw/device-assignment.c b/hw/device-assignment.c index 832c236..cd62b5a 100644 --- a/hw/device-assignment.c +++ b/hw/device-assignment.c @@ -67,6 +67,9 @@ static void assigned_device_pci_cap_write_config(PCIDevice *pci_dev, uint32_t address, uint32_t val, int len); +static uint32_t assigned_device_pci_cap_read_config(PCIDevice *pci_dev, + uint32_t address, int len); + static uint32_t assigned_dev_ioport_rw(AssignedDevRegion *dev_region, uint32_t addr, int len, uint32_t *val) { @@ -370,6 +373,27 @@ static uint8_t assigned_dev_pci_read_byte(PCIDevice *d, int pos) return (uint8_t)assigned_dev_pci_read(d, pos, 1); } +static void assigned_dev_pci_write(PCIDevice *d, int pos, uint32_t val, int len) +{ + AssignedDevice *pci_dev = container_of(d, AssignedDevice, dev); + ssize_t ret; + int fd = pci_dev->real_device.config_fd; + +again: + ret = pwrite(fd, &val, len, pos); + if (ret != len) { + if ((ret < 0) && (errno == EINTR || errno == EAGAIN)) + goto again; + + fprintf(stderr, "%s: pwrite failed, ret = %zd errno = %d\n", + __func__, ret, errno); + + exit(1); + } + + return; +} + static uint8_t pci_find_cap_offset(PCIDevice *d, uint8_t cap) { int id; @@ -453,10 +477,13 @@ static uint32_t assigned_dev_pci_read_config(PCIDevice *d, uint32_t address, ssize_t ret; AssignedDevice *pci_dev = container_of(d, AssignedDevice, dev); + if (address > PCI_CONFIG_HEADER_SIZE && d->config_map[address]) { + return assigned_device_pci_cap_read_config(d, address, len); + } + if (address < 0x4 || (pci_dev->need_emulate_cmd && address == 0x4) || (address >= 0x10 && address <= 0x24) || address == 0x30 || - address == 0x34 || address == 0x3c || address == 0x3d || - (address > PCI_CONFIG_HEADER_SIZE && d->config_map[address])) { + address == 0x34 || address == 0x3c || address == 0x3d) { val = pci_default_read_config(d, address, len); DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n", (d->devfn >> 3) & 0x1F, (d->devfn & 0x7), address, val, len); @@ -1251,36 +1278,72 @@ static void assigned_dev_update_msix(PCIDevice *pci_dev, unsigned int ctrl_pos) #endif #endif -static void assigned_device_pci_cap_write_config(PCIDevice *pci_dev, uint32_t address, +static uint32_t assigned_device_pci_cap_read_config(PCIDevice *pci_dev, + uint32_t address, int len) +{ + uint8_t cap, cap_id = pci_dev->config_map[address]; + + switch (cap_id) { + + case PCI_CAP_ID_VPD: + cap = pci_find_capability(pci_dev, cap_id); + if (!ranges_overlap(address, len, cap, PCI_CAP_FLAGS)) { + return assigned_dev_pci_read(pci_dev, address, len); + } + break; + + case PCI_CAP_ID_VNDR: + cap = pci_find_capability(pci_dev, cap_id); + if (!ranges_overlap(address, len, cap, PCI_CAP_FLAGS + 1)) { + return assigned_dev_pci_read(pci_dev, address, len); + } + break; + } + + return pci_default_read_config(pci_dev, address, len); +} + +static void assigned_device_pci_cap_write_config(PCIDevice *pci_dev, + uint32_t address, uint32_t val, int len) { - uint8_t cap_id = pci_dev->config_map[address]; + uint8_t cap, cap_id = pci_dev->config_map[address]; pci_default_write_config(pci_dev, address, val, len); switch (cap_id) { #ifdef KVM_CAP_IRQ_ROUTING case PCI_CAP_ID_MSI: #ifdef KVM_CAP_DEVICE_MSI - { - uint8_t cap = pci_find_capability(pci_dev, cap_id); - if (ranges_overlap(address - cap, len, PCI_MSI_FLAGS, 1)) { - assigned_dev_update_msi(pci_dev, cap + PCI_MSI_FLAGS); - } + cap = pci_find_capability(pci_dev, cap_id); + if (ranges_overlap(address - cap, len, PCI_MSI_FLAGS, 1)) { + assigned_dev_update_msi(pci_dev, cap + PCI_MSI_FLAGS); } #endif break; case PCI_CAP_ID_MSIX: #ifdef KVM_CAP_DEVICE_MSIX - { - uint8_t cap = pci_find_capability(pci_dev, cap_id); - if (ranges_overlap(address - cap, len, PCI_MSIX_FLAGS + 1, 1)) { - assigned_dev_update_msix(pci_dev, cap + PCI_MSIX_FLAGS); - } + cap = pci_find_capability(pci_dev, cap_id); + if (ranges_overlap(address - cap, len, PCI_MSIX_FLAGS + 1, 1)) { + assigned_dev_update_msix(pci_dev, cap + PCI_MSIX_FLAGS); } #endif break; #endif + + case PCI_CAP_ID_VPD: + cap = pci_find_capability(pci_dev, cap_id); + if (!ranges_overlap(address, len, cap, PCI_CAP_FLAGS)) { + assigned_dev_pci_write(pci_dev, address, val, len); + } + break; + + case PCI_CAP_ID_VNDR: + cap = pci_find_capability(pci_dev, cap_id); + if (!ranges_overlap(address, len, cap, PCI_CAP_FLAGS + 1)) { + assigned_dev_pci_write(pci_dev, address, val, len); + } + break; } } @@ -1345,6 +1408,128 @@ static int assigned_device_pci_cap_init(PCIDevice *pci_dev) #endif #endif + /* Minimal PM support, nothing writable, device appears to NAK changes */ + if ((pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_PM))) { + uint16_t pmc, pmcsr; + pci_add_capability(pci_dev, PCI_CAP_ID_PM, pos, PCI_PM_SIZEOF); + + pmc = pci_get_word(pci_dev->config + pos + PCI_CAP_FLAGS); + pmc &= (PCI_PM_CAP_VER_MASK | PCI_PM_CAP_DSI); + pci_set_word(pci_dev->config + pos + PCI_CAP_FLAGS, pmc); + + pmcsr = pci_get_word(pci_dev->config + pos + PCI_PM_CTRL); + pmcsr &= (PCI_PM_CTRL_STATE_MASK); + pmcsr |= PCI_PM_CTRL_NO_SOFT_RST; + pci_set_word(pci_dev->config + pos + PCI_PM_CTRL, pmcsr); + + pci_set_byte(pci_dev->config + pos + PCI_PM_PPB_EXTENSIONS, 0); + pci_set_byte(pci_dev->config + pos + PCI_PM_DATA_REGISTER, 0); + } + + if ((pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_EXP))) { + uint16_t type, devctl, lnkcap, lnksta; + uint32_t devcap; + + pci_add_capability(pci_dev, PCI_CAP_ID_EXP, pos, 0x40); + + type = pci_get_word(pci_dev->config + pos + PCI_EXP_FLAGS); + type = (type & PCI_EXP_FLAGS_TYPE) >> 8; + if (type != PCI_EXP_TYPE_ENDPOINT && + type != PCI_EXP_TYPE_LEG_END && type != PCI_EXP_TYPE_RC_END) { + fprintf(stderr, + "Device assignment only supports endpoint assignment, " + "device type %d\n", type); + return -EINVAL; + } + + /* capabilities, pass existing read-only copy + * PCI_EXP_FLAGS_IRQ: updated by hardware, should be direct read */ + + /* device capabilities: hide FLR */ + devcap = pci_get_long(pci_dev->config + pos + PCI_EXP_DEVCAP); + devcap &= ~PCI_EXP_DEVCAP_FLR; + pci_set_long(pci_dev->config + pos + PCI_EXP_DEVCAP, devcap); + + /* device control: clear all error reporting enable bits, leaving + * leaving only a few host values. Note, these are + * all writable, but not passed to hw. + */ + devctl = pci_get_word(pci_dev->config + pos + PCI_EXP_DEVCTL); + devctl = (devctl & (PCI_EXP_DEVCTL_READRQ | PCI_EXP_DEVCTL_PAYLOAD)) | + PCI_EXP_DEVCTL_RELAX_EN | PCI_EXP_DEVCTL_NOSNOOP_EN; + pci_set_word(pci_dev->config + pos + PCI_EXP_DEVCTL, devctl); + devctl = PCI_EXP_DEVCTL_BCR_FLR | PCI_EXP_DEVCTL_AUX_PME; + pci_set_word(pci_dev->wmask + pos + PCI_EXP_DEVCTL, ~devctl); + + /* Clear device status */ + pci_set_word(pci_dev->config + pos + PCI_EXP_DEVSTA, 0); + + /* Link capabilities, expose links and latencues, clear reporting */ + lnkcap = pci_get_word(pci_dev->config + pos + PCI_EXP_LNKCAP); + lnkcap &= (PCI_EXP_LNKCAP_SLS | PCI_EXP_LNKCAP_MLW | + PCI_EXP_LNKCAP_ASPMS | PCI_EXP_LNKCAP_L0SEL | + PCI_EXP_LNKCAP_L1EL); + pci_set_word(pci_dev->config + pos + PCI_EXP_LNKCAP, lnkcap); + pci_set_word(pci_dev->wmask + pos + PCI_EXP_LNKCAP, + PCI_EXP_LNKCTL_ASPMC | PCI_EXP_LNKCTL_RCB | + PCI_EXP_LNKCTL_CCC | PCI_EXP_LNKCTL_ES | + PCI_EXP_LNKCTL_CLKREQ_EN | PCI_EXP_LNKCTL_HAWD); + + /* Link control, pass existing read-only copy. Should be writable? */ + + /* Link status, only expose current speed and width */ + lnksta = pci_get_word(pci_dev->config + pos + PCI_EXP_LNKSTA); + lnksta &= (PCI_EXP_LNKSTA_CLS | PCI_EXP_LNKSTA_NLW); + pci_set_word(pci_dev->config + pos + PCI_EXP_LNKSTA, lnksta); + + /* Slot capabilities, control, status - not needed for endpoints */ + pci_set_long(pci_dev->config + pos + PCI_EXP_SLTCAP, 0); + pci_set_word(pci_dev->config + pos + PCI_EXP_SLTCTL, 0); + pci_set_word(pci_dev->config + pos + PCI_EXP_SLTSTA, 0); + + /* Root control, capabilities, status - not needed for endpoints */ + pci_set_word(pci_dev->config + pos + PCI_EXP_RTCTL, 0); + pci_set_word(pci_dev->config + pos + PCI_EXP_RTCAP, 0); + pci_set_long(pci_dev->config + pos + PCI_EXP_RTSTA, 0); + + /* Device capabilities/control 2, pass existing read-only copy */ + /* Link control 2, pass existing read-only copy */ + } + + if ((pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_PCIX))) { + uint16_t cmd; + uint32_t status; + + /* Only expose the minimum, 8 byte capability */ + pci_add_capability(pci_dev, PCI_CAP_ID_PCIX, pos, 8); + + /* Command register, clear upper bits, including extended modes */ + cmd = pci_get_word(pci_dev->config + pos + PCI_X_CMD); + cmd &= (PCI_X_CMD_DPERR_E | PCI_X_CMD_ERO | PCI_X_CMD_MAX_READ | + PCI_X_CMD_MAX_SPLIT); + pci_set_word(pci_dev->config + pos + PCI_X_CMD, cmd); + + /* Status register, update with emulated PCI bus location, clear + * error bits, leave the rest. */ + status = pci_get_long(pci_dev->config + pos + PCI_X_STATUS); + status &= ~(PCI_X_STATUS_BUS | PCI_X_STATUS_DEVFN); + status |= (pci_bus_num(pci_dev->bus) << 8) | pci_dev->devfn; + status &= ~(PCI_X_STATUS_SPL_DISC | PCI_X_STATUS_UNX_SPL | + PCI_X_STATUS_SPL_ERR); + pci_set_long(pci_dev->config + pos + PCI_X_STATUS, status); + } + + if ((pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_VPD))) { + /* Direct R/W passthrough */ + pci_add_capability(pci_dev, PCI_CAP_ID_VPD, pos, 8); + } + + if ((pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_VNDR))) { + uint8_t len = pci_get_byte(pci_dev->config + pos + PCI_CAP_FLAGS); + /* Direct R/W passthrough */ + pci_add_capability(pci_dev, PCI_CAP_ID_VNDR, pos, len); + } + return 0; } -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html