On Thu, Nov 11, 2010 at 07:56:46PM -0700, Alex Williamson wrote: > Some drivers depend on finding capabilities like power management, > PCI express/X, vital product data, or vendor specific fields. Now > that we have better capability support, we can pass more of these > tables through to the guest. Note that VPD and VNDR are direct pass > through capabilies, the rest are mostly empty shells with a few > writable bits where necessary. > > Signed-off-by: Alex Williamson <alex.williamson@xxxxxxxxxx> > --- > > hw/device-assignment.c | 160 +++++++++++++++++++++++++++++++++++++++++++++--- > 1 files changed, 149 insertions(+), 11 deletions(-) > > diff --git a/hw/device-assignment.c b/hw/device-assignment.c > index 179c7dc..1b228ad 100644 > --- a/hw/device-assignment.c > +++ b/hw/device-assignment.c > @@ -366,6 +366,27 @@ static uint8_t assigned_dev_pci_read_byte(PCIDevice *d, int pos) > return (uint8_t)assigned_dev_pci_read(d, pos, 1); > } > > +static void assigned_dev_pci_write(PCIDevice *d, int pos, uint32_t val, int len) > +{ > + AssignedDevice *pci_dev = container_of(d, AssignedDevice, dev); > + ssize_t ret; > + int fd = pci_dev->real_device.config_fd; > + > +again: > + ret = pwrite(fd, &val, len, pos); > + if (ret != len) { > + if ((ret < 0) && (errno == EINTR || errno == EAGAIN)) > + goto again; do {} while() ? > + > + fprintf(stderr, "%s: pwrite failed, ret = %zd errno = %d\n", > + __func__, ret, errno); > + > + exit(1); > + } > + > + return; > +} > + > static uint8_t pci_find_cap_offset(PCIDevice *d, uint8_t cap) > { > int id; > @@ -1244,37 +1265,75 @@ static void assigned_dev_update_msix(PCIDevice *pci_dev, unsigned int ctrl_pos) > #endif > #endif > > +static uint32_t assigned_device_pci_cap_read_config(PCIDevice *pci_dev, > + uint8_t cap_id, > + uint32_t address, int len) > +{ > + uint8_t cap; > + > + switch (cap_id) { > + > + case PCI_CAP_ID_VPD: > + cap = pci_find_capability(pci_dev, cap_id); > + if (address - cap >= PCI_CAP_FLAGS) { > + return assigned_dev_pci_read(pci_dev, address, len); > + } > + break; > + > + case PCI_CAP_ID_VNDR: > + cap = pci_find_capability(pci_dev, cap_id); > + if (address - cap > PCI_CAP_FLAGS) { > + return assigned_dev_pci_read(pci_dev, address, len); > + } > + break; > + } > + > + return pci_default_cap_read_config(pci_dev, cap_id, address, len); > +} > + > static void assigned_device_pci_cap_write_config(PCIDevice *pci_dev, > uint8_t cap_id, > uint32_t address, > uint32_t val, int len) > { > + uint8_t cap; > + > pci_default_cap_write_config(pci_dev, cap_id, address, val, len); > > switch (cap_id) { > #ifdef KVM_CAP_IRQ_ROUTING > case PCI_CAP_ID_MSI: > #ifdef KVM_CAP_DEVICE_MSI > - { > - uint8_t cap = pci_find_capability(pci_dev, cap_id); > - if (ranges_overlap(address - cap, len, PCI_MSI_FLAGS, 1)) { > - assigned_dev_update_msi(pci_dev, cap + PCI_MSI_FLAGS); > - } > + cap = pci_find_capability(pci_dev, cap_id); > + if (ranges_overlap(address - cap, len, PCI_MSI_FLAGS, 1)) { > + assigned_dev_update_msi(pci_dev, cap + PCI_MSI_FLAGS); > } > #endif > break; > > case PCI_CAP_ID_MSIX: > #ifdef KVM_CAP_DEVICE_MSIX > - { > - uint8_t cap = pci_find_capability(pci_dev, cap_id); > - if (ranges_overlap(address - cap, len, PCI_MSIX_FLAGS + 1, 1)) { > - assigned_dev_update_msix(pci_dev, cap + PCI_MSIX_FLAGS); > - } > + cap = pci_find_capability(pci_dev, cap_id); > + if (ranges_overlap(address - cap, len, PCI_MSIX_FLAGS + 1, 1)) { > + assigned_dev_update_msix(pci_dev, cap + PCI_MSIX_FLAGS); > } > #endif > break; > #endif > + > + case PCI_CAP_ID_VPD: > + cap = pci_find_capability(pci_dev, cap_id); > + if (address - cap >= PCI_CAP_FLAGS) { > + assigned_dev_pci_write(pci_dev, address, val, len); > + } > + break; > + > + case PCI_CAP_ID_VNDR: > + cap = pci_find_capability(pci_dev, cap_id); > + if (address - cap > PCI_CAP_FLAGS) { > + assigned_dev_pci_write(pci_dev, address, val, len); > + } > + break; I have a feeling we should use overlap functions instead of address math. What do you think? Also - put cap offsets in assigned device structure to avoid find calls? > } > return; > } > @@ -1340,6 +1399,84 @@ static int assigned_device_pci_cap_init(PCIDevice *pci_dev) > #endif > #endif > > + /* Minimal PM support, make the state bits writable so the guest > + * thinks it's doing something. */ > + if ((pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_PM))) { > + uint16_t pmc, pmcsr; > + pci_add_capability_at_offset(pci_dev, PCI_CAP_ID_PM, pos, > + PCI_PM_SIZEOF); > + > + pmc = pci_get_word(pci_dev->config + pos + PCI_CAP_FLAGS); > + pmc &= (PCI_PM_CAP_VER_MASK | PCI_PM_CAP_DSI); > + pci_set_word(pci_dev->config + pos + PCI_CAP_FLAGS, pmc); > + > + pmcsr = pci_get_word(pci_dev->config + pos + PCI_PM_CTRL); > + pmcsr &= (PCI_PM_CTRL_STATE_MASK); > + pmcsr |= PCI_PM_CTRL_NO_SOFT_RST; > + pci_set_word(pci_dev->config + pos + PCI_PM_CTRL, pmcsr); > + > + pci_set_word(pci_dev->wmask + pos + PCI_PM_CTRL, > + PCI_PM_CTRL_STATE_MASK); > + } we don't pass anything to device. So - can this be put in pci_pm.c so that emulated devices can use this too? > + > + if ((pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_EXP))) { > + uint16_t devctl, lnkcap, lnksta; > + > + pci_add_capability_at_offset(pci_dev, PCI_CAP_ID_EXP, pos, 0x40); > + > + devctl = pci_get_word(pci_dev->config + pos + PCI_EXP_DEVCTL); > + devctl = (devctl & (PCI_EXP_DEVCTL_READRQ | PCI_EXP_DEVCTL_PAYLOAD)) | > + PCI_EXP_DEVCTL_RELAX_EN | PCI_EXP_DEVCTL_NOSNOOP_EN; > + pci_set_word(pci_dev->config + pos + PCI_EXP_DEVCTL, devctl); > + devctl = PCI_EXP_DEVCTL_BCR_FLR | PCI_EXP_DEVCTL_AUX_PME; > + pci_set_word(pci_dev->wmask + pos + PCI_EXP_DEVCTL, ~devctl); > + > + lnkcap = pci_get_word(pci_dev->config + pos + PCI_EXP_LNKCAP); > + lnkcap &= (PCI_EXP_LNKCAP_SLS | PCI_EXP_LNKCAP_MLW | > + PCI_EXP_LNKCAP_ASPMS | PCI_EXP_LNKCAP_L0SEL | > + PCI_EXP_LNKCAP_L1EL); > + pci_set_word(pci_dev->config + pos + PCI_EXP_LNKCAP, lnkcap); > + > + pci_set_word(pci_dev->wmask + pos + PCI_EXP_LNKCAP, > + PCI_EXP_LNKCTL_ASPMC | PCI_EXP_LNKCTL_RCB | > + PCI_EXP_LNKCTL_CCC | PCI_EXP_LNKCTL_ES | > + PCI_EXP_LNKCTL_CLKREQ_EN | PCI_EXP_LNKCTL_HAWD); > + > + lnksta = pci_get_word(pci_dev->config + pos + PCI_EXP_LNKSTA); > + lnksta &= (PCI_EXP_LNKSTA_CLS | PCI_EXP_LNKSTA_NLW); > + pci_set_word(pci_dev->config + pos + PCI_EXP_LNKSTA, lnksta); > + > + pci_set_word(pci_dev->wmask + pos + 0x28, 0x1f); > + pci_set_word(pci_dev->wmask + pos + 0x30, 0xfbf); This seems to overlap functionally with the express work upstream. Can code from there be reused? I also wonder whether is affects the guest OS if it finds an express device on a non-express bridge. > + } > + > + if ((pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_VPD))) { > + pci_add_capability_at_offset(pci_dev, PCI_CAP_ID_VPD, pos, 0x8); > + } > + > + if ((pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_VNDR))) { > + uint8_t len = pci_get_byte(pci_dev->config + pos + PCI_CAP_FLAGS); > + pci_add_capability_at_offset(pci_dev, PCI_CAP_ID_VNDR, pos, len); > + } > + > + if ((pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_PCIX))) { > + uint16_t cmd; > + uint32_t status; > + > + pci_add_capability_at_offset(pci_dev, PCI_CAP_ID_PCIX, pos, 0x8); > + > + cmd = pci_get_word(pci_dev->config + pos + PCI_X_CMD); > + cmd &= (PCI_X_CMD_DPERR_E | PCI_X_CMD_ERO | PCI_X_CMD_MAX_READ | > + PCI_X_CMD_MAX_SPLIT); > + pci_set_word(pci_dev->config + pos + PCI_X_CMD, cmd); > + > + status = pci_get_long(pci_dev->config + pos + PCI_X_STATUS); > + status &= ~(PCI_X_STATUS_BUS | PCI_X_STATUS_DEVFN); > + status |= (pci_bus_num(pci_dev->bus) << 8) | pci_dev->devfn; > + status &= ~(PCI_X_STATUS_SPL_DISC | PCI_X_STATUS_UNX_SPL | > + PCI_X_STATUS_SPL_ERR); > + pci_set_long(pci_dev->config + pos + PCI_X_STATUS, status); > + } This will be handy for non-assignment case so I'd like to see this moved out of device-assignment.c: we could create pcix.c or just add to pci.c. > return 0; > } > > @@ -1466,7 +1603,8 @@ static int assigned_initfn(struct PCIDevice *pci_dev) > dev->h_busnr = dev->host.bus; > dev->h_devfn = PCI_DEVFN(dev->host.dev, dev->host.func); > > - pci_register_capability_handlers(pci_dev, NULL, > + pci_register_capability_handlers(pci_dev, > + assigned_device_pci_cap_read_config, > assigned_device_pci_cap_write_config); Maybe these could go away? > > if (assigned_device_pci_cap_init(pci_dev) < 0) -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html