Re: [RFC PATCH 8/8] device-assignment: pass through and stub more PCI caps

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, Nov 11, 2010 at 07:56:46PM -0700, Alex Williamson wrote:
> Some drivers depend on finding capabilities like power management,
> PCI express/X, vital product data, or vendor specific fields.  Now
> that we have better capability support, we can pass more of these
> tables through to the guest.  Note that VPD and VNDR are direct pass
> through capabilies, the rest are mostly empty shells with a few
> writable bits where necessary.
> 
> Signed-off-by: Alex Williamson <alex.williamson@xxxxxxxxxx>
> ---
> 
>  hw/device-assignment.c |  160 +++++++++++++++++++++++++++++++++++++++++++++---
>  1 files changed, 149 insertions(+), 11 deletions(-)
> 
> diff --git a/hw/device-assignment.c b/hw/device-assignment.c
> index 179c7dc..1b228ad 100644
> --- a/hw/device-assignment.c
> +++ b/hw/device-assignment.c
> @@ -366,6 +366,27 @@ static uint8_t assigned_dev_pci_read_byte(PCIDevice *d, int pos)
>      return (uint8_t)assigned_dev_pci_read(d, pos, 1);
>  }
>  
> +static void assigned_dev_pci_write(PCIDevice *d, int pos, uint32_t val, int len)
> +{
> +    AssignedDevice *pci_dev = container_of(d, AssignedDevice, dev);
> +    ssize_t ret;
> +    int fd = pci_dev->real_device.config_fd;
> +
> +again:
> +    ret = pwrite(fd, &val, len, pos);
> +    if (ret != len) {
> +	if ((ret < 0) && (errno == EINTR || errno == EAGAIN))
> +	    goto again;


do {} while() ?

> +
> +	fprintf(stderr, "%s: pwrite failed, ret = %zd errno = %d\n",
> +		__func__, ret, errno);
> +
> +	exit(1);
> +    }
> +
> +    return;
> +}
> +
>  static uint8_t pci_find_cap_offset(PCIDevice *d, uint8_t cap)
>  {
>      int id;
> @@ -1244,37 +1265,75 @@ static void assigned_dev_update_msix(PCIDevice *pci_dev, unsigned int ctrl_pos)
>  #endif
>  #endif
>  
> +static uint32_t assigned_device_pci_cap_read_config(PCIDevice *pci_dev,
> +                                                    uint8_t cap_id,
> +                                                    uint32_t address, int len)
> +{
> +    uint8_t cap;
> +
> +    switch (cap_id) {
> +
> +    case PCI_CAP_ID_VPD:
> +        cap = pci_find_capability(pci_dev, cap_id);
> +        if (address - cap >= PCI_CAP_FLAGS) {
> +            return assigned_dev_pci_read(pci_dev, address, len);
> +        }
> +        break;
> +
> +    case PCI_CAP_ID_VNDR:
> +        cap = pci_find_capability(pci_dev, cap_id);
> +        if (address - cap > PCI_CAP_FLAGS) {
> +            return assigned_dev_pci_read(pci_dev, address, len);
> +        }
> +        break;
> +    }
> +
> +    return pci_default_cap_read_config(pci_dev, cap_id, address, len);
> +}
> +
>  static void assigned_device_pci_cap_write_config(PCIDevice *pci_dev,
>                                                   uint8_t cap_id,
>                                                   uint32_t address,
>                                                   uint32_t val, int len)
>  {
> +    uint8_t cap;
> +
>      pci_default_cap_write_config(pci_dev, cap_id, address, val, len);
>  
>      switch (cap_id) {
>  #ifdef KVM_CAP_IRQ_ROUTING
>      case PCI_CAP_ID_MSI:
>  #ifdef KVM_CAP_DEVICE_MSI
> -        {
> -            uint8_t cap = pci_find_capability(pci_dev, cap_id);
> -            if (ranges_overlap(address - cap, len, PCI_MSI_FLAGS, 1)) {
> -                assigned_dev_update_msi(pci_dev, cap + PCI_MSI_FLAGS);
> -            }
> +        cap = pci_find_capability(pci_dev, cap_id);
> +        if (ranges_overlap(address - cap, len, PCI_MSI_FLAGS, 1)) {
> +            assigned_dev_update_msi(pci_dev, cap + PCI_MSI_FLAGS);
>          }
>  #endif
>          break;
>  
>      case PCI_CAP_ID_MSIX:
>  #ifdef KVM_CAP_DEVICE_MSIX
> -        {
> -            uint8_t cap = pci_find_capability(pci_dev, cap_id);
> -            if (ranges_overlap(address - cap, len, PCI_MSIX_FLAGS + 1, 1)) {
> -                assigned_dev_update_msix(pci_dev, cap + PCI_MSIX_FLAGS);
> -            }
> +        cap = pci_find_capability(pci_dev, cap_id);
> +        if (ranges_overlap(address - cap, len, PCI_MSIX_FLAGS + 1, 1)) {
> +            assigned_dev_update_msix(pci_dev, cap + PCI_MSIX_FLAGS);
>          }
>  #endif
>          break;
>  #endif
> +
> +    case PCI_CAP_ID_VPD:
> +        cap = pci_find_capability(pci_dev, cap_id);
> +        if (address - cap >= PCI_CAP_FLAGS) {
> +            assigned_dev_pci_write(pci_dev, address, val, len);
> +        }
> +        break;
> +
> +    case PCI_CAP_ID_VNDR:
> +        cap = pci_find_capability(pci_dev, cap_id);
> +        if (address - cap > PCI_CAP_FLAGS) {
> +            assigned_dev_pci_write(pci_dev, address, val, len);
> +        }
> +        break;

I have a feeling we should use overlap functions instead of
address math. What do you think?
Also - put cap offsets in assigned device structure to avoid
find calls?

>      }
>      return;
>  }
> @@ -1340,6 +1399,84 @@ static int assigned_device_pci_cap_init(PCIDevice *pci_dev)
>  #endif
>  #endif
>  
> +    /* Minimal PM support, make the state bits writable so the guest
> +     * thinks it's doing something. */
> +    if ((pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_PM))) {
> +        uint16_t pmc, pmcsr;
> +        pci_add_capability_at_offset(pci_dev, PCI_CAP_ID_PM, pos,
> +                                     PCI_PM_SIZEOF);
> +
> +        pmc = pci_get_word(pci_dev->config + pos + PCI_CAP_FLAGS);
> +        pmc &= (PCI_PM_CAP_VER_MASK | PCI_PM_CAP_DSI);
> +        pci_set_word(pci_dev->config + pos + PCI_CAP_FLAGS, pmc);
> +
> +        pmcsr = pci_get_word(pci_dev->config + pos + PCI_PM_CTRL);
> +        pmcsr &= (PCI_PM_CTRL_STATE_MASK);
> +        pmcsr |= PCI_PM_CTRL_NO_SOFT_RST;
> +        pci_set_word(pci_dev->config + pos + PCI_PM_CTRL, pmcsr);
> +
> +        pci_set_word(pci_dev->wmask + pos + PCI_PM_CTRL,
> +                     PCI_PM_CTRL_STATE_MASK);
> +    }

we don't pass anything to device. So - can this be put in pci_pm.c
so that emulated devices can use this too?

> +
> +    if ((pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_EXP))) {
> +        uint16_t devctl, lnkcap, lnksta;
> +
> +        pci_add_capability_at_offset(pci_dev, PCI_CAP_ID_EXP, pos, 0x40);
> +
> +        devctl = pci_get_word(pci_dev->config + pos + PCI_EXP_DEVCTL);
> +        devctl = (devctl & (PCI_EXP_DEVCTL_READRQ | PCI_EXP_DEVCTL_PAYLOAD)) |
> +                  PCI_EXP_DEVCTL_RELAX_EN | PCI_EXP_DEVCTL_NOSNOOP_EN;
> +        pci_set_word(pci_dev->config + pos + PCI_EXP_DEVCTL, devctl);
> +        devctl = PCI_EXP_DEVCTL_BCR_FLR | PCI_EXP_DEVCTL_AUX_PME;
> +        pci_set_word(pci_dev->wmask + pos + PCI_EXP_DEVCTL, ~devctl);
> +
> +        lnkcap = pci_get_word(pci_dev->config + pos + PCI_EXP_LNKCAP);
> +        lnkcap &= (PCI_EXP_LNKCAP_SLS | PCI_EXP_LNKCAP_MLW |
> +                   PCI_EXP_LNKCAP_ASPMS | PCI_EXP_LNKCAP_L0SEL |
> +                   PCI_EXP_LNKCAP_L1EL);
> +        pci_set_word(pci_dev->config + pos + PCI_EXP_LNKCAP, lnkcap);
> +
> +        pci_set_word(pci_dev->wmask + pos + PCI_EXP_LNKCAP,
> +                     PCI_EXP_LNKCTL_ASPMC | PCI_EXP_LNKCTL_RCB |
> +                     PCI_EXP_LNKCTL_CCC | PCI_EXP_LNKCTL_ES |
> +                     PCI_EXP_LNKCTL_CLKREQ_EN | PCI_EXP_LNKCTL_HAWD);
> +        
> +        lnksta = pci_get_word(pci_dev->config + pos + PCI_EXP_LNKSTA);
> +        lnksta &= (PCI_EXP_LNKSTA_CLS | PCI_EXP_LNKSTA_NLW);
> +        pci_set_word(pci_dev->config + pos + PCI_EXP_LNKSTA, lnksta);
> +
> +        pci_set_word(pci_dev->wmask + pos + 0x28, 0x1f);
> +        pci_set_word(pci_dev->wmask + pos + 0x30, 0xfbf);

This seems to overlap functionally with the express work upstream.
Can code from there be reused?  I also wonder whether is affects the
guest OS if it finds an express device on a non-express bridge.

> +    }
> +
> +    if ((pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_VPD))) {
> +        pci_add_capability_at_offset(pci_dev, PCI_CAP_ID_VPD, pos, 0x8);
> +    }
> +
> +    if ((pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_VNDR))) {
> +        uint8_t len = pci_get_byte(pci_dev->config + pos + PCI_CAP_FLAGS);
> +        pci_add_capability_at_offset(pci_dev, PCI_CAP_ID_VNDR, pos, len);
> +    }
> +
> +    if ((pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_PCIX))) {
> +        uint16_t cmd;
> +        uint32_t status;
> +
> +        pci_add_capability_at_offset(pci_dev, PCI_CAP_ID_PCIX, pos, 0x8);
> +
> +        cmd = pci_get_word(pci_dev->config + pos + PCI_X_CMD);
> +        cmd &= (PCI_X_CMD_DPERR_E | PCI_X_CMD_ERO | PCI_X_CMD_MAX_READ |
> +                PCI_X_CMD_MAX_SPLIT);
> +        pci_set_word(pci_dev->config + pos + PCI_X_CMD, cmd);
> +
> +        status = pci_get_long(pci_dev->config + pos + PCI_X_STATUS);
> +        status &= ~(PCI_X_STATUS_BUS | PCI_X_STATUS_DEVFN);
> +        status |= (pci_bus_num(pci_dev->bus) << 8) | pci_dev->devfn;
> +        status &= ~(PCI_X_STATUS_SPL_DISC | PCI_X_STATUS_UNX_SPL |
> +                    PCI_X_STATUS_SPL_ERR);
> +        pci_set_long(pci_dev->config + pos + PCI_X_STATUS, status);
> +    }

This will be handy for non-assignment case so
I'd like to see this moved out of device-assignment.c:
we could create pcix.c or just add to pci.c.

>      return 0;
>  }
>  
> @@ -1466,7 +1603,8 @@ static int assigned_initfn(struct PCIDevice *pci_dev)
>      dev->h_busnr = dev->host.bus;
>      dev->h_devfn = PCI_DEVFN(dev->host.dev, dev->host.func);
>  
> -    pci_register_capability_handlers(pci_dev, NULL,
> +    pci_register_capability_handlers(pci_dev,
> +                                     assigned_device_pci_cap_read_config,
>                                       assigned_device_pci_cap_write_config);

Maybe these could go away?

>  
>      if (assigned_device_pci_cap_init(pci_dev) < 0)
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux