Re: [PATCH] qemu-kvm: device assignment: Enabling MSI-X according to mask bit

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thursday 16 December 2010 16:21:52 Sheng Yang wrote:
> The old MSI-X enabling method assume the entries are written before MSI-X
> enabled, but some OS didn't obey this, e.g. FreeBSD. This patch would fix
> this.
> 
> Also, according to the PCI spec, mask bit of MSI-X table should be set
> after reset.

Ping?

--
regards
Yang, Sheng

> 
> Signed-off-by: Sheng Yang <sheng@xxxxxxxxxxxxxxx>
> ---
>  hw/device-assignment.c |  188
> +++++++++++++++++++++++++++++++++++++++++------- hw/device-
assignment.h | 
>   2 +-
>  2 files changed, 162 insertions(+), 28 deletions(-)
> 
> diff --git a/hw/device-assignment.c b/hw/device-assignment.c
> index 832c236..ed0b491 100644
> --- a/hw/device-assignment.c
> +++ b/hw/device-assignment.c
> @@ -1111,15 +1111,12 @@ static void assigned_dev_update_msi(PCIDevice
> *pci_dev, unsigned int ctrl_pos) #endif
> 
>  #ifdef KVM_CAP_DEVICE_MSIX
> -static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev)
> +
> +#define PCI_MSIX_CTRL_MASKBIT	1ul
> +static int get_msix_entries_max_nr(AssignedDevice *adev)
>  {
> -    AssignedDevice *adev = container_of(pci_dev, AssignedDevice, dev);
> -    uint16_t entries_nr = 0, entries_max_nr;
> -    int pos = 0, i, r = 0;
> -    uint32_t msg_addr, msg_upper_addr, msg_data, msg_ctrl;
> -    struct kvm_assigned_msix_nr msix_nr;
> -    struct kvm_assigned_msix_entry msix_entry;
> -    void *va = adev->msix_table_page;
> +    int pos, entries_max_nr;
> +    PCIDevice *pci_dev = &adev->dev;
> 
>      pos = pci_find_capability(pci_dev, PCI_CAP_ID_MSIX);
> 
> @@ -1127,20 +1124,48 @@ static int assigned_dev_update_msix_mmio(PCIDevice
> *pci_dev) entries_max_nr &= PCI_MSIX_TABSIZE;
>      entries_max_nr += 1;
> 
> +    return entries_max_nr;
> +}
> +
> +static int assigned_dev_msix_entry_masked(AssignedDevice *adev, int entry)
> +{
> +    uint32_t msg_ctrl;
> +    void *va = adev->msix_table_page;
> +
> +    memcpy(&msg_ctrl, va + entry * 16 + 12, 4);
> +    return (msg_ctrl & PCI_MSIX_CTRL_MASKBIT);
> +}
> +
> +static int get_msix_valid_entries_nr(AssignedDevice *adev,
> +				     uint16_t entries_max_nr)
> +{
> +    void *va = adev->msix_table_page;
> +    uint32_t msg_ctrl;
> +    uint16_t entries_nr = 0;
> +    int i;
> +
>      /* Get the usable entry number for allocating */
>      for (i = 0; i < entries_max_nr; i++) {
>          memcpy(&msg_ctrl, va + i * 16 + 12, 4);
> -        memcpy(&msg_data, va + i * 16 + 8, 4);
>          /* Ignore unused entry even it's unmasked */
> -        if (msg_data == 0)
> +        if (assigned_dev_msix_entry_masked(adev, i))
>              continue;
>          entries_nr ++;
>      }
> +    return entries_nr;
> +}
> +
> +static int assigned_dev_update_msix_mmio(PCIDevice *pci_dev,
> +                                         uint16_t entries_nr,
> +                                         uint16_t entries_max_nr)
> +{
> +    AssignedDevice *adev = container_of(pci_dev, AssignedDevice, dev);
> +    int i, r = 0;
> +    uint32_t msg_addr, msg_upper_addr, msg_data, msg_ctrl;
> +    struct kvm_assigned_msix_nr msix_nr;
> +    struct kvm_assigned_msix_entry msix_entry;
> +    void *va = adev->msix_table_page;
> 
> -    if (entries_nr == 0) {
> -        fprintf(stderr, "MSI-X entry number is zero!\n");
> -        return -EINVAL;
> -    }
>      msix_nr.assigned_dev_id = calc_assigned_dev_id(adev->h_segnr,
> adev->h_busnr, (uint8_t)adev->h_devfn); msix_nr.entry_nr = entries_nr;
> @@ -1152,6 +1177,8 @@ static int assigned_dev_update_msix_mmio(PCIDevice
> *pci_dev) }
> 
>      free_dev_irq_entries(adev);
> +    memset(pci_dev->msix_entry_used, 0, KVM_MAX_MSIX_PER_DEV *
> +                                       
> sizeof(*pci_dev->msix_entry_used)); adev->irq_entries_nr = entries_nr;
>      adev->entry = calloc(entries_nr, sizeof(struct
> kvm_irq_routing_entry)); if (!adev->entry) {
> @@ -1165,10 +1192,10 @@ static int assigned_dev_update_msix_mmio(PCIDevice
> *pci_dev) if (entries_nr >= msix_nr.entry_nr)
>              break;
>          memcpy(&msg_ctrl, va + i * 16 + 12, 4);
> -        memcpy(&msg_data, va + i * 16 + 8, 4);
> -        if (msg_data == 0)
> +        if (assigned_dev_msix_entry_masked(adev, i))
>              continue;
> 
> +        memcpy(&msg_data, va + i * 16 + 8, 4);
>          memcpy(&msg_addr, va + i * 16, 4);
>          memcpy(&msg_upper_addr, va + i * 16 + 4, 4);
> 
> @@ -1182,17 +1209,18 @@ static int assigned_dev_update_msix_mmio(PCIDevice
> *pci_dev) adev->entry[entries_nr].u.msi.address_lo = msg_addr;
>          adev->entry[entries_nr].u.msi.address_hi = msg_upper_addr;
>          adev->entry[entries_nr].u.msi.data = msg_data;
> -        DEBUG("MSI-X data 0x%x, MSI-X addr_lo 0x%x\n!", msg_data,
> msg_addr); -	kvm_add_routing_entry(&adev->entry[entries_nr]);
> +        DEBUG("MSI-X data 0x%x, MSI-X addr_lo 0x%x!\n", msg_data,
> msg_addr); +        kvm_add_routing_entry(&adev->entry[entries_nr]);
> 
>          msix_entry.gsi = adev->entry[entries_nr].gsi;
>          msix_entry.entry = i;
> +        pci_dev->msix_entry_used[i] = 1;
>          r = kvm_assign_set_msix_entry(kvm_context, &msix_entry);
>          if (r) {
>              fprintf(stderr, "fail to set MSI-X entry! %s\n",
> strerror(-r)); break;
>          }
> -        DEBUG("MSI-X entry gsi 0x%x, entry %d\n!",
> +        DEBUG("MSI-X entry gsi 0x%x, entry %d!\n",
>                  msix_entry.gsi, msix_entry.entry);
>          entries_nr ++;
>      }
> @@ -1209,20 +1237,24 @@ static void assigned_dev_update_msix(PCIDevice
> *pci_dev, unsigned int ctrl_pos) {
>      struct kvm_assigned_irq assigned_irq_data;
>      AssignedDevice *assigned_dev = container_of(pci_dev, AssignedDevice,
> dev); -    uint16_t *ctrl_word = (uint16_t *)(pci_dev->config + ctrl_pos);
> +    uint16_t ctrl_word = *(uint16_t *)(pci_dev->config + ctrl_pos); int
> r;
> +    uint16_t entries_nr, entries_max_nr;
> +    int enable_msix;
> 
>      memset(&assigned_irq_data, 0, sizeof assigned_irq_data);
>      assigned_irq_data.assigned_dev_id  =
>              calc_assigned_dev_id(assigned_dev->h_segnr,
> assigned_dev->h_busnr, (uint8_t)assigned_dev->h_devfn);
> 
> +    enable_msix = ((ctrl_word & PCI_MSIX_ENABLE) &&
> +                  !(ctrl_word & PCI_MSIX_MASK));
> +
>      /* Some guests gratuitously disable MSIX even if they're not using it,
>       * try to catch this by only deassigning irqs if the guest is using
>       * MSIX or intends to start. */
>      if ((assigned_dev->irq_requested_type & KVM_DEV_IRQ_GUEST_MSIX) ||
> -        (*ctrl_word & PCI_MSIX_ENABLE)) {
> -
> +            enable_msix) {
>          assigned_irq_data.flags = assigned_dev->irq_requested_type;
>          free_dev_irq_entries(assigned_dev);
>          r = kvm_deassign_irq(kvm_context, &assigned_irq_data);
> @@ -1231,16 +1263,30 @@ static void assigned_dev_update_msix(PCIDevice
> *pci_dev, unsigned int ctrl_pos) perror("assigned_dev_update_msix:
> deassign irq");
> 
>          assigned_dev->irq_requested_type = 0;
> +        memset(pci_dev->msix_entry_used, 0, KVM_MAX_MSIX_PER_DEV *
> +                                       
> sizeof(*pci_dev->msix_entry_used)); }
> 
> -    if (*ctrl_word & PCI_MSIX_ENABLE) {
> -        assigned_irq_data.flags = KVM_DEV_IRQ_HOST_MSIX |
> -                                  KVM_DEV_IRQ_GUEST_MSIX;
> -
> -        if (assigned_dev_update_msix_mmio(pci_dev) < 0) {
> +    entries_max_nr = assigned_dev->max_msix_entries_nr;
> +    if (entries_max_nr == 0) {
> +        fprintf(stderr, "assigned_dev_update_msix: MSI-X entries_max_nr ==
> 0"); +        return;
> +    }
> +    /*
> +     * Guest may try to enable MSI-X before setting MSI-X entry done, so
> +     * let's wait until guest unmask the entries.
> +     */
> +    entries_nr = get_msix_valid_entries_nr(assigned_dev, entries_max_nr);
> +    if (entries_nr == 0)
> +        return;
> +    if (enable_msix) {
> +        if (assigned_dev_update_msix_mmio(pci_dev,
> +                    entries_nr, entries_max_nr) < 0) {
>              perror("assigned_dev_update_msix_mmio");
>              return;
>          }
> +        assigned_irq_data.flags = KVM_DEV_IRQ_HOST_MSIX |
> +                                  KVM_DEV_IRQ_GUEST_MSIX;
>          if (kvm_assign_irq(kvm_context, &assigned_irq_data) < 0) {
>              perror("assigned_dev_enable_msix: assign irq");
>              return;
> @@ -1341,6 +1387,7 @@ static int assigned_device_pci_cap_init(PCIDevice
> *pci_dev) bar_nr = msix_table_entry & PCI_MSIX_BIR;
>          msix_table_entry &= ~PCI_MSIX_BIR;
>          dev->msix_table_addr = pci_region[bar_nr].base_addr +
> msix_table_entry; +        dev->max_msix_entries_nr =
> get_msix_entries_max_nr(dev);
>      }
>  #endif
>  #endif
> @@ -1378,10 +1425,90 @@ static void msix_mmio_writel(void *opaque,
>      AssignedDevice *adev = opaque;
>      unsigned int offset = addr & 0xfff;
>      void *page = adev->msix_table_page;
> +    int ctrl_word, index;
> +    struct kvm_irq_routing_entry new_entry = {};
> +    int entry_idx, entries_max_nr, r = 0, i;
> +    uint32_t msg_ctrl, msg_data, msg_upper_addr, msg_addr;
> +    struct PCIDevice *pci_dev = &adev->dev;
> +    uint8_t cap = pci_find_capability(pci_dev, PCI_CAP_ID_MSIX);
> 
>      DEBUG("write to MSI-X entry table mmio offset 0x%lx, val 0x%x\n",
>  		    addr, val);
>      memcpy((void *)((char *)page + offset), &val, 4);
> +
> +    index = offset / 16;
> +
> +    /* Check if mask bit is being accessed */
> +    memcpy(&msg_addr, (char *)page + index * 16, 4);
> +    memcpy(&msg_upper_addr, (char *)page + index * 16 + 4, 4);
> +    memcpy(&msg_data, (char *)page + index * 16 + 8, 4);
> +    memcpy(&msg_ctrl, (char *)page + index * 16 + 12, 4);
> +    DEBUG("MSI-X entries index %d: "
> +            "msg_addr 0x%x, msg_upper_addr 0x%x, msg_data 0x%x, vec_ctl
> 0x%x\n", +            index, msg_addr, msg_upper_addr, msg_data,
> msg_ctrl); +
> +    ctrl_word = pci_get_word(pci_dev->config + cap + PCI_MSIX_FLAGS);
> +
> +    if (!((ctrl_word & PCI_MSIX_ENABLE) && !(ctrl_word & PCI_MSIX_MASK)))
> +        return;
> +
> +    if (!assigned_dev_msix_entry_masked(adev, index)) {
> +        if (!adev->dev.msix_entry_used[index]) {
> +            DEBUG("Try to modify unenabled MSI-X entry %d's mask. "
> +                    "Reenable MSI-X.\n",
> +                    index);
> +            assigned_dev_update_msix(&adev->dev, cap + PCI_MSIX_FLAGS);
> +        }
> +        return;
> +    }
> +
> +    if (!adev->dev.msix_entry_used[index])
> +        return;
> +
> +    entries_max_nr = adev->max_msix_entries_nr;
> +
> +    /*
> +     * Find the index of routing entry, it can be different from 'index'
> if +     * empty entry existed in between
> +     */
> +    entry_idx = -1;
> +    for (i = 0; i <= index; i++) {
> +        if (adev->dev.msix_entry_used[i])
> +            entry_idx ++;
> +    }
> +    if (entry_idx >= entries_max_nr || entry_idx == -1) {
> +        fprintf(stderr, "msix_mmio_writel: Entry idx %d exceed limit!\n",
> +			entry_idx);
> +        return;
> +    }
> +
> +    if (!assigned_dev_msix_entry_masked(adev, index)) {
> +        fprintf(stderr, "msix_mmio_writel: Trying write to unmasked
> entry!\n"); +        return;
> +    }
> +
> +    new_entry.gsi = adev->entry[entry_idx].gsi;
> +    new_entry.type = KVM_IRQ_ROUTING_MSI;
> +    new_entry.flags = 0;
> +    new_entry.u.msi.address_lo = msg_addr;
> +    new_entry.u.msi.address_hi = msg_upper_addr;
> +    new_entry.u.msi.data = msg_data;
> +    if (memcmp(&adev->entry[entry_idx].u.msi, &new_entry.u.msi,
> +                sizeof new_entry.u.msi)) {
> +        r = kvm_update_routing_entry(&adev->entry[entry_idx], &new_entry);
> +        if (r) {
> +            perror("msix_mmio_writel: kvm_update_routing_entry failed\n");
> +            return;
> +        }
> +        r = kvm_commit_irq_routes();
> +        if (r) {
> +            perror("msix_mmio_writel: kvm_commit_irq_routes failed\n");
> +            return;
> +        }
> +    }
> +    adev->entry[entry_idx].u.msi.address_lo = msg_addr;
> +    adev->entry[entry_idx].u.msi.address_hi = msg_upper_addr;
> +    adev->entry[entry_idx].u.msi.data = msg_data;
>  }
> 
>  static void msix_mmio_writew(void *opaque,
> @@ -1408,6 +1535,7 @@ static CPUReadMemoryFunc *msix_mmio_read[] = {
> 
>  static int assigned_dev_register_msix_mmio(AssignedDevice *dev)
>  {
> +    int i;
>      dev->msix_table_page = mmap(NULL, 0x1000,
>                                  PROT_READ|PROT_WRITE,
>                                  MAP_ANONYMOUS|MAP_PRIVATE, 0, 0);
> @@ -1417,8 +1545,12 @@ static int
> assigned_dev_register_msix_mmio(AssignedDevice *dev) return -EFAULT;
>      }
>      memset(dev->msix_table_page, 0, 0x1000);
> +    for (i = 0; i < 0x1000; i += 0x10)
> +        *(uint32_t *)(dev->msix_table_page + i + 0xc) = 1;
>      dev->mmio_index = cpu_register_io_memory(
>                          msix_mmio_read, msix_mmio_write, dev);
> +    dev->dev.msix_entry_used = qemu_mallocz(KVM_MAX_MSIX_PER_DEV *
> +                                        sizeof *dev->dev.msix_entry_used);
>      return 0;
>  }
> 
> @@ -1435,6 +1567,8 @@ static void
> assigned_dev_unregister_msix_mmio(AssignedDevice *dev) strerror(errno));
>      }
>      dev->msix_table_page = NULL;
> +    free(dev->dev.msix_entry_used);
> +    dev->dev.msix_entry_used = NULL;
>  }
> 
>  static const VMStateDescription vmstate_assigned_device = {
> diff --git a/hw/device-assignment.h b/hw/device-assignment.h
> index c94a730..754e5c0 100644
> --- a/hw/device-assignment.h
> +++ b/hw/device-assignment.h
> @@ -104,7 +104,7 @@ typedef struct AssignedDevice {
>  #define ASSIGNED_DEVICE_MSIX_MASKED (1 << 2)
>          uint32_t state;
>      } cap;
> -    int irq_entries_nr;
> +    int irq_entries_nr, max_msix_entries_nr;
>      struct kvm_irq_routing_entry *entry;
>      void *msix_table_page;
>      target_phys_addr_t msix_table_addr;
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux