Re: [PATCH 12/13] pci-assign: Generic config space access management

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 2011-06-28 10:10, Michael S. Tsirkin wrote:
> On Mon, Jun 27, 2011 at 08:19:55PM +0200, Jan Kiszka wrote:
>> From: Jan Kiszka <jan.kiszka@xxxxxxxxxxx>
>>
>> This drastically simplifies config space access management: Instead of
>> coding various range checks and merging bits, set up two access control
>> bitmaps. One defines, which bits can be directly read from the device,
>> the other allows direct write to the device, also with bit-granularity.
>>
>> The setup of those bitmaps, specifically the corner cases like the
>> capability list status bit, is way more readable than the existing code.
>> And the generic config access functions just need one additional logic
>> to catch writes to the MSI/MSI-X control flags and call the related
>> update handlers.
>>
>> Signed-off-by: Jan Kiszka <jan.kiszka@xxxxxxxxxxx>
>> ---
>>  hw/device-assignment.c |  325 ++++++++++++++++-------------------------------
>>  hw/device-assignment.h |    3 +-
>>  2 files changed, 113 insertions(+), 215 deletions(-)
>>
>> diff --git a/hw/device-assignment.c b/hw/device-assignment.c
>> index 6a2ca8c..be199d2 100644
>> --- a/hw/device-assignment.c
>> +++ b/hw/device-assignment.c
>> @@ -63,35 +63,6 @@ static void assigned_dev_load_option_rom(AssignedDevice *dev);
>>  
>>  static void assigned_dev_unregister_msix_mmio(AssignedDevice *dev);
>>  
>> -static void assigned_device_pci_cap_write_config(PCIDevice *pci_dev,
>> -                                                 uint32_t address,
>> -                                                 uint32_t val, int len);
>> -
>> -static uint32_t assigned_device_pci_cap_read_config(PCIDevice *pci_dev,
>> -                                                    uint32_t address, int len);
>> -
>> -/* Merge the bits set in mask from mval into val.  Both val and mval are
>> - * at the same addr offset, pos is the starting offset of the mask. */
>> -static uint32_t merge_bits(uint32_t val, uint32_t mval, uint8_t addr,
>> -                           int len, uint8_t pos, uint32_t mask)
>> -{
>> -    if (!ranges_overlap(addr, len, pos, 4)) {
>> -        return val;
>> -    }
>> -
>> -    if (addr >= pos) {
>> -        mask >>= (addr - pos) * 8;
>> -    } else {
>> -        mask <<= (pos - addr) * 8;
>> -    }
>> -    mask &= 0xffffffffU >> (4 - len) * 8;
>> -
>> -    val &= ~mask;
>> -    val |= (mval & mask);
>> -
>> -    return val;
>> -}
>> -
>>  static uint32_t assigned_dev_ioport_rw(AssignedDevRegion *dev_region,
>>                                         uint32_t addr, int len, uint32_t *val)
>>  {
>> @@ -404,143 +375,6 @@ static uint8_t pci_find_cap_offset(PCIDevice *d, uint8_t cap, uint8_t start)
>>      return 0;
>>  }
>>  
>> -static void assigned_dev_pci_write_config(PCIDevice *d, uint32_t address,
>> -                                          uint32_t val, int len)
>> -{
>> -    int fd;
>> -    ssize_t ret;
>> -    AssignedDevice *pci_dev = DO_UPCAST(AssignedDevice, dev, d);
>> -
>> -    DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n",
>> -          ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7),
>> -          (uint16_t) address, val, len);
>> -
>> -    if (address >= PCI_CONFIG_HEADER_SIZE && d->config_map[address]) {
>> -        return assigned_device_pci_cap_write_config(d, address, val, len);
>> -    }
>> -
>> -    if (ranges_overlap(address, len, PCI_COMMAND, 2)) {
>> -        pci_default_write_config(d, address, val, len);
>> -        /* Continue to program the card */
>> -    }
>> -
>> -    /*
>> -     * Catch access to
>> -     *  - base address registers
>> -     *  - ROM base address & capability pointer
>> -     *  - interrupt line & pin
>> -     */
>> -    if (ranges_overlap(address, len, PCI_BASE_ADDRESS_0, 24) ||
>> -        ranges_overlap(address, len, PCI_ROM_ADDRESS, 4)) {
>> -        pci_default_write_config(d, address, val, len);
>> -        return;
>> -    } else if (ranges_overlap(address, len, PCI_CAPABILITY_LIST, 1) ||
>> -               ranges_overlap(address, len, PCI_INTERRUPT_LINE, 2)) {
>> -        uint32_t real_val;
>> -
>> -        pci_default_write_config(d, address, val, len);
>> -
>> -        /* Ensure that writes to overlapping areas we don't virtualize still
>> -         * hit the device. */
>> -        real_val = assigned_dev_pci_read(d, address, len);
>> -        val = merge_bits(val, real_val, address, len,
>> -                         PCI_CAPABILITY_LIST, 0xff);
>> -        val = merge_bits(val, real_val, address, len,
>> -                         PCI_INTERRUPT_LINE, 0xffff);
>> -    }
>> -
>> -    DEBUG("NON BAR (%x.%x): address=%04x val=0x%08x len=%d\n",
>> -          ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7),
>> -          (uint16_t) address, val, len);
>> -
>> -    fd = pci_dev->real_device.config_fd;
>> -
>> -again:
>> -    ret = pwrite(fd, &val, len, address);
>> -    if (ret != len) {
>> -	if ((ret < 0) && (errno == EINTR || errno == EAGAIN))
>> -	    goto again;
>> -
>> -	fprintf(stderr, "%s: pwrite failed, ret = %zd errno = %d\n",
>> -		__func__, ret, errno);
>> -
>> -	exit(1);
>> -    }
>> -}
>> -
>> -static uint32_t assigned_dev_pci_read_config(PCIDevice *d, uint32_t address,
>> -                                             int len)
>> -{
>> -    uint32_t val = 0, virt_val;
>> -    int fd;
>> -    ssize_t ret;
>> -    AssignedDevice *pci_dev = DO_UPCAST(AssignedDevice, dev, d);
>> -
>> -    if (address >= PCI_CONFIG_HEADER_SIZE && d->config_map[address]) {
>> -        val = assigned_device_pci_cap_read_config(d, address, len);
>> -        DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n",
>> -              (d->devfn >> 3) & 0x1F, (d->devfn & 0x7), address, val, len);
>> -        return val;
>> -    }
>> -
>> -    /*
>> -     * Catch access to
>> -     *  - vendor & device ID
>> -     *  - base address registers
>> -     *  - ROM base address
>> -     */
>> -    if (ranges_overlap(address, len, PCI_VENDOR_ID, 4) ||
>> -        ranges_overlap(address, len, PCI_BASE_ADDRESS_0, 24) ||
>> -        ranges_overlap(address, len, PCI_ROM_ADDRESS, 4)) {
>> -        val = pci_default_read_config(d, address, len);
>> -        DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n",
>> -              (d->devfn >> 3) & 0x1F, (d->devfn & 0x7), address, val, len);
>> -        return val;
>> -    }
>> -
>> -    fd = pci_dev->real_device.config_fd;
>> -
>> -again:
>> -    ret = pread(fd, &val, len, address);
>> -    if (ret != len) {
>> -	if ((ret < 0) && (errno == EINTR || errno == EAGAIN))
>> -	    goto again;
>> -
>> -	fprintf(stderr, "%s: pread failed, ret = %zd errno = %d\n",
>> -		__func__, ret, errno);
>> -
>> -	exit(1);
>> -    }
>> -
>> -    DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n",
>> -          (d->devfn >> 3) & 0x1F, (d->devfn & 0x7), address, val, len);
>> -
>> -    if (pci_dev->emulate_cmd_mask) {
>> -        val = merge_bits(val, pci_default_read_config(d, address, len),
>> -                         address, len, PCI_COMMAND, pci_dev->emulate_cmd_mask);
>> -    }
>> -
>> -    /*
>> -     * Merge bits from virtualized
>> -     *  - capability pointer
>> -     *  - interrupt line & pin
>> -     */
>> -    virt_val = pci_default_read_config(d, address, len);
>> -    val = merge_bits(val, virt_val, address, len, PCI_CAPABILITY_LIST, 0xff);
>> -    val = merge_bits(val, virt_val, address, len, PCI_INTERRUPT_LINE, 0xffff);
>> -
>> -    if (!pci_dev->cap.available) {
>> -        /* kill the special capabilities */
>> -        if (address == PCI_COMMAND && len == 4) {
>> -            val &= ~(PCI_STATUS_CAP_LIST << 16);
>> -        } else if (address == PCI_STATUS) {
>> -            val &= ~PCI_STATUS_CAP_LIST;
>> -        }
>> -    }
>> -
>> -    return val;
>> -}
>> -
>>  static int assigned_dev_register_regions(PCIRegion *io_regions,
>>                                           unsigned long regions_num,
>>                                           AssignedDevice *pci_dev)
>> @@ -790,7 +624,8 @@ again:
>>      /* dealing with virtual function device */
>>      snprintf(name, sizeof(name), "%sphysfn/", dir);
>>      if (!stat(name, &statbuf)) {
>> -        pci_dev->emulate_cmd_mask = 0xffff;
>> +        /* always provide the written value on readout */
>> +        memset(pci_dev->direct_config_read + PCI_COMMAND, 0xff, 2);
>>      }
>>  
>>      dev->region_number = r;
>> @@ -1268,73 +1103,81 @@ static void assigned_dev_update_msix(PCIDevice *pci_dev)
>>      }
>>  }
>>  
>> -/* There can be multiple VNDR capabilities per device, we need to find the
>> - * one that starts closet to the given address without going over. */
>> -static uint8_t find_vndr_start(PCIDevice *pci_dev, uint32_t address)
>> +static uint32_t assigned_dev_pci_read_config(PCIDevice *pci_dev,
>> +                                             uint32_t address, int len)
>>  {
>> -    uint8_t cap, pos;
>> +    AssignedDevice *assigned_dev = DO_UPCAST(AssignedDevice, dev, pci_dev);
>> +    uint32_t virt_val = pci_default_read_config(pci_dev, address, len);
>> +    uint32_t real_val, direct_mask;
>>  
>> -    for (cap = pos = 0;
>> -         (pos = pci_find_cap_offset(pci_dev, PCI_CAP_ID_VNDR, pos));
>> -         pos += PCI_CAP_LIST_NEXT) {
>> -        if (pos <= address) {
>> -            cap = MAX(pos, cap);
>> -        }
>> +    switch (len) {
>> +    case 4:
>> +        direct_mask =
>> +            pci_get_long(assigned_dev->direct_config_read + address);
>> +        break;
>> +    case 2:
>> +        direct_mask =
>> +            pci_get_word(assigned_dev->direct_config_read + address);
>> +        break;
>> +    case 1:
>> +        direct_mask =
>> +            pci_get_byte(assigned_dev->direct_config_read + address);
>> +        break;
>> +    default:
>> +        abort();
>>      }
>> -    return cap;
>> -}
>> -
>> -static uint32_t assigned_device_pci_cap_read_config(PCIDevice *pci_dev,
>> -                                                    uint32_t address, int len)
>> -{
>> -    uint8_t cap, cap_id = pci_dev->config_map[address];
>> -    uint32_t val;
>> -
>> -    switch (cap_id) {
>> -
>> -    case PCI_CAP_ID_VPD:
>> -        cap = pci_find_capability(pci_dev, cap_id);
>> -        val = assigned_dev_pci_read(pci_dev, address, len);
>> -        return merge_bits(val, pci_get_long(pci_dev->config + address),
>> -                          address, len, cap + PCI_CAP_LIST_NEXT, 0xff);
>> -
>> -    case PCI_CAP_ID_VNDR:
>> -        cap = find_vndr_start(pci_dev, address);
>> -        val = assigned_dev_pci_read(pci_dev, address, len);
>> -        return merge_bits(val, pci_get_long(pci_dev->config + address),
>> -                          address, len, cap + PCI_CAP_LIST_NEXT, 0xff);
>> +    if (direct_mask) {
>> +        real_val = assigned_dev_pci_read(pci_dev, address, len);
>> +        return (virt_val & ~direct_mask) | (real_val & direct_mask);
>> +    } else {
>> +        return virt_val;
>>      }
>> -
>> -    return pci_default_read_config(pci_dev, address, len);
>>  }
>>  
>> -static void assigned_device_pci_cap_write_config(PCIDevice *pci_dev,
>> -                                                 uint32_t address,
>> -                                                 uint32_t val, int len)
>> +static void assigned_dev_pci_write_config(PCIDevice *pci_dev, uint32_t address,
>> +                                          uint32_t val, int len)
>>  {
>> -    uint8_t cap_id = pci_dev->config_map[address];
>> +    AssignedDevice *assigned_dev = DO_UPCAST(AssignedDevice, dev, pci_dev);
>> +    uint32_t direct_mask, full_access;
>>  
>>      pci_default_write_config(pci_dev, address, val, len);
>> -    switch (cap_id) {
>> -    case PCI_CAP_ID_MSI:
>> +
>> +    if (assigned_dev->cap.available & ASSIGNED_DEVICE_CAP_MSI) {
>>          if (range_covers_byte(address, len,
>>                                pci_dev->msi_cap + PCI_MSI_FLAGS)) {
>>              assigned_dev_update_msi(pci_dev);
>>          }
>> -        break;
>> -
>> -    case PCI_CAP_ID_MSIX:
>> +    } else if (assigned_dev->cap.available & ASSIGNED_DEVICE_CAP_MSIX) {
>>          if (range_covers_byte(address, len,
>>                                pci_dev->msix_cap + PCI_MSIX_FLAGS + 1)) {
>>              assigned_dev_update_msix(pci_dev);
>>          }
>> -        break;
>> +    }
>>  
>> -    case PCI_CAP_ID_VPD:
>> -    case PCI_CAP_ID_VNDR:
>> -        assigned_dev_pci_write(pci_dev, address, val, len);
>> +    switch (len) {
>> +    case 4:
>> +        direct_mask =
>> +            pci_get_long(assigned_dev->direct_config_write + address);
>> +        full_access = 0xffffffff;
>> +        break;
>> +    case 2:
>> +        direct_mask =
>> +            pci_get_word(assigned_dev->direct_config_write + address);
>> +        full_access = 0xffff;
>>          break;
>> +    case 1:
>> +        direct_mask =
>> +            pci_get_byte(assigned_dev->direct_config_write + address);
>> +        full_access = 0xff;
>> +        break;
>> +    default:
>> +        abort();
>> +    }
>> +    if (direct_mask != full_access) {
>> +        val &= direct_mask;
>> +        val |= assigned_dev_pci_read(pci_dev, address, len) & ~direct_mask;
>>      }
>> +    assigned_dev_pci_write(pci_dev, address, val, len);
> 
> I think we should not write at all for fully virtualized fields
> (direct_mask == 0). E.g. PCI BARs.


Yes, Alex caught that as well. Fixed already.

> 
>>  }
>>  
>>  static int assigned_device_pci_cap_init(PCIDevice *pci_dev)
>> @@ -1404,6 +1247,10 @@ static int assigned_device_pci_cap_init(PCIDevice *pci_dev)
>>              return ret;
>>          }
>>  
>> +        /* direct read except for next cap pointer */
>> +        memset(dev->direct_config_read + pos, 0xff, PCI_PM_SIZEOF);
>> +        dev->direct_config_read[pos + PCI_CAP_LIST_NEXT] = 0;
>> +
>>          pmc = pci_get_word(pci_dev->config + pos + PCI_CAP_FLAGS);
>>          pmc &= (PCI_PM_CAP_VER_MASK | PCI_PM_CAP_DSI);
>>          pci_set_word(pci_dev->config + pos + PCI_CAP_FLAGS, pmc);
>> @@ -1438,6 +1285,10 @@ static int assigned_device_pci_cap_init(PCIDevice *pci_dev)
>>              return ret;
>>          }
>>  
>> +        /* direct read except for next cap pointer */
>> +        memset(dev->direct_config_read + pos, 0xff, size);
>> +        dev->direct_config_read[pos + PCI_CAP_LIST_NEXT] = 0;
>> +
>>          type = pci_get_word(pci_dev->config + pos + PCI_EXP_FLAGS);
>>          type = (type & PCI_EXP_FLAGS_TYPE) >> 8;
>>          if (type != PCI_EXP_TYPE_ENDPOINT &&
>> @@ -1513,6 +1364,10 @@ static int assigned_device_pci_cap_init(PCIDevice *pci_dev)
>>              return ret;
>>          }
>>  
>> +        /* direct read except for next cap pointer */
>> +        memset(dev->direct_config_read + pos, 0xff, 8);
>> +        dev->direct_config_read[pos + PCI_CAP_LIST_NEXT] = 0;
>> +
>>          /* Command register, clear upper bits, including extended modes */
>>          cmd = pci_get_word(pci_dev->config + pos + PCI_X_CMD);
>>          cmd &= (PCI_X_CMD_DPERR_E | PCI_X_CMD_ERO | PCI_X_CMD_MAX_READ |
>> @@ -1534,6 +1389,13 @@ static int assigned_device_pci_cap_init(PCIDevice *pci_dev)
>>          if ((ret = pci_add_capability(pci_dev, PCI_CAP_ID_VPD, pos, 8)) < 0) {
>>              return ret;
>>          }
>> +
>> +        /* direct read except for next cap pointer */
>> +        memset(dev->direct_config_read + pos, 0xff, 8);
>> +        dev->direct_config_read[pos + PCI_CAP_LIST_NEXT] = 0;
>> +
>> +        /* direct write for cap content */
>> +        memset(dev->direct_config_write + pos + 2, 0xff, 6);
>>      }
>>  
>>      /* Devices can have multiple vendor capabilities, get them all */
>> @@ -1545,6 +1407,21 @@ static int assigned_device_pci_cap_init(PCIDevice *pci_dev)
>>                                        pos, len)) < 0) {
>>              return ret;
>>          }
>> +
>> +        /* direct read except for next cap pointer */
>> +        memset(dev->direct_config_read + pos, 0xff, len);
>> +        dev->direct_config_read[pos + PCI_CAP_LIST_NEXT] = 0;
> 
> Add a function for the above 3 lines?
> 
>> +
>> +        /* direct write for cap content */
>> +        memset(dev->direct_config_write + pos + 2, 0xff, len - 2);
>> +    }
>> +
>> +    /* If real and virtual capability list status bits differ, virtualize the
>> +     * access. */
>> +    if ((pci_get_word(pci_dev->config + PCI_STATUS) & PCI_STATUS_CAP_LIST) !=
>> +        (assigned_dev_pci_read_byte(pci_dev, PCI_STATUS) &
>> +         PCI_STATUS_CAP_LIST)) {
>> +        dev->direct_config_read[PCI_STATUS] &= ~PCI_STATUS_CAP_LIST;
>>      }
> 
> Is this an optimization? What harm does it do to virtualize always?

Just replicating the old behavior, but I can virtualize it unconditionally.

> 
>>  
>>      return 0;
>> @@ -1694,6 +1571,26 @@ static int assigned_initfn(struct PCIDevice *pci_dev)
>>          return -1;
>>      }
>>  
>> +    /*
>> +     * Set up basic config space access control. Will be further refined during
>> +     * device initialization.
>> +     *
>> +     * Direct read/write access is grangted for:
> 
> typo
> 
>> +     *  - status & command registers, revision ID & class code, cacheline size,
>> +     *    latency timer, header type, BIST
>> +     *  - cardbus CIS pointer, subsystem vendor & ID
>> +     *  - reserved fields
>> +     *  - Min_Gnt & Max_Lat
>> +     */
>> +    memset(dev->direct_config_read + PCI_COMMAND, 0xff, 12);
>> +    memset(dev->direct_config_read + PCI_CARDBUS_CIS, 0xff, 8);
>> +    memset(dev->direct_config_read + PCI_CAPABILITY_LIST + 1, 0xff, 7);
>> +    memset(dev->direct_config_read + PCI_MIN_GNT, 0xff, 2);
>> +    memset(dev->direct_config_write + PCI_COMMAND, 0xff, 12);
>> +    memset(dev->direct_config_write + PCI_CARDBUS_CIS, 0xff, 8);
>> +    memset(dev->direct_config_write + PCI_CAPABILITY_LIST + 1, 0xff, 7);
>> +    memset(dev->direct_config_write + PCI_MIN_GNT, 0xff, 2);
>> +
>>      if (get_real_device(dev, dev->host.seg, dev->host.bus,
>>                          dev->host.dev, dev->host.func)) {
>>          error_report("pci-assign: Error: Couldn't get real device (%s)!",
>> diff --git a/hw/device-assignment.h b/hw/device-assignment.h
>> index 3d20207..ed5defb 100644
>> --- a/hw/device-assignment.h
>> +++ b/hw/device-assignment.h
>> @@ -104,12 +104,13 @@ typedef struct AssignedDevice {
>>  #define ASSIGNED_DEVICE_MSIX_MASKED (1 << 2)
>>          uint32_t state;
>>      } cap;
>> +    uint8_t direct_config_read[PCI_CONFIG_SPACE_SIZE];
>> +    uint8_t direct_config_write[PCI_CONFIG_SPACE_SIZE];
> 
> What direct means isn't obvious. Add a comment?

Ok.

> 
> We could revert the logic, have bits for
> emulate_config_read/emulate_config_write.  I think that most of the
> registers can safely be read/written directly, so protecting relevant
> bits will be less work. Is that true?

I intentionally chose whitelisting to avoid accidentally granting access
to forgotten fields. I don't think it's a good idea to switch to
blacklisting.

Jan

Attachment: signature.asc
Description: OpenPGP digital signature


[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux