On 10/04/2013 01:39 AM, Alex Williamson wrote: > During vfio-pci initfn, the device is not always in a state where the > option ROM can be read. In the case of graphics cards, there's often > no per function reset, which means we have host driver state affecting > whether the option ROM is usable. Ideally we want to move reading the > option ROM past any co-assigned device resets to the point where the > guest first tries to read the ROM itself. > > To accomplish this, we switch the memory region for the option rom to > an I/O region rather than a memory mapped region. This has the side > benefit that we don't waste KVM memory slots for a BAR where we don't > care about performance. This also allows us to delay loading the ROM > from the device until the first read by the guest. We then use the > PCI config space size of the ROM BAR when setting up the BAR through > QEMU PCI. > > Another benefit of this approach is that previously when a user set > the ROM to a file using the romfile= option, we still probed VFIO for > the parameters of the ROM, which can result in dmesg errors about an > invalid ROM. We now only probe VFIO to get the ROM contents if the > guest actually tries to read the ROM. > > Signed-off-by: Alex Williamson <alex.williamson@xxxxxxxxxx> > --- > hw/misc/vfio.c | 184 +++++++++++++++++++++++++++++++++++++------------------- > 1 file changed, 122 insertions(+), 62 deletions(-) > > diff --git a/hw/misc/vfio.c b/hw/misc/vfio.c > index ede026d..730dec5 100644 > --- a/hw/misc/vfio.c > +++ b/hw/misc/vfio.c > @@ -166,6 +166,7 @@ typedef struct VFIODevice { > off_t config_offset; /* Offset of config space region within device fd */ > unsigned int rom_size; > off_t rom_offset; /* Offset of ROM region within device fd */ > + void *rom; > int msi_cap_size; > VFIOMSIVector *msi_vectors; > VFIOMSIXInfo *msix; > @@ -1058,6 +1059,125 @@ static const MemoryRegionOps vfio_bar_ops = { > .endianness = DEVICE_LITTLE_ENDIAN, > }; > > +static void vfio_pci_load_rom(VFIODevice *vdev) > +{ > + struct vfio_region_info reg_info = { > + .argsz = sizeof(reg_info), > + .index = VFIO_PCI_ROM_REGION_INDEX > + }; > + uint64_t size; > + off_t off = 0; > + size_t bytes; > + > + if (ioctl(vdev->fd, VFIO_DEVICE_GET_REGION_INFO, ®_info)) { > + error_report("vfio: Error getting ROM info: %m"); > + return; > + } > + > + DPRINTF("Device %04x:%02x:%02x.%x ROM:\n", vdev->host.domain, > + vdev->host.bus, vdev->host.slot, vdev->host.function); > + DPRINTF(" size: 0x%lx, offset: 0x%lx, flags: 0x%lx\n", > + (unsigned long)reg_info.size, (unsigned long)reg_info.offset, > + (unsigned long)reg_info.flags); > + > + vdev->rom_size = size = reg_info.size; > + vdev->rom_offset = reg_info.offset; > + > + if (!vdev->rom_size) { > + return; > + } > + > + vdev->rom = g_malloc(size); > + memset(vdev->rom, 0xff, size); > + > + while (size) { > + bytes = pread(vdev->fd, vdev->rom + off, size, vdev->rom_offset + off); > + if (bytes == 0) { > + break; > + } else if (bytes > 0) { > + off += bytes; > + size -= bytes; > + } else { > + if (errno == EINTR || errno == EAGAIN) { > + continue; > + } > + error_report("vfio: Error reading device ROM: %m"); > + break; > + } > + } > +} > + > +static uint64_t vfio_rom_read(void *opaque, hwaddr addr, unsigned size) > +{ > + VFIODevice *vdev = opaque; > + uint64_t val = ((uint64_t)1 << (size * 8)) - 1; > + > + /* Load the ROM lazily when the guest tries to read it */ > + if (unlikely(!vdev->rom)) { > + vfio_pci_load_rom(vdev); > + } > + > + memcpy(&val, vdev->rom + addr, > + (addr < vdev->rom_size) ? MIN(size, vdev->rom_size - addr) : 0); > + > + DPRINTF("%s(%04x:%02x:%02x.%x, 0x%"HWADDR_PRIx", 0x%x) = 0x%"PRIx64"\n", > + __func__, vdev->host.domain, vdev->host.bus, vdev->host.slot, > + vdev->host.function, addr, size, val); > + > + return val; > +} > + > +static const MemoryRegionOps vfio_rom_ops = { > + .read = vfio_rom_read, > + .endianness = DEVICE_LITTLE_ENDIAN, > +}; > + > +static void vfio_pci_size_rom(VFIODevice *vdev) > +{ > + uint32_t orig, size = (uint32_t)PCI_ROM_ADDRESS_MASK; > + off_t offset = vdev->config_offset + PCI_ROM_ADDRESS; > + char name[32]; > + > + if (vdev->pdev.romfile || !vdev->pdev.rom_bar) { > + return; > + } > + > + /* > + * Use the same size ROM BAR as the physical device. The contents > + * will get filled in later when the guest tries to read it. > + */ > + if (pread(vdev->fd, &orig, 4, offset) != 4 || > + pwrite(vdev->fd, &size, 4, offset) != 4 || > + pread(vdev->fd, &size, 4, offset) != 4 || > + pwrite(vdev->fd, &orig, 4, offset) != 4) { Do not you want to do byteswapping here? This chunk fails on powerpc64. The rest works (with the corresponding kernel changes). -- Alexey -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html