On Fri, 2013-10-04 at 22:13 +1000, Alexey Kardashevskiy wrote: > On 10/04/2013 01:39 AM, Alex Williamson wrote: > > During vfio-pci initfn, the device is not always in a state where the > > option ROM can be read. In the case of graphics cards, there's often > > no per function reset, which means we have host driver state affecting > > whether the option ROM is usable. Ideally we want to move reading the > > option ROM past any co-assigned device resets to the point where the > > guest first tries to read the ROM itself. > > > > To accomplish this, we switch the memory region for the option rom to > > an I/O region rather than a memory mapped region. This has the side > > benefit that we don't waste KVM memory slots for a BAR where we don't > > care about performance. This also allows us to delay loading the ROM > > from the device until the first read by the guest. We then use the > > PCI config space size of the ROM BAR when setting up the BAR through > > QEMU PCI. > > > > Another benefit of this approach is that previously when a user set > > the ROM to a file using the romfile= option, we still probed VFIO for > > the parameters of the ROM, which can result in dmesg errors about an > > invalid ROM. We now only probe VFIO to get the ROM contents if the > > guest actually tries to read the ROM. > > > > Signed-off-by: Alex Williamson <alex.williamson@xxxxxxxxxx> > > --- > > hw/misc/vfio.c | 184 +++++++++++++++++++++++++++++++++++++------------------- > > 1 file changed, 122 insertions(+), 62 deletions(-) > > > > diff --git a/hw/misc/vfio.c b/hw/misc/vfio.c > > index ede026d..730dec5 100644 > > --- a/hw/misc/vfio.c > > +++ b/hw/misc/vfio.c > > @@ -166,6 +166,7 @@ typedef struct VFIODevice { > > off_t config_offset; /* Offset of config space region within device fd */ > > unsigned int rom_size; > > off_t rom_offset; /* Offset of ROM region within device fd */ > > + void *rom; > > int msi_cap_size; > > VFIOMSIVector *msi_vectors; > > VFIOMSIXInfo *msix; > > @@ -1058,6 +1059,125 @@ static const MemoryRegionOps vfio_bar_ops = { > > .endianness = DEVICE_LITTLE_ENDIAN, > > }; > > > > +static void vfio_pci_load_rom(VFIODevice *vdev) > > +{ > > + struct vfio_region_info reg_info = { > > + .argsz = sizeof(reg_info), > > + .index = VFIO_PCI_ROM_REGION_INDEX > > + }; > > + uint64_t size; > > + off_t off = 0; > > + size_t bytes; > > + > > + if (ioctl(vdev->fd, VFIO_DEVICE_GET_REGION_INFO, ®_info)) { > > + error_report("vfio: Error getting ROM info: %m"); > > + return; > > + } > > + > > + DPRINTF("Device %04x:%02x:%02x.%x ROM:\n", vdev->host.domain, > > + vdev->host.bus, vdev->host.slot, vdev->host.function); > > + DPRINTF(" size: 0x%lx, offset: 0x%lx, flags: 0x%lx\n", > > + (unsigned long)reg_info.size, (unsigned long)reg_info.offset, > > + (unsigned long)reg_info.flags); > > + > > + vdev->rom_size = size = reg_info.size; > > + vdev->rom_offset = reg_info.offset; > > + > > + if (!vdev->rom_size) { > > + return; > > + } > > + > > + vdev->rom = g_malloc(size); > > + memset(vdev->rom, 0xff, size); > > + > > + while (size) { > > + bytes = pread(vdev->fd, vdev->rom + off, size, vdev->rom_offset + off); > > + if (bytes == 0) { > > + break; > > + } else if (bytes > 0) { > > + off += bytes; > > + size -= bytes; > > + } else { > > + if (errno == EINTR || errno == EAGAIN) { > > + continue; > > + } > > + error_report("vfio: Error reading device ROM: %m"); > > + break; > > + } > > + } > > +} > > + > > +static uint64_t vfio_rom_read(void *opaque, hwaddr addr, unsigned size) > > +{ > > + VFIODevice *vdev = opaque; > > + uint64_t val = ((uint64_t)1 << (size * 8)) - 1; > > + > > + /* Load the ROM lazily when the guest tries to read it */ > > + if (unlikely(!vdev->rom)) { > > + vfio_pci_load_rom(vdev); > > + } > > + > > + memcpy(&val, vdev->rom + addr, > > + (addr < vdev->rom_size) ? MIN(size, vdev->rom_size - addr) : 0); > > + > > + DPRINTF("%s(%04x:%02x:%02x.%x, 0x%"HWADDR_PRIx", 0x%x) = 0x%"PRIx64"\n", > > + __func__, vdev->host.domain, vdev->host.bus, vdev->host.slot, > > + vdev->host.function, addr, size, val); > > + > > + return val; > > +} > > + > > +static const MemoryRegionOps vfio_rom_ops = { > > + .read = vfio_rom_read, > > + .endianness = DEVICE_LITTLE_ENDIAN, > > +}; > > + > > +static void vfio_pci_size_rom(VFIODevice *vdev) > > +{ > > + uint32_t orig, size = (uint32_t)PCI_ROM_ADDRESS_MASK; > > + off_t offset = vdev->config_offset + PCI_ROM_ADDRESS; > > + char name[32]; > > + > > + if (vdev->pdev.romfile || !vdev->pdev.rom_bar) { > > + return; > > + } > > + > > + /* > > + * Use the same size ROM BAR as the physical device. The contents > > + * will get filled in later when the guest tries to read it. > > + */ > > + if (pread(vdev->fd, &orig, 4, offset) != 4 || > > + pwrite(vdev->fd, &size, 4, offset) != 4 || > > + pread(vdev->fd, &size, 4, offset) != 4 || > > + pwrite(vdev->fd, &orig, 4, offset) != 4) { > > > Do not you want to do byteswapping here? > This chunk fails on powerpc64. The rest works (with the corresponding > kernel changes). Yes, I think we just need this: --- a/hw/misc/vfio.c +++ b/hw/misc/vfio.c @@ -1142,7 +1142,7 @@ static const MemoryRegionOps vfio_rom_ops = { static void vfio_pci_size_rom(VFIODevice *vdev) { - uint32_t orig, size = (uint32_t)PCI_ROM_ADDRESS_MASK; + uint32_t orig, size = cpu_to_le32((uint32_t)PCI_ROM_ADDRESS_MASK); off_t offset = vdev->config_offset + PCI_ROM_ADDRESS; char name[32]; @@ -1164,7 +1164,7 @@ static void vfio_pci_size_rom(VFIODevice *vdev) return; } - size = ~(size & PCI_ROM_ADDRESS_MASK) + 1; + size = ~(le32_to_cpu(size) & PCI_ROM_ADDRESS_MASK) + 1; if (!size) { return; Does that work for you? Thanks, Alex -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html