Hi, On 11/25/19 10:30 AM, Alexandru Elisei wrote: > From: Julien Thierry <julien.thierry@xxxxxxx> > > When a guest can reassign BARs, kvmtool needs to maintain the vfio_region > consistent with their corresponding BARs. Take the new updated addresses > from the PCI header read back from the vfio driver. > > Also, to modify the BARs, it is expected that guests will disable > IO/Memory response in the PCI command. Support this by mapping/unmapping > regions when the corresponding response gets enabled/disabled. > > Cc: julien.thierry.kdev@xxxxxxxxx > Signed-off-by: Julien Thierry <julien.thierry@xxxxxxx> > [Fixed BAR selection] > Signed-off-by: Alexandru Elisei <alexandru.elisei@xxxxxxx> > --- > vfio/core.c | 8 ++--- > vfio/pci.c | 88 ++++++++++++++++++++++++++++++++++++++++++++++++++--- > 2 files changed, 87 insertions(+), 9 deletions(-) I've finally had the chance to do more testing for PCI passthrough, and this patch is pretty broken, so far I've found that: kvmtool does trap-and-emulate for the BAR(s) dedicated to the MSI-X table and MSI-X PBA structure, and we don't take that into account; vfio_unmap_region doesn't destroy the memslot; when the guest enables memory or I/O accesses, we call vfio_map_region for all 6 BARs, even though some of them might be unimplemented (their value is 0). Thanks, Alex > diff --git a/vfio/core.c b/vfio/core.c > index 0ed1e6fee6bf..b554897fc8c1 100644 > --- a/vfio/core.c > +++ b/vfio/core.c > @@ -202,14 +202,13 @@ static int vfio_setup_trap_region(struct kvm *kvm, struct vfio_device *vdev, > struct vfio_region *region) > { > if (region->is_ioport) { > - int port = pci_get_io_port_block(region->info.size); > + int port = ioport__register(kvm, region->port_base, > + &vfio_ioport_ops, > + region->info.size, region); > > - port = ioport__register(kvm, port, &vfio_ioport_ops, > - region->info.size, region); > if (port < 0) > return port; > > - region->port_base = port; > return 0; > } > > @@ -258,6 +257,7 @@ void vfio_unmap_region(struct kvm *kvm, struct vfio_region *region) > { > if (region->host_addr) { > munmap(region->host_addr, region->info.size); > + region->host_addr = NULL; > } else if (region->is_ioport) { > ioport__unregister(kvm, region->port_base); > } else { > diff --git a/vfio/pci.c b/vfio/pci.c > index bc5a6d452f7a..28f895c06b27 100644 > --- a/vfio/pci.c > +++ b/vfio/pci.c > @@ -1,3 +1,4 @@ > +#include "kvm/ioport.h" > #include "kvm/irq.h" > #include "kvm/kvm.h" > #include "kvm/kvm-cpu.h" > @@ -464,6 +465,67 @@ static void vfio_pci_cfg_read(struct kvm *kvm, struct pci_device_header *pci_hdr > sz, offset); > } > > +static void vfio_pci_cfg_handle_command(struct kvm *kvm, struct vfio_device *vdev, > + void *data, int sz) > +{ > + struct pci_device_header *hdr = &vdev->pci.hdr; > + bool toggle_io; > + bool toggle_mem; > + u16 cmd; > + int i; > + > + cmd = ioport__read16(data); > + toggle_io = !!((cmd ^ hdr->command) & PCI_COMMAND_IO); > + toggle_mem = !!((cmd ^ hdr->command) & PCI_COMMAND_MEMORY); > + > + for (i = VFIO_PCI_BAR0_REGION_INDEX; i <= VFIO_PCI_BAR5_REGION_INDEX; ++i) { > + struct vfio_region *region = &vdev->regions[i]; > + > + if (region->is_ioport && toggle_io) { > + if (cmd & PCI_COMMAND_IO) > + vfio_map_region(kvm, vdev, region); > + else > + vfio_unmap_region(kvm, region); > + } > + > + if (!region->is_ioport && toggle_mem) { > + if (cmd & PCI_COMMAND_MEMORY) > + vfio_map_region(kvm, vdev, region); > + else > + vfio_unmap_region(kvm, region); > + } > + } > +} > + > +static void vfio_pci_cfg_update_bar(struct kvm *kvm, struct vfio_device *vdev, > + int bar_num, void *data, int sz) > +{ > + struct pci_device_header *hdr = &vdev->pci.hdr; > + struct vfio_region *region; > + uint32_t bar; > + > + region = &vdev->regions[bar_num + VFIO_PCI_BAR0_REGION_INDEX]; > + bar = ioport__read32(data); > + > + if (region->is_ioport) { > + if (hdr->command & PCI_COMMAND_IO) > + vfio_unmap_region(kvm, region); > + > + region->port_base = bar & PCI_BASE_ADDRESS_IO_MASK; > + > + if (hdr->command & PCI_COMMAND_IO) > + vfio_map_region(kvm, vdev, region); > + } else { > + if (hdr->command & PCI_COMMAND_MEMORY) > + vfio_unmap_region(kvm, region); > + > + region->guest_phys_addr = bar & PCI_BASE_ADDRESS_MEM_MASK; > + > + if (hdr->command & PCI_COMMAND_MEMORY) > + vfio_map_region(kvm, vdev, region); > + } > +} > + > static void vfio_pci_cfg_write(struct kvm *kvm, struct pci_device_header *pci_hdr, > u8 offset, void *data, int sz) > { > @@ -471,6 +533,7 @@ static void vfio_pci_cfg_write(struct kvm *kvm, struct pci_device_header *pci_hd > struct vfio_pci_device *pdev; > struct vfio_device *vdev; > void *base = pci_hdr; > + int bar_num; > > pdev = container_of(pci_hdr, struct vfio_pci_device, hdr); > vdev = container_of(pdev, struct vfio_device, pci); > @@ -487,9 +550,17 @@ static void vfio_pci_cfg_write(struct kvm *kvm, struct pci_device_header *pci_hd > if (pdev->irq_modes & VFIO_PCI_IRQ_MODE_MSI) > vfio_pci_msi_cap_write(kvm, vdev, offset, data, sz); > > + if (offset == PCI_COMMAND) > + vfio_pci_cfg_handle_command(kvm, vdev, data, sz); > + > if (pread(vdev->fd, base + offset, sz, info->offset + offset) != sz) > vfio_dev_warn(vdev, "Failed to read %d bytes from Configuration Space at 0x%x", > sz, offset); > + > + if (offset >= PCI_BASE_ADDRESS_0 && offset <= PCI_BASE_ADDRESS_5) { > + bar_num = (offset - PCI_BASE_ADDRESS_0) / sizeof(u32); > + vfio_pci_cfg_update_bar(kvm, vdev, bar_num, data, sz); > + } > } > > static ssize_t vfio_pci_msi_cap_size(struct msi_cap_64 *cap_hdr) > @@ -808,6 +879,7 @@ static int vfio_pci_configure_bar(struct kvm *kvm, struct vfio_device *vdev, > size_t map_size; > struct vfio_pci_device *pdev = &vdev->pci; > struct vfio_region *region = &vdev->regions[nr]; > + bool map_now; > > if (nr >= vdev->info.num_regions) > return 0; > @@ -848,16 +920,22 @@ static int vfio_pci_configure_bar(struct kvm *kvm, struct vfio_device *vdev, > } > } > > - if (!region->is_ioport) { > + if (region->is_ioport) { > + region->port_base = pci_get_io_port_block(region->info.size); > + map_now = !!(pdev->hdr.command & PCI_COMMAND_IO); > + } else { > /* Grab some MMIO space in the guest */ > map_size = ALIGN(region->info.size, PAGE_SIZE); > region->guest_phys_addr = pci_get_mmio_block(map_size); > + map_now = !!(pdev->hdr.command & PCI_COMMAND_MEMORY); > } > > - /* Map the BARs into the guest or setup a trap region. */ > - ret = vfio_map_region(kvm, vdev, region); > - if (ret) > - return ret; > + if (map_now) { > + /* Map the BARs into the guest or setup a trap region. */ > + ret = vfio_map_region(kvm, vdev, region); > + if (ret) > + return ret; > + } > > return 0; > }