On Wed, 2014-09-24 at 14:20 +0100, Andrew Barnes wrote: > hw/misc/vfio.c > > this patch adds: > * memory map intel opregion > * mirroring of bdsm to guest's device 0 not hosts. > > patch > --------------------- > > diff --git a/hw/misc/vfio.c b/hw/misc/vfio.c > index e88b610..54e549b 100644 > --- a/hw/misc/vfio.c > +++ b/hw/misc/vfio.c > @@ -5,6 +5,7 @@ > * > * Authors: > * Alex Williamson <alex.williamson@xxxxxxxxxx> > + * Andrew Barnes <andy@xxxxxxxxxxxxxxxx> IGD Support > * > * This work is licensed under the terms of the GNU GPL, version 2. See > * the COPYING file in the top-level directory. > @@ -56,6 +57,45 @@ > #define VFIO_ALLOW_KVM_MSI 1 > #define VFIO_ALLOW_KVM_MSIX 1 > > +/* A handy list of IGD device ID's */ > +#define IS_IGD_HASWELL(id) (id == 0x0402 \ > + || id == 0x0406 \ > + || id == 0x040a \ > + || id == 0x0412 \ > + || id == 0x0416 \ > + || id == 0x041a \ > + || id == 0x0a04 \ > + || id == 0x0a16 \ > + || id == 0x0a22 \ > + || id == 0x0a26 \ > + || id == 0x0a2a ) > +#define IS_IGD_IVYBRIDGE(id) (id == 0x0162 \ > + || id == 0x0166 \ > + || id == 0x016a \ > + || id == 0x0152 \ > + || id == 0x0156 \ > + || id == 0x015a ) > +#define IS_IGD_SANDYBRIDGE(id) (id == 0x0102 \ > + || id == 0x0106 \ > + || id == 0x0112 \ > + || id == 0x0116 \ > + || id == 0x0122 \ > + || id == 0x0126 \ > + || id ==0x010a ) > +#define IS_IGD_IRONLAKE_CLARKDALE(id) (id == 0x0042 ) > +#define IS_IGD_IRONLAKE_ARRANDALE(id) (id == 0x0046 ) > +#define IS_IGD(id) (IS_IGD_IRONLAKE_CLARKDALE(id) \ > + || IS_IGD_IRONLAKE_ARRANDALE(id) \ > + || IS_IGD_SANDYBRIDGE(id) \ > + || IS_IGD_IVYBRIDGE(id) \ > + || IS_IGD_HASWELL(id) ) > +#define IGD_BAR_MASK 0xFFFFFFFFFFFF0000 > +#define DMAR_OPERATION_TIMEOUT ((s_time_t)((_ms) * 1000000ULL)) > + > +#define PCI_CONFIG_INTEL_OPREGION 0xfc > +#define INTEL_OPREGION_PAGES 3 > +#define INTEL_OPREGION_SIZE INTEL_OPREGION_PAGES * > TARGET_PAGE_SIZE > + *GAG* > struct VFIODevice; > > typedef struct VFIOQuirk { > @@ -227,6 +267,8 @@ typedef struct VFIODevice { > bool has_pm_reset; > bool needs_reset; > bool rom_read_failed; > + MemoryRegion opregion; /* Intel opregion */ > + uint32_t host_opregion; /* Host address of opregion */ Let's at least create a new struct to host these with a pointer off of VFIODevice, then we only need to test IS_IGD() once any everywhere else can just test the pointer. > } VFIODevice; > > typedef struct VFIOGroup { > @@ -283,6 +325,18 @@ static void vfio_pci_write_config(PCIDevice *pdev, > uint32_t addr, > uint32_t val, int len); > static void vfio_mmap_set_enabled(VFIODevice *vdev, bool enabled); > > +static void vfio_set_word_bits(uint8_t *buf, uint16_t val, uint16_t mask); > +static void vfio_add_emulated_word(VFIODevice *vdev, int pos, > + uint16_t val, uint16_t mask); > +static void vfio_set_long_bits(uint8_t *buf, uint32_t val, uint32_t mask); > +static void vfio_add_emulated_long(VFIODevice *vdev, int pos, > + uint32_t val, uint32_t mask); > +static void vfio_add_emulated_rw_long(VFIODevice *vdev, int pos, > + uint32_t val, uint32_t mask); > +static void vfio_map_igdopregion(VFIODevice *vdev, uint32_t > guest_opregion); > + > +static VFIODevice *igdvfio; Unused? > + > /* > * Common VFIO interrupt disable > */ > @@ -2324,27 +2378,46 @@ static uint32_t vfio_pci_read_config(PCIDevice > *pdev, uint32_t addr, int len) > VFIODevice *vdev = DO_UPCAST(VFIODevice, pdev, pdev); > uint32_t emu_bits = 0, emu_val = 0, phys_val = 0, val; > > - memcpy(&emu_bits, vdev->emulated_config_bits + addr, len); > - emu_bits = le32_to_cpu(emu_bits); > - > - if (emu_bits) { > - emu_val = pci_default_read_config(pdev, addr, len); > + /* BDSM mirror - BDSM can be read at either 0xb0 device 0, or 0x5c > device 2. > + * Redirect this mirror from host 0xb0 device 0 to guest 0xb0 device > 0.*/ Defining BDSM would be helpful. > + if (IS_IGD(pci_get_word(pdev->config + PCI_DEVICE_ID)) && > ranges_overlap(addr,len,0x5c,4)) > + { > + DPRINTF("%s Read Trapped (%04x:%02x:%02x.%x, @0x%x, 0x%x, > len=0x%x)\n", __func__, > + vdev->host.domain, vdev->host.bus, vdev->host.slot, > + vdev->host.function, addr, val, len); > + PCIBus *root = pci_find_primary_bus(); > + PCIDevice *q35 = pci_find_device(root,0,PCI_DEVFN(0, 0)); > + val = pci_default_read_config(q35, 0xb0, len); My system shows: $ sudo setpci -s 0:0.0 b0.l dba00001 $ sudo setpci -s 0:2.0 5c.l dba00001 So why are we doing this? Is one of these other patches emulating b0 on the host bridge and we're trying to access that emulation here? A patch series that's simply a full diff of each file is really hard to review. > } > > - if (~emu_bits & (0xffffffffU >> (32 - len * 8))) { > - ssize_t ret; > + else > + { > + memcpy(&emu_bits, vdev->emulated_config_bits + addr, len); > + emu_bits = le32_to_cpu(emu_bits); > + > + if (emu_bits) { > + emu_val = pci_default_read_config(pdev, addr, len); > + DPRINTF("%s emulated read: %x \n",__func__,emu_val); > > - ret = pread(vdev->fd, &phys_val, len, vdev->config_offset + addr); > - if (ret != len) { > - error_report("%s(%04x:%02x:%02x.%x, 0x%x, 0x%x) failed: %m", > - __func__, vdev->host.domain, vdev->host.bus, > - vdev->host.slot, vdev->host.function, addr, len); > - return -errno; > } > - phys_val = le32_to_cpu(phys_val); > - } > > - val = (emu_val & emu_bits) | (phys_val & ~emu_bits); > + if (~emu_bits & (0xffffffffU >> (32 - len * 8))) { > + ssize_t ret; > + > + ret = pread(vdev->fd, &phys_val, len, vdev->config_offset + > addr); > + if (ret != len) { > + error_report("%s(%04x:%02x:%02x.%x, 0x%x, 0x%x) failed: > %m", > + __func__, vdev->host.domain, vdev->host.bus, > + vdev->host.slot, vdev->host.function, addr, > len); > + return -errno; > + } > + phys_val = le32_to_cpu(phys_val); > + DPRINTF("%s direct read: %x \n",__func__,phys_val); > + > + } > + > + val = (emu_val & emu_bits) | (phys_val & ~emu_bits); > + } > > DPRINTF("%s(%04x:%02x:%02x.%x, @0x%x, len=0x%x) %x\n", __func__, > vdev->host.domain, vdev->host.bus, vdev->host.slot, > @@ -2363,12 +2436,30 @@ static void vfio_pci_write_config(PCIDevice *pdev, > uint32_t addr, > vdev->host.domain, vdev->host.bus, vdev->host.slot, > vdev->host.function, addr, val, len); > > + /* A write to OPREGION base address means that seabios has allocated a > new memory region for OPREGION > + * in the guest. */ > + if (IS_IGD(pci_get_word(pdev->config + PCI_DEVICE_ID)) && > ranges_overlap(addr,len,PCI_CONFIG_INTEL_OPREGION,4)) > + { > + DPRINTF("%s Write Trapped (%04x:%02x:%02x.%x, @0x%x, 0x%x, > len=0x%x)\n", __func__, > + vdev->host.domain, vdev->host.bus, vdev->host.slot, > + vdev->host.function, addr, val, len); > + //val = (val & 0xfffff000) | (vdev->host_opregion & 0xfff); > + vfio_map_igdopregion(vdev,val); > + goto defaultwrite; > + } > + > /* Write everything to VFIO, let it filter out what we can't write */ > - if (pwrite(vdev->fd, &val_le, len, vdev->config_offset + addr) != len) > { > + else if (pwrite(vdev->fd, &val_le, len, vdev->config_offset + addr) != > len) { > error_report("%s(%04x:%02x:%02x.%x, 0x%x, 0x%x, 0x%x) failed: %m", > __func__, vdev->host.domain, vdev->host.bus, > vdev->host.slot, vdev->host.function, addr, val, len); > } > + else > + { > + DPRINTF("%s Written to VFIO (%04x:%02x:%02x.%x, @0x%x, 0x%x, > len=0x%x)\n", __func__, > + vdev->host.domain, vdev->host.bus, vdev->host.slot, > + vdev->host.function, addr, val, len); > + } > > /* MSI/MSI-X Enabling/Disabling */ > if (pdev->cap_present & QEMU_PCI_CAP_MSI && > @@ -2405,7 +2496,11 @@ static void vfio_pci_write_config(PCIDevice *pdev, > uint32_t addr, > } > } else { > /* Write everything to QEMU to keep emulated bits correct */ > - pci_default_write_config(pdev, addr, val, len); > +defaultwrite: > + pci_default_write_config(pdev, addr, val, len); > + DPRINTF("%s Default Write (%04x:%02x:%02x.%x, @0x%x, 0x%x, > len=0x%x)\n", __func__, > + vdev->host.domain, vdev->host.bus, vdev->host.slot, > + vdev->host.function, addr, val, len); > } > } > > @@ -3065,7 +3160,7 @@ static void vfio_set_word_bits(uint8_t *buf, uint16_t > val, uint16_t mask) > { > pci_set_word(buf, (pci_get_word(buf) & ~mask) | val); > } > - > +/* helper functions make read-only emulated registers! */ > static void vfio_add_emulated_word(VFIODevice *vdev, int pos, > uint16_t val, uint16_t mask) > { > @@ -3087,6 +3182,62 @@ static void vfio_add_emulated_long(VFIODevice *vdev, > int pos, > vfio_set_long_bits(vdev->emulated_config_bits + pos, mask, mask); > } > > +static void vfio_add_emulated_rw_long(VFIODevice *vdev, int pos, > + uint32_t val, uint32_t mask) > +{ > + vfio_set_long_bits(vdev->pdev.config + pos, val, mask); > + vfio_set_long_bits(vdev->pdev.wmask + pos, mask, mask); > + vfio_set_long_bits(vdev->emulated_config_bits + pos, mask, mask); > +} > + > +/* Setup the mapping of the opregion ready for Seabios to allocate the > guest location */ > +static void vfio_setup_igdopregion(VFIODevice *vdev) > +{ > + PCIDevice *pdev = &vdev->pdev; > + int fd; > + char name[64]; > + void *map; > + > + vdev->host_opregion = > vfio_pci_read_config(pdev,PCI_CONFIG_INTEL_OPREGION,4); > + > + DPRINTF("%s Setup IGD OpRegion: %x\n",__func__,vdev->host_opregion); > + > + snprintf(name, sizeof(name), "VFIO %04x:%02x:%02x.%x IGDOPREGION mmap", > + vdev->host.domain, vdev->host.bus, vdev->host.slot, > + vdev->host.function); > + > + fd = open("/dev/mem", O_RDWR); > + map = > mmap(NULL,INTEL_OPREGION_SIZE,PROT_READ|PROT_WRITE,MAP_SHARED,fd,(vdev->host_opregion > & ~0xfff)); > + if (map == MAP_FAILED) > + { > + map = NULL; > + DPRINTF("%s Map IGD OpRegion: MAP_FAILED\n",__func__); > + } > + memory_region_init_ram_ptr(&vdev->opregion, OBJECT(vdev), name, > INTEL_OPREGION_SIZE, map); This would of course need to be a new region exposed by vfio rather than using /dev/mem. We'd probably want to reserve a device specific region and indicate via region flags various IGD passthrus provided. > +} > + > +/* Seabios allocates the guest location for the opregion. This function > then memmory maps > + * host memory at that guest location */ > +static void vfio_map_igdopregion(VFIODevice *vdev, uint32_t new_opregion) > +{ > + PCIDevice *pdev = &vdev->pdev; > + MemoryRegion *guest_memory = get_system_memory(); > + uint32_t current_opregion = > vfio_pci_read_config(pdev,PCI_CONFIG_INTEL_OPREGION,4); > + > + if ( current_opregion != vdev->host_opregion ) > + { > + // remap > + DPRINTF("%s Delete IGD OpRegion: %x\n",__func__,(current_opregion > & ~0xfff)); > + memory_region_del_subregion(guest_memory, &vdev->opregion); > + } > + > + DPRINTF("%s Map IGD OpRegion: %x -> > %x\n",__func__,vdev->host_opregion,new_opregion); > + memory_region_add_subregion(guest_memory, (new_opregion & ~0xfff), > &vdev->opregion); > + > + DPRINTF("%s Adding 0xfc to emulated bits\n", __func__); > + vfio_add_emulated_rw_long(vdev, PCI_CONFIG_INTEL_OPREGION, > new_opregion, 0xffffffff); I know you've had issues with getting it to work, but we should really only need to call vfio_add_emulated_rw_long() once when we init the device, and then simply store the new value here. > +} > + > static int vfio_setup_pcie_cap(VFIODevice *vdev, int pos, uint8_t size) > { > uint16_t flags; > @@ -4028,7 +4179,8 @@ static int vfio_get_device(VFIOGroup *group, const > char *name, VFIODevice *vdev) > ret = ioctl(vdev->fd, VFIO_DEVICE_GET_IRQ_INFO, &irq_info); > if (ret) { > /* This can fail for an old kernel or legacy PCI dev */ > - DPRINTF("VFIO_DEVICE_GET_IRQ_INFO failure: %m\n"); > + //DPRINTF("VFIO_DEVICE_GET_IRQ_INFO failure: %m\n"); > + DPRINTF("VFIO_DEVICE_GET_IRQ_INFO failure ret=%d\n", ret); > ret = 0; > } else if (irq_info.count == 1) { > vdev->pci_aer = true; > @@ -4253,6 +4405,11 @@ static int vfio_initfn(PCIDevice *pdev) > vdev->emulated_config_bits[PCI_HEADER_TYPE] = > > PCI_HEADER_TYPE_MULTI_FUNCTION; > > + if (IS_IGD(pci_get_word(pdev->config + PCI_DEVICE_ID))) > + { > + vfio_setup_igdopregion(vdev); > + } > + > /* Restore or clear multifunction, this is always controlled by QEMU */ > if (vdev->pdev.cap_present & QEMU_PCI_CAP_MULTIFUNCTION) { > vdev->pdev.config[PCI_HEADER_TYPE] |= > PCI_HEADER_TYPE_MULTI_FUNCTION; _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/intel-gfx