Hi Eric, > From: Auger Eric <eric.auger@xxxxxxxxxx> > Sent: Thursday, April 2, 2020 9:49 PM > To: Liu, Yi L <yi.l.liu@xxxxxxxxx>; qemu-devel@xxxxxxxxxx; > Subject: Re: [PATCH v2 05/22] hw/pci: modify pci_setup_iommu() to set > PCIIOMMUOps > > Hi Yi, > > On 4/2/20 3:37 PM, Liu, Yi L wrote: > > Hi Eric, > > > >> From: Auger Eric < eric.auger@xxxxxxxxxx > > >> Sent: Thursday, April 2, 2020 8:41 PM > >> To: Liu, Yi L <yi.l.liu@xxxxxxxxx>; qemu-devel@xxxxxxxxxx; > >> Subject: Re: [PATCH v2 05/22] hw/pci: modify pci_setup_iommu() to set > >> PCIIOMMUOps > >> > >> Hi Yi, > >> > >> On 4/2/20 10:52 AM, Liu, Yi L wrote: > >>>> From: Auger Eric < eric.auger@xxxxxxxxxx> > >>>> Sent: Monday, March 30, 2020 7:02 PM > >>>> To: Liu, Yi L <yi.l.liu@xxxxxxxxx>; qemu-devel@xxxxxxxxxx; > >>>> Subject: Re: [PATCH v2 05/22] hw/pci: modify pci_setup_iommu() to > >>>> set PCIIOMMUOps > >>>> > >>>> > >>>> > >>>> On 3/30/20 6:24 AM, Liu Yi L wrote: > >>>>> This patch modifies pci_setup_iommu() to set PCIIOMMUOps instead > >>>>> of setting PCIIOMMUFunc. PCIIOMMUFunc is used to get an address > >>>>> space for a PCI device in vendor specific way. The PCIIOMMUOps > >>>>> still offers this functionality. But using PCIIOMMUOps leaves > >>>>> space to add more iommu related vendor specific operations. > >>>>> > >>>>> Cc: Kevin Tian <kevin.tian@xxxxxxxxx> > >>>>> Cc: Jacob Pan <jacob.jun.pan@xxxxxxxxxxxxxxx> > >>>>> Cc: Peter Xu <peterx@xxxxxxxxxx> > >>>>> Cc: Eric Auger <eric.auger@xxxxxxxxxx> > >>>>> Cc: Yi Sun <yi.y.sun@xxxxxxxxxxxxxxx> > >>>>> Cc: David Gibson <david@xxxxxxxxxxxxxxxxxxxxx> > >>>>> Cc: Michael S. Tsirkin <mst@xxxxxxxxxx> > >>>>> Reviewed-by: David Gibson <david@xxxxxxxxxxxxxxxxxxxxx> > >>>>> Reviewed-by: Peter Xu <peterx@xxxxxxxxxx> > >>>>> Signed-off-by: Liu Yi L <yi.l.liu@xxxxxxxxx> > >>>>> --- > >>>>> hw/alpha/typhoon.c | 6 +++++- > >>>>> hw/arm/smmu-common.c | 6 +++++- > >>>>> hw/hppa/dino.c | 6 +++++- > >>>>> hw/i386/amd_iommu.c | 6 +++++- > >>>>> hw/i386/intel_iommu.c | 6 +++++- > >>>>> hw/pci-host/designware.c | 6 +++++- > >>>>> hw/pci-host/pnv_phb3.c | 6 +++++- > >>>>> hw/pci-host/pnv_phb4.c | 6 +++++- > >>>>> hw/pci-host/ppce500.c | 6 +++++- > >>>>> hw/pci-host/prep.c | 6 +++++- > >>>>> hw/pci-host/sabre.c | 6 +++++- > >>>>> hw/pci/pci.c | 12 +++++++----- > >>>>> hw/ppc/ppc440_pcix.c | 6 +++++- > >>>>> hw/ppc/spapr_pci.c | 6 +++++- > >>>>> hw/s390x/s390-pci-bus.c | 8 ++++++-- hw/virtio/virtio-iommu.c > >>>>> | > >>>>> 6 > >>>>> +++++- > >>>>> include/hw/pci/pci.h | 8 ++++++-- > >>>>> include/hw/pci/pci_bus.h | 2 +- > >>>>> 18 files changed, 90 insertions(+), 24 deletions(-) > >>>>> > >>>>> diff --git a/hw/alpha/typhoon.c b/hw/alpha/typhoon.c index > >>>>> 1795e2f..f271de1 100644 > >>>>> --- a/hw/alpha/typhoon.c > >>>>> +++ b/hw/alpha/typhoon.c > >>>>> @@ -740,6 +740,10 @@ static AddressSpace > >>>>> *typhoon_pci_dma_iommu(PCIBus > >>>> *bus, void *opaque, int devfn) > >>>>> return &s->pchip.iommu_as; > >>>>> } > >>>>> > >>>>> +static const PCIIOMMUOps typhoon_iommu_ops = { > >>>>> + .get_address_space = typhoon_pci_dma_iommu, }; > >>>>> + > >>>>> static void typhoon_set_irq(void *opaque, int irq, int level) { > >>>>> TyphoonState *s = opaque; > >>>>> @@ -897,7 +901,7 @@ PCIBus *typhoon_init(MemoryRegion *ram, ISABus > >>>> **isa_bus, qemu_irq *p_rtc_irq, > >>>>> "iommu-typhoon", UINT64_MAX); > >>>>> address_space_init(&s->pchip.iommu_as, MEMORY_REGION(&s- > >>>>> pchip.iommu), > >>>>> "pchip0-pci"); > >>>>> - pci_setup_iommu(b, typhoon_pci_dma_iommu, s); > >>>>> + pci_setup_iommu(b, &typhoon_iommu_ops, s); > >>>>> > >>>>> /* Pchip0 PCI special/interrupt acknowledge, 0x801.F800.0000, 64MB. */ > >>>>> memory_region_init_io(&s->pchip.reg_iack, OBJECT(s), > >>>>> &alpha_pci_iack_ops, diff --git a/hw/arm/smmu-common.c > >>>>> b/hw/arm/smmu-common.c index e13a5f4..447146e 100644 > >>>>> --- a/hw/arm/smmu-common.c > >>>>> +++ b/hw/arm/smmu-common.c > >>>>> @@ -343,6 +343,10 @@ static AddressSpace *smmu_find_add_as(PCIBus > >>>>> *bus, > >>>> void *opaque, int devfn) > >>>>> return &sdev->as; > >>>>> } > >>>>> > >>>>> +static const PCIIOMMUOps smmu_ops = { > >>>>> + .get_address_space = smmu_find_add_as, }; > >>>>> + > >>>>> IOMMUMemoryRegion *smmu_iommu_mr(SMMUState *s, uint32_t sid) { > >>>>> uint8_t bus_n, devfn; > >>>>> @@ -437,7 +441,7 @@ static void smmu_base_realize(DeviceState > >>>>> *dev, Error > >>>> **errp) > >>>>> s->smmu_pcibus_by_busptr = g_hash_table_new(NULL, NULL); > >>>>> > >>>>> if (s->primary_bus) { > >>>>> - pci_setup_iommu(s->primary_bus, smmu_find_add_as, s); > >>>>> + pci_setup_iommu(s->primary_bus, &smmu_ops, s); > >>>>> } else { > >>>>> error_setg(errp, "SMMU is not attached to any PCI bus!"); > >>>>> } > >>>>> diff --git a/hw/hppa/dino.c b/hw/hppa/dino.c index > >>>>> 2b1b38c..3da4f84 > >>>>> 100644 > >>>>> --- a/hw/hppa/dino.c > >>>>> +++ b/hw/hppa/dino.c > >>>>> @@ -459,6 +459,10 @@ static AddressSpace > >>>>> *dino_pcihost_set_iommu(PCIBus > >>>> *bus, void *opaque, > >>>>> return &s->bm_as; > >>>>> } > >>>>> > >>>>> +static const PCIIOMMUOps dino_iommu_ops = { > >>>>> + .get_address_space = dino_pcihost_set_iommu, }; > >>>>> + > >>>>> /* > >>>>> * Dino interrupts are connected as shown on Page 78, Table 23 > >>>>> * (Little-endian bit numbers) > >>>>> @@ -580,7 +584,7 @@ PCIBus *dino_init(MemoryRegion *addr_space, > >>>>> memory_region_add_subregion(&s->bm, 0xfff00000, > >>>>> &s->bm_cpu_alias); > >>>>> address_space_init(&s->bm_as, &s->bm, "pci-bm"); > >>>>> - pci_setup_iommu(b, dino_pcihost_set_iommu, s); > >>>>> + pci_setup_iommu(b, &dino_iommu_ops, s); > >>>>> > >>>>> *p_rtc_irq = qemu_allocate_irq(dino_set_timer_irq, s, 0); > >>>>> *p_ser_irq = qemu_allocate_irq(dino_set_serial_irq, s, 0); > >>>>> diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c index > >>>>> b1175e5..5fec30e 100644 > >>>>> --- a/hw/i386/amd_iommu.c > >>>>> +++ b/hw/i386/amd_iommu.c > >>>>> @@ -1451,6 +1451,10 @@ static AddressSpace > >>>> *amdvi_host_dma_iommu(PCIBus *bus, void *opaque, int devfn) > >>>>> return &iommu_as[devfn]->as; > >>>>> } > >>>>> > >>>>> +static const PCIIOMMUOps amdvi_iommu_ops = { > >>>>> + .get_address_space = amdvi_host_dma_iommu, }; > >>>>> + > >>>>> static const MemoryRegionOps mmio_mem_ops = { > >>>>> .read = amdvi_mmio_read, > >>>>> .write = amdvi_mmio_write, > >>>>> @@ -1577,7 +1581,7 @@ static void amdvi_realize(DeviceState *dev, > >>>>> Error **errp) > >>>>> > >>>>> sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->mmio); > >>>>> sysbus_mmio_map(SYS_BUS_DEVICE(s), 0, AMDVI_BASE_ADDR); > >>>>> - pci_setup_iommu(bus, amdvi_host_dma_iommu, s); > >>>>> + pci_setup_iommu(bus, &amdvi_iommu_ops, s); > >>>>> s->devid = object_property_get_int(OBJECT(&s->pci), "addr", errp); > >>>>> msi_init(&s->pci.dev, 0, 1, true, false, errp); > >>>>> amdvi_init(s); > >>>>> diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index > >>>>> df7ad25..4b22910 100644 > >>>>> --- a/hw/i386/intel_iommu.c > >>>>> +++ b/hw/i386/intel_iommu.c > >>>>> @@ -3729,6 +3729,10 @@ static AddressSpace > >>>>> *vtd_host_dma_iommu(PCIBus > >>>> *bus, void *opaque, int devfn) > >>>>> return &vtd_as->as; > >>>>> } > >>>>> > >>>>> +static PCIIOMMUOps vtd_iommu_ops = { > >>>> static const > >>> > >>> got it. > >>> > >>>>> + .get_address_space = vtd_host_dma_iommu, }; > >>>>> + > >>>>> static bool vtd_decide_config(IntelIOMMUState *s, Error **errp) { > >>>>> X86IOMMUState *x86_iommu = X86_IOMMU_DEVICE(s); @@ -3840,7 > >>>>> +3844,7 @@ static void vtd_realize(DeviceState *dev, Error **errp) > >>>>> g_free, g_free); > >>>>> vtd_init(s); > >>>>> sysbus_mmio_map(SYS_BUS_DEVICE(s), 0, > >>>> Q35_HOST_BRIDGE_IOMMU_ADDR); > >>>>> - pci_setup_iommu(bus, vtd_host_dma_iommu, dev); > >>>>> + pci_setup_iommu(bus, &vtd_iommu_ops, dev); > >>>>> /* Pseudo address space under root PCI bus. */ > >>>>> x86ms->ioapic_as = vtd_host_dma_iommu(bus, s, > >>>> Q35_PSEUDO_DEVFN_IOAPIC); > >>>>> > >>>>> qemu_add_machine_init_done_notifier(&vtd_machine_done_notify); > >>>>> diff --git a/hw/pci-host/designware.c b/hw/pci-host/designware.c > >>>>> index dd24551..4c6338a 100644 > >>>>> --- a/hw/pci-host/designware.c > >>>>> +++ b/hw/pci-host/designware.c > >>>>> @@ -645,6 +645,10 @@ static AddressSpace > >>>> *designware_pcie_host_set_iommu(PCIBus *bus, void *opaque, > >>>>> return &s->pci.address_space; } > >>>>> > >>>>> +static const PCIIOMMUOps designware_iommu_ops = { > >>>>> + .get_address_space = designware_pcie_host_set_iommu, }; > >>>>> + > >>>>> static void designware_pcie_host_realize(DeviceState *dev, Error > >>>>> **errp) { > >>>>> PCIHostState *pci = PCI_HOST_BRIDGE(dev); @@ -686,7 +690,7 @@ > >>>>> static void designware_pcie_host_realize(DeviceState *dev, Error **errp) > >>>>> address_space_init(&s->pci.address_space, > >>>>> &s->pci.address_space_root, > >>>>> "pcie-bus-address-space"); > >>>>> - pci_setup_iommu(pci->bus, designware_pcie_host_set_iommu, s); > >>>>> + pci_setup_iommu(pci->bus, &designware_iommu_ops, s); > >>>>> > >>>>> qdev_set_parent_bus(DEVICE(&s->root), BUS(pci->bus)); > >>>>> qdev_init_nofail(DEVICE(&s->root)); > >>>>> diff --git a/hw/pci-host/pnv_phb3.c b/hw/pci-host/pnv_phb3.c index > >>>>> 74618fa..ecfe627 100644 > >>>>> --- a/hw/pci-host/pnv_phb3.c > >>>>> +++ b/hw/pci-host/pnv_phb3.c > >>>>> @@ -961,6 +961,10 @@ static AddressSpace > >>>>> *pnv_phb3_dma_iommu(PCIBus > >>>> *bus, void *opaque, int devfn) > >>>>> return &ds->dma_as; > >>>>> } > >>>>> > >>>>> +static PCIIOMMUOps pnv_phb3_iommu_ops = { > >>>> static const > >>> got it. :-) > >>> > >>>>> + .get_address_space = pnv_phb3_dma_iommu, }; > >>>>> + > >>>>> static void pnv_phb3_instance_init(Object *obj) { > >>>>> PnvPHB3 *phb = PNV_PHB3(obj); @@ -1059,7 +1063,7 @@ static > >>>>> void pnv_phb3_realize(DeviceState *dev, Error > >>>> **errp) > >>>>> &phb->pci_mmio, &phb->pci_io, > >>>>> 0, 4, > >>>>> TYPE_PNV_PHB3_ROOT_BUS); > >>>>> > >>>>> - pci_setup_iommu(pci->bus, pnv_phb3_dma_iommu, phb); > >>>>> + pci_setup_iommu(pci->bus, &pnv_phb3_iommu_ops, phb); > >>>>> > >>>>> /* Add a single Root port */ > >>>>> qdev_prop_set_uint8(DEVICE(&phb->root), "chassis", > >>>>> phb->chip_id); diff --git a/hw/pci-host/pnv_phb4.c > >>>>> b/hw/pci-host/pnv_phb4.c index > >>>>> 23cf093..04e95e3 100644 > >>>>> --- a/hw/pci-host/pnv_phb4.c > >>>>> +++ b/hw/pci-host/pnv_phb4.c > >>>>> @@ -1148,6 +1148,10 @@ static AddressSpace > >>>>> *pnv_phb4_dma_iommu(PCIBus > >>>> *bus, void *opaque, int devfn) > >>>>> return &ds->dma_as; > >>>>> } > >>>>> > >>>>> +static PCIIOMMUOps pnv_phb4_iommu_ops = { > >>>> idem > >>> will add const. > >>> > >>>>> + .get_address_space = pnv_phb4_dma_iommu, }; > >>>>> + > >>>>> static void pnv_phb4_instance_init(Object *obj) { > >>>>> PnvPHB4 *phb = PNV_PHB4(obj); @@ -1205,7 +1209,7 @@ static > >>>>> void pnv_phb4_realize(DeviceState *dev, Error > >>>> **errp) > >>>>> pnv_phb4_set_irq, pnv_phb4_map_irq, phb, > >>>>> &phb->pci_mmio, &phb->pci_io, > >>>>> 0, 4, TYPE_PNV_PHB4_ROOT_BUS); > >>>>> - pci_setup_iommu(pci->bus, pnv_phb4_dma_iommu, phb); > >>>>> + pci_setup_iommu(pci->bus, &pnv_phb4_iommu_ops, phb); > >>>>> > >>>>> /* Add a single Root port */ > >>>>> qdev_prop_set_uint8(DEVICE(&phb->root), "chassis", > >>>>> phb->chip_id); diff --git a/hw/pci-host/ppce500.c > >>>>> b/hw/pci-host/ppce500.c index d710727..5baf5db 100644 > >>>>> --- a/hw/pci-host/ppce500.c > >>>>> +++ b/hw/pci-host/ppce500.c > >>>>> @@ -439,6 +439,10 @@ static AddressSpace > >>>>> *e500_pcihost_set_iommu(PCIBus > >>>> *bus, void *opaque, > >>>>> return &s->bm_as; > >>>>> } > >>>>> > >>>>> +static const PCIIOMMUOps ppce500_iommu_ops = { > >>>>> + .get_address_space = e500_pcihost_set_iommu, }; > >>>>> + > >>>>> static void e500_pcihost_realize(DeviceState *dev, Error **errp) { > >>>>> SysBusDevice *sbd = SYS_BUS_DEVICE(dev); @@ -473,7 +477,7 @@ > >>>>> static void e500_pcihost_realize(DeviceState *dev, Error **errp) > >>>>> memory_region_init(&s->bm, OBJECT(s), "bm-e500", UINT64_MAX); > >>>>> memory_region_add_subregion(&s->bm, 0x0, &s->busmem); > >>>>> address_space_init(&s->bm_as, &s->bm, "pci-bm"); > >>>>> - pci_setup_iommu(b, e500_pcihost_set_iommu, s); > >>>>> + pci_setup_iommu(b, &ppce500_iommu_ops, s); > >>>>> > >>>>> pci_create_simple(b, 0, "e500-host-bridge"); > >>>>> > >>>>> diff --git a/hw/pci-host/prep.c b/hw/pci-host/prep.c index > >>>>> 1a02e9a..7c57311 100644 > >>>>> --- a/hw/pci-host/prep.c > >>>>> +++ b/hw/pci-host/prep.c > >>>>> @@ -213,6 +213,10 @@ static AddressSpace > >>>>> *raven_pcihost_set_iommu(PCIBus > >>>> *bus, void *opaque, > >>>>> return &s->bm_as; > >>>>> } > >>>>> > >>>>> +static const PCIIOMMUOps raven_iommu_ops = { > >>>>> + .get_address_space = raven_pcihost_set_iommu, }; > >>>>> + > >>>>> static void raven_change_gpio(void *opaque, int n, int level) { > >>>>> PREPPCIState *s = opaque; > >>>>> @@ -303,7 +307,7 @@ static void raven_pcihost_initfn(Object *obj) > >>>>> memory_region_add_subregion(&s->bm, 0 , &s- > >>> bm_pci_memory_alias); > >>>>> memory_region_add_subregion(&s->bm, 0x80000000, &s- > >bm_ram_alias); > >>>>> address_space_init(&s->bm_as, &s->bm, "raven-bm"); > >>>>> - pci_setup_iommu(&s->pci_bus, raven_pcihost_set_iommu, s); > >>>>> + pci_setup_iommu(&s->pci_bus, &raven_iommu_ops, s); > >>>>> > >>>>> h->bus = &s->pci_bus; > >>>>> > >>>>> diff --git a/hw/pci-host/sabre.c b/hw/pci-host/sabre.c index > >>>>> 2b8503b..251549b 100644 > >>>>> --- a/hw/pci-host/sabre.c > >>>>> +++ b/hw/pci-host/sabre.c > >>>>> @@ -112,6 +112,10 @@ static AddressSpace > >>>>> *sabre_pci_dma_iommu(PCIBus > >>>> *bus, void *opaque, int devfn) > >>>>> return &is->iommu_as; > >>>>> } > >>>>> > >>>>> +static const PCIIOMMUOps sabre_iommu_ops = { > >>>>> + .get_address_space = sabre_pci_dma_iommu, }; > >>>>> + > >>>>> static void sabre_config_write(void *opaque, hwaddr addr, > >>>>> uint64_t val, unsigned size) { @@ > >>>>> -402,7 +406,7 @@ static void sabre_realize(DeviceState *dev, Error **errp) > >>>>> /* IOMMU */ > >>>>> memory_region_add_subregion_overlap(&s->sabre_config, 0x200, > >>>>> sysbus_mmio_get_region(SYS_BUS_DEVICE(s->iommu), 0), 1); > >>>>> - pci_setup_iommu(phb->bus, sabre_pci_dma_iommu, s->iommu); > >>>>> + pci_setup_iommu(phb->bus, &sabre_iommu_ops, s->iommu); > >>>>> > >>>>> /* APB secondary busses */ > >>>>> pci_dev = pci_create_multifunction(phb->bus, PCI_DEVFN(1, 0), > >>>>> true, diff --git a/hw/pci/pci.c b/hw/pci/pci.c index > >>>>> e1ed667..aa9025c > >>>>> 100644 > >>>>> --- a/hw/pci/pci.c > >>>>> +++ b/hw/pci/pci.c > >>>>> @@ -2644,7 +2644,7 @@ AddressSpace > >>>> *pci_device_iommu_address_space(PCIDevice *dev) > >>>>> PCIBus *iommu_bus = bus; > >>>>> uint8_t devfn = dev->devfn; > >>>>> > >>>>> - while (iommu_bus && !iommu_bus->iommu_fn && iommu_bus- > >>> parent_dev) > >>>> { > >>>>> + while (iommu_bus && !iommu_bus->iommu_ops && > >>>>> + iommu_bus->parent_dev) { > >>>> Depending on future usage, this is not strictly identical to the > >>>> original code. You exit the loop as soon as a iommu_bus->iommu_ops > >>>> is set whatever the presence of get_address_space(). > >>> > >>> To be identical with original code, may adding the > >>> get_address_space() presence check. Then the loop exits when the > >>> iommu_bus->iommu_ops is set and meanwhile iommu_bus->iommu_ops- > >get_address_space() is set. > >>> But is it possible that there is an intermediate iommu_bus which has > >>> iommu_ops set but the get_address_space() is clear. I guess not as > >>> iommu_ops is set by vIOMMU and vIOMMU won't differentiate buses? > >> > >> I don't know. That depends on how the ops are going to be used in the > >> future. Can't you enforce the fact that get_address_space() is a mandatory ops? > > > > No, I didn't mean that. Actually, in the patch, the get_address_space() presence is > checked. > > I'm not sure if your point is to add get_address_space() presence > > check instead of just checking the iommu_ops presence. > Yes that was my point. I wanted to underline the checks are not strictly identical > and maybe during enumeration you may find a device with ops set and no > get_address_space(). I see. But this happens only when there are multiple iommu_ops instances provided by vIOMMU. right? For some buses, vIOMMU set an iommu_ops with get_address_space() set, but for others it sets an iommu_ops w/o get_address_space(). Is it possible? > Then I meant maybe you should enforce somewhere in the > code or in the documentation that get_address_space() is a mandatory operation in > the ops struct and must be set as soon as the struct is passed. maybe in > pci_setup_iommu() you could check that get_address_space is set? How about other callbacks in iommu_ops? It would be strange that only check get_address_space() and ignore other callbacks. Regards, Yi Liu