> From: Jean-Philippe Brucker > Sent: Saturday, February 29, 2020 1:26 AM > > Platforms without device-tree do not currently have a method for > describing the vIOMMU topology. Provide a topology description embedded > into the virtio device. > > Use PCI FIXUP to probe the config space early, because we need to > discover the topology before any DMA configuration takes place, and the > virtio driver may be loaded much later. Since we discover the topology > description when probing the PCI hierarchy, the virtual IOMMU cannot > manage other platform devices discovered earlier. > > This solution isn't elegant nor foolproof, but is the best we can do at can you elaborate "isn't elegant nor foolproof" part? is there any other limitation (beside pci fixup) along the route, when comparing it to the ACPI-approach? > the moment and works with existing virtio-iommu implementations. It also > enables an IOMMU for lightweight hypervisors that do not rely on > firmware methods for booting. > > Signed-off-by: Eric Auger <eric.auger@xxxxxxxxxx> > Signed-off-by: Jean-Philippe Brucker <jean-philippe@xxxxxxxxxx> > --- > MAINTAINERS | 2 + > drivers/iommu/Kconfig | 10 + > drivers/iommu/Makefile | 1 + > drivers/iommu/virtio-iommu-topology.c | 343 > ++++++++++++++++++++++++++ > drivers/iommu/virtio-iommu.c | 3 + > include/linux/virt_iommu.h | 19 ++ > include/uapi/linux/virtio_iommu.h | 26 ++ > 7 files changed, 404 insertions(+) > create mode 100644 drivers/iommu/virtio-iommu-topology.c > create mode 100644 include/linux/virt_iommu.h > > diff --git a/MAINTAINERS b/MAINTAINERS > index fcd79fc38928..65a03ce53096 100644 > --- a/MAINTAINERS > +++ b/MAINTAINERS > @@ -17781,6 +17781,8 @@ M: Jean-Philippe Brucker <jean- > philippe@xxxxxxxxxx> > L: virtualization@xxxxxxxxxxxxxxxxxxxxxxxxxx > S: Maintained > F: drivers/iommu/virtio-iommu.c > +F: drivers/iommu/virtio-iommu-topology.c > +F: include/linux/virt_iommu.h > F: include/uapi/linux/virtio_iommu.h > > VIRTUAL BOX GUEST DEVICE DRIVER > diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig > index c5df570ef84a..f8cb45d84bb0 100644 > --- a/drivers/iommu/Kconfig > +++ b/drivers/iommu/Kconfig > @@ -516,4 +516,14 @@ config VIRTIO_IOMMU > > Say Y here if you intend to run this kernel as a guest. > > +config VIRTIO_IOMMU_TOPOLOGY > + bool "Topology properties for the virtio-iommu" > + depends on VIRTIO_IOMMU > + default y > + help > + Enable early probing of the virtio-iommu device, to detect the > + built-in topology description. > + > + Say Y here if you intend to run this kernel as a guest. > + > endif # IOMMU_SUPPORT > diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile > index 9f33fdb3bb05..5da24280d08c 100644 > --- a/drivers/iommu/Makefile > +++ b/drivers/iommu/Makefile > @@ -37,3 +37,4 @@ obj-$(CONFIG_S390_IOMMU) += s390-iommu.o > obj-$(CONFIG_QCOM_IOMMU) += qcom_iommu.o > obj-$(CONFIG_HYPERV_IOMMU) += hyperv-iommu.o > obj-$(CONFIG_VIRTIO_IOMMU) += virtio-iommu.o > +obj-$(CONFIG_VIRTIO_IOMMU_TOPOLOGY) += virtio-iommu-topology.o > diff --git a/drivers/iommu/virtio-iommu-topology.c b/drivers/iommu/virtio- > iommu-topology.c > new file mode 100644 > index 000000000000..2188624ef216 > --- /dev/null > +++ b/drivers/iommu/virtio-iommu-topology.c > @@ -0,0 +1,343 @@ > +// SPDX-License-Identifier: GPL-2.0 > +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt > + > +#include <linux/dma-iommu.h> > +#include <linux/list.h> > +#include <linux/pci.h> > +#include <linux/virt_iommu.h> > +#include <linux/virtio_ids.h> > +#include <linux/virtio_pci.h> > +#include <uapi/linux/virtio_iommu.h> > + > +struct viommu_cap_config { > + u8 bar; > + u32 length; /* structure size */ > + u32 offset; /* structure offset within the bar */ > +}; > + > +union viommu_topo_cfg { > + __le16 type; > + struct virtio_iommu_topo_pci_range pci; > + struct virtio_iommu_topo_endpoint ep; > +}; > + > +struct viommu_spec { > + struct device *dev; /* transport device */ > + struct fwnode_handle *fwnode; > + struct iommu_ops *ops; > + struct list_head list; > + size_t num_items; > + /* The config array of length num_items follows */ > + union viommu_topo_cfg cfg[]; > +}; > + > +static LIST_HEAD(viommus); > +static DEFINE_MUTEX(viommus_lock); > + > +#define VPCI_FIELD(field) offsetof(struct virtio_pci_cap, field) > + > +static inline int viommu_pci_find_capability(struct pci_dev *dev, u8 cfg_type, > + struct viommu_cap_config *cap) > +{ > + int pos; > + u8 bar; > + > + for (pos = pci_find_capability(dev, PCI_CAP_ID_VNDR); > + pos > 0; > + pos = pci_find_next_capability(dev, pos, PCI_CAP_ID_VNDR)) { > + u8 type; > + > + pci_read_config_byte(dev, pos + VPCI_FIELD(cfg_type), > &type); > + if (type != cfg_type) > + continue; > + > + pci_read_config_byte(dev, pos + VPCI_FIELD(bar), &bar); > + > + /* Ignore structures with reserved BAR values */ > + if (type != VIRTIO_PCI_CAP_PCI_CFG && bar > 0x5) > + continue; > + > + cap->bar = bar; > + pci_read_config_dword(dev, pos + VPCI_FIELD(length), > + &cap->length); > + pci_read_config_dword(dev, pos + VPCI_FIELD(offset), > + &cap->offset); > + > + return pos; > + } > + return 0; > +} > + > +static void viommu_ccopy(__le32 *dst, u32 __iomem *src, size_t length) > +{ > + size_t i; > + > + /* For the moment all our config structures align on 32b */ > + if (WARN_ON(length % 4)) > + return; > + > + for (i = 0; i < length / 4; i++) > + /* Keep little-endian data */ > + dst[i] = cpu_to_le32(ioread32(src + i)); > +} > + > +static int viommu_parse_topology(struct device *dev, > + struct virtio_iommu_config __iomem *cfg) > +{ > + size_t i; > + size_t spec_length; > + struct viommu_spec *viommu_spec; > + u32 offset, item_length, num_items; > + > + offset = ioread32(&cfg->topo_config.offset); > + item_length = ioread32(&cfg->topo_config.item_length); > + num_items = ioread32(&cfg->topo_config.num_items); > + if (!offset || !num_items || !item_length) > + return 0; > + > + spec_length = sizeof(*viommu_spec) + num_items * > + sizeof(union viommu_topo_cfg); > + viommu_spec = kzalloc(spec_length, GFP_KERNEL); > + if (!viommu_spec) > + return -ENOMEM; > + > + viommu_spec->dev = dev; > + > + /* Copy in the whole array, sort it out later */ > + for (i = 0; i < num_items; i++) { > + size_t read_length = min_t(size_t, item_length, > + sizeof(union viommu_topo_cfg)); > + > + viommu_ccopy((__le32 *)&viommu_spec->cfg[i], > + (void __iomem *)cfg + offset, > + read_length); > + > + offset += item_length; > + } > + viommu_spec->num_items = num_items; > + > + mutex_lock(&viommus_lock); > + list_add(&viommu_spec->list, &viommus); > + mutex_unlock(&viommus_lock); > + > + return 0; > +} > + > +static void viommu_pci_parse_topology(struct pci_dev *dev) > +{ > + int pos; > + u32 features; > + void __iomem *regs; > + struct viommu_cap_config cap = {0}; > + struct virtio_pci_common_cfg __iomem *common_cfg; > + > + /* > + * The virtio infrastructure might not be loaded at this point. we need > + * to access the BARs ourselves. > + */ > + pos = viommu_pci_find_capability(dev, > VIRTIO_PCI_CAP_COMMON_CFG, &cap); > + if (!pos) { > + pci_warn(dev, "common capability not found\n"); > + return; > + } > + > + if (pci_enable_device_mem(dev)) > + return; > + > + regs = pci_iomap(dev, cap.bar, 0); > + if (!regs) > + return; > + > + common_cfg = regs + cap.offset; > + > + /* Find out if the device supports topology description */ > + writel(0, &common_cfg->device_feature_select); > + features = ioread32(&common_cfg->device_feature); > + > + pci_iounmap(dev, regs); > + > + if (!(features & BIT(VIRTIO_IOMMU_F_TOPOLOGY))) { > + pci_dbg(dev, "device doesn't have topology description"); > + return; > + } > + > + pos = viommu_pci_find_capability(dev, > VIRTIO_PCI_CAP_DEVICE_CFG, &cap); > + if (!pos) { > + pci_warn(dev, "device config capability not found\n"); > + return; > + } > + > + regs = pci_iomap(dev, cap.bar, 0); > + if (!regs) > + return; > + > + pci_info(dev, "parsing virtio-iommu topology\n"); > + viommu_parse_topology(&dev->dev, regs + cap.offset); > + pci_iounmap(dev, regs); > +} > + > +/* > + * Catch a PCI virtio-iommu implementation early to get the topology > description > + * before we start probing other endpoints. > + */ > +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_REDHAT_QUMRANET, 0x1040 > + VIRTIO_ID_IOMMU, > + viommu_pci_parse_topology); > + > +/* > + * Return true if the device matches this topology structure. Write the > endpoint > + * ID into epid if it's the case. > + */ > +static bool viommu_parse_pci(struct pci_dev *pdev, union > viommu_topo_cfg *cfg, > + u32 *epid) > +{ > + u32 endpoint_start; > + u16 start, end, domain; > + u16 devid = pci_dev_id(pdev); > + u16 type = le16_to_cpu(cfg->type); > + > + if (type != VIRTIO_IOMMU_TOPO_PCI_RANGE) > + return false; > + > + start = le16_to_cpu(cfg->pci.requester_start); > + end = le16_to_cpu(cfg->pci.requester_end); > + domain = le16_to_cpu(cfg->pci.hierarchy); > + endpoint_start = le32_to_cpu(cfg->pci.endpoint_start); > + > + if (pci_domain_nr(pdev->bus) == domain && > + devid >= start && devid <= end) { > + *epid = devid - start + endpoint_start; > + return true; > + } > + return false; > +} > + > +static const struct iommu_ops *virt_iommu_setup(struct device *dev) > +{ > + struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev); > + const struct iommu_ops *viommu_ops = NULL; > + struct fwnode_handle *viommu_fwnode; > + struct viommu_spec *viommu_spec; > + struct pci_dev *pci_dev = NULL; > + struct device *viommu_dev; > + bool found = false; > + size_t i; > + u32 epid; > + int ret; > + > + /* Already translated? */ > + if (fwspec && fwspec->ops) > + return NULL; > + > + if (dev_is_pci(dev)) { > + pci_dev = to_pci_dev(dev); > + } else { > + /* At the moment we don't support platform devices */ > + return NULL; > + } > + > + mutex_lock(&viommus_lock); > + list_for_each_entry(viommu_spec, &viommus, list) { > + for (i = 0; i < viommu_spec->num_items; i++) { > + union viommu_topo_cfg *cfg = &viommu_spec- > >cfg[i]; > + > + found = viommu_parse_pci(pci_dev, cfg, &epid); > + if (found) > + break; > + } > + if (found) { > + viommu_ops = viommu_spec->ops; > + viommu_fwnode = viommu_spec->fwnode; > + viommu_dev = viommu_spec->dev; > + break; > + } > + } > + mutex_unlock(&viommus_lock); > + if (!found) > + return NULL; > + > + /* We're not translating ourselves. */ > + if (viommu_dev == dev) > + return NULL; > + > + /* > + * If we found a PCI range managed by the viommu, we're the ones > that > + * have to request ACS. > + */ > + if (pci_dev) > + pci_request_acs(); > + > + if (!viommu_ops) > + return ERR_PTR(-EPROBE_DEFER); > + > + ret = iommu_fwspec_init(dev, viommu_fwnode, viommu_ops); > + if (ret) > + return ERR_PTR(ret); > + > + iommu_fwspec_add_ids(dev, &epid, 1); > + > + return viommu_ops; > +} > + > +/** > + * virt_dma_configure - Configure DMA of virtualized devices > + * @dev: the endpoint > + * > + * Setup the DMA and IOMMU ops of a virtual device, for platforms without > DT or > + * ACPI. > + * > + * Return: -EPROBE_DEFER if the device is managed by an IOMMU that > hasn't been > + * probed yet, 0 otherwise > + */ > +int virt_dma_configure(struct device *dev) > +{ > + const struct iommu_ops *iommu_ops; > + > + iommu_ops = virt_iommu_setup(dev); > + if (IS_ERR_OR_NULL(iommu_ops)) { > + int ret = PTR_ERR(iommu_ops); > + > + if (ret == -EPROBE_DEFER || ret == 0) > + return ret; > + dev_err(dev, "error %d while setting up virt IOMMU\n", ret); > + return 0; > + } > + > + /* > + * If we have reason to believe the IOMMU driver missed the initial > + * add_device callback for dev, replay it to get things in order. > + */ > + if (dev->bus && !device_iommu_mapped(dev)) > + iommu_probe_device(dev); > + > + /* Assume coherent, as well as full 64-bit addresses. */ > +#ifdef CONFIG_ARCH_HAS_SETUP_DMA_OPS > + arch_setup_dma_ops(dev, 0, ~0ULL, iommu_ops, true); > +#else > + iommu_setup_dma_ops(dev, 0, ~0ULL); > +#endif > + return 0; > +} > + > +/** > + * virt_set_iommu_ops - Set the IOMMU ops of a virtual IOMMU device > + * @dev: the IOMMU device (transport) > + * @ops: the new IOMMU ops or NULL > + * > + * Setup the iommu_ops associated to a viommu_spec, once the driver is > loaded > + * and the device probed. > + */ > +void virt_set_iommu_ops(struct device *dev, struct iommu_ops *ops) > +{ > + struct viommu_spec *viommu_spec; > + > + mutex_lock(&viommus_lock); > + list_for_each_entry(viommu_spec, &viommus, list) { > + if (viommu_spec->dev == dev) { > + viommu_spec->ops = ops; > + viommu_spec->fwnode = ops ? dev->fwnode : NULL; > + break; > + } > + } > + mutex_unlock(&viommus_lock); > +} > +EXPORT_SYMBOL_GPL(virt_set_iommu_ops); > diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c > index 93ff58632452..5429c12c879b 100644 > --- a/drivers/iommu/virtio-iommu.c > +++ b/drivers/iommu/virtio-iommu.c > @@ -21,6 +21,7 @@ > #include <linux/virtio.h> > #include <linux/virtio_config.h> > #include <linux/virtio_ids.h> > +#include <linux/virt_iommu.h> > #include <linux/wait.h> > > #include <uapi/linux/virtio_iommu.h> > @@ -1075,6 +1076,7 @@ static int viommu_probe(struct virtio_device *vdev) > if (ret) > goto err_free_vqs; > > + virt_set_iommu_ops(dev->parent, &viommu_ops); > iommu_device_set_ops(&viommu->iommu, &viommu_ops); > iommu_device_set_fwnode(&viommu->iommu, parent_dev- > >fwnode); > > @@ -1121,6 +1123,7 @@ static void viommu_remove(struct virtio_device > *vdev) > { > struct viommu_dev *viommu = vdev->priv; > > + virt_set_iommu_ops(vdev->dev.parent, NULL); > iommu_device_sysfs_remove(&viommu->iommu); > iommu_device_unregister(&viommu->iommu); > > diff --git a/include/linux/virt_iommu.h b/include/linux/virt_iommu.h > new file mode 100644 > index 000000000000..c68b03ec75ba > --- /dev/null > +++ b/include/linux/virt_iommu.h > @@ -0,0 +1,19 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ > +#ifndef VIRTIO_IOMMU_H_ > +#define VIRTIO_IOMMU_H_ > + > +#if IS_ENABLED(CONFIG_VIRTIO_IOMMU_TOPOLOGY) > +int virt_dma_configure(struct device *dev); > +void virt_set_iommu_ops(struct device *dev, struct iommu_ops *ops); > +#else /* !CONFIG_VIRTIO_IOMMU_TOPOLOGY */ > +static inline int virt_dma_configure(struct device *dev) > +{ > + /* Don't disturb the normal DMA configuration methods */ > + return 0; > +} > + > +static inline void virt_set_iommu_ops(struct device *dev, struct iommu_ops > *ops) > +{ } > +#endif /* !CONFIG_VIRTIO_IOMMU_TOPOLOGY */ > + > +#endif /* VIRTIO_IOMMU_H_ */ > diff --git a/include/uapi/linux/virtio_iommu.h > b/include/uapi/linux/virtio_iommu.h > index 237e36a280cb..ec57d215086a 100644 > --- a/include/uapi/linux/virtio_iommu.h > +++ b/include/uapi/linux/virtio_iommu.h > @@ -16,6 +16,7 @@ > #define VIRTIO_IOMMU_F_BYPASS 3 > #define VIRTIO_IOMMU_F_PROBE 4 > #define VIRTIO_IOMMU_F_MMIO 5 > +#define VIRTIO_IOMMU_F_TOPOLOGY 6 > > struct virtio_iommu_range_64 { > __le64 start; > @@ -27,6 +28,12 @@ struct virtio_iommu_range_32 { > __le32 end; > }; > > +struct virtio_iommu_topo_config { > + __le32 offset; > + __le32 num_items; > + __le32 item_length; > +}; > + > struct virtio_iommu_config { > /* Supported page sizes */ > __le64 page_size_mask; > @@ -36,6 +43,25 @@ struct virtio_iommu_config { > struct virtio_iommu_range_32 domain_range; > /* Probe buffer size */ > __le32 probe_size; > + struct virtio_iommu_topo_config topo_config; > +}; > + > +#define VIRTIO_IOMMU_TOPO_PCI_RANGE 0x1 > +#define VIRTIO_IOMMU_TOPO_ENDPOINT 0x2 > + > +struct virtio_iommu_topo_pci_range { > + __le16 type; > + __le16 hierarchy; > + __le16 requester_start; > + __le16 requester_end; > + __le32 endpoint_start; > +}; > + > +struct virtio_iommu_topo_endpoint { > + __le16 type; > + __le16 reserved; > + __le32 endpoint; > + __le64 address; > }; > > /* Request types */ > -- > 2.25.0 > > _______________________________________________ > iommu mailing list > iommu@xxxxxxxxxxxxxxxxxxxxxxxxxx > https://lists.linuxfoundation.org/mailman/listinfo/iommu _______________________________________________ Virtualization mailing list Virtualization@xxxxxxxxxxxxxxxxxxxxxxxxxx https://lists.linuxfoundation.org/mailman/listinfo/virtualization