This ioctl interface sets up guest CR3 (gCR3) table, which is defined by guest IOMMU driver. It also enables nested I/O page translation in the host. Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@xxxxxxx> --- drivers/iommu/amd/amd_iommu.h | 12 ++++ drivers/iommu/amd/iommu.c | 107 ++++++++++++++++++++++++++++++++++ drivers/iommu/amd/viommu.c | 36 ++++++++++++ include/linux/iommu.h | 1 + include/uapi/linux/iommufd.h | 20 +++++++ 5 files changed, 176 insertions(+) diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index fccae07e8c9f..463cd59127b7 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -84,6 +84,18 @@ extern void amd_iommu_domain_flush_tlb_pde(struct protection_domain *domain); extern int amd_iommu_flush_tlb(struct iommu_domain *dom, u32 pasid); extern int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, u32 pasid, unsigned long cr3); +extern int amd_viommu_user_gcr3_update(const void *user_data, + struct iommu_domain *udom); +extern int amd_iommu_setup_gcr3_table(struct amd_iommu *iommu, + struct pci_dev *pdev, + struct iommu_domain *dom, + struct iommu_domain *udom, + int pasids, bool giov); +extern int amd_iommu_user_set_gcr3(struct amd_iommu *iommu, + struct iommu_domain *dom, + struct iommu_domain *udom, + struct pci_dev *pdev, u32 pasid, + unsigned long cr3); extern int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, u32 pasid); extern void amd_iommu_iotlb_sync(struct iommu_domain *domain, struct iommu_iotlb_gather *gather); diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index f22b2a5a8bfc..bff53977f8f7 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -80,6 +80,8 @@ struct kmem_cache *amd_iommu_irq_cache; static void detach_device(struct device *dev); static int domain_enable_v2(struct protection_domain *domain, int pasids, bool giov); +static int __set_gcr3(struct protection_domain *domain, u32 pasid, + unsigned long cr3); 
/**************************************************************************** * @@ -2525,10 +2527,43 @@ static void *amd_iommu_hw_info(struct device *dev, u32 *length) return hwinfo; } +static struct iommu_domain * +amd_iommu_domain_alloc_user(struct device *dev, + enum iommu_hwpt_type hwpt_type, + struct iommu_domain *parent, + const union iommu_domain_user_data *user_data) +{ + int ret; + struct iommu_domain *dom = iommu_domain_alloc(dev->bus); + + if (!dom || !parent) + return dom; + if (hwpt_type != IOMMU_HWPT_TYPE_AMD_V2 || !user_data) goto err_out; + /* + * The parent is not null only when external driver calls IOMMUFD kAPI + * to create IOMMUFD_OBJ_HW_PAGETABLE to attach a bound device to IOAS. + * This is for nested (v2) page table. + * + * TODO: Currently, only support nested table w/ 1 pasid for GIOV use case. + * Add support for multiple pasids. + */ + dom->type = IOMMU_DOMAIN_NESTED; + + ret = amd_viommu_user_gcr3_update(user_data, dom); + if (ret) + goto err_out; + + return dom; +err_out: + iommu_domain_free(dom); + return NULL; +} + const struct iommu_ops amd_iommu_ops = { .capable = amd_iommu_capable, .hw_info = amd_iommu_hw_info, .domain_alloc = amd_iommu_domain_alloc, + .domain_alloc_user = amd_iommu_domain_alloc_user, .probe_device = amd_iommu_probe_device, .release_device = amd_iommu_release_device, .probe_finalize = amd_iommu_probe_finalize, @@ -2537,6 +2572,7 @@ const struct iommu_ops amd_iommu_ops = { .is_attach_deferred = amd_iommu_is_attach_deferred, .pgsize_bitmap = AMD_IOMMU_PGSIZES, .def_domain_type = amd_iommu_def_domain_type, + .hw_info_type = IOMMU_HW_INFO_TYPE_AMD, .default_domain_ops = &(const struct iommu_domain_ops) { .attach_dev = amd_iommu_attach_device, .map_pages = amd_iommu_map_pages, @@ -2639,6 +2675,77 @@ int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids, bool giov) } EXPORT_SYMBOL(amd_iommu_domain_enable_v2); +int amd_iommu_setup_gcr3_table(struct amd_iommu *iommu, struct pci_dev *pdev, + struct iommu_domain *dom, + struct iommu_domain *udom, + int pasids, bool giov) +{ + 
int levels; + struct protection_domain *pdom = to_pdomain(dom); + struct protection_domain *updom = to_pdomain(udom); + struct iommu_dev_data *dev_data = dev_iommu_priv_get(&pdev->dev); + + if (updom->gcr3_tbl) + return -EINVAL; + + /* Number of GCR3 table levels required */ + for (levels = 0; (pasids - 1) & ~0x1ff; pasids >>= 9) + levels += 1; + + if (levels > amd_iommu_max_glx_val) + return -EINVAL; + + updom->gcr3_tbl = (void *)get_zeroed_page(GFP_ATOMIC); + if (updom->gcr3_tbl == NULL) + return -ENOMEM; + + updom->glx = levels; + updom->flags |= PD_IOMMUV2_MASK; + if (giov) + updom->flags |= PD_GIOV_MASK; + + set_dte_entry(iommu, dev_data->devid, pdom, updom, + updom->gcr3_tbl, + dev_data->ats.enabled, false); + clone_aliases(iommu, dev_data->dev); + + iommu_flush_dte(iommu, dev_data->devid); + iommu_completion_wait(iommu); + return 0; +} + +/* + * Note: For vIOMMU, the guest could be using a different + * GCR3 table for each VFIO pass-through device. + * Therefore, we need a per-device GCR3 table. 
+ */ +int amd_iommu_user_set_gcr3(struct amd_iommu *iommu, + struct iommu_domain *dom, + struct iommu_domain *udom, + struct pci_dev *pdev, u32 pasid, + unsigned long cr3) +{ + struct iommu_dev_data *dev_data = dev_iommu_priv_get(&pdev->dev); + struct protection_domain *domain = to_pdomain(dom); + struct protection_domain *udomain = to_pdomain(udom); + unsigned long flags; + int ret; + + spin_lock_irqsave(&domain->lock, flags); + spin_lock(&udomain->lock); + + ret = __set_gcr3(udomain, pasid, cr3); + if (!ret) { + device_flush_dte(dev_data); + iommu_completion_wait(iommu); + } + + spin_unlock(&udomain->lock); + spin_unlock_irqrestore(&domain->lock, flags); + + return ret; +} + static int __flush_pasid(struct protection_domain *domain, u32 pasid, u64 address, bool size) { diff --git a/drivers/iommu/amd/viommu.c b/drivers/iommu/amd/viommu.c index 1bd4282384c4..8ce3ee3d6bf5 100644 --- a/drivers/iommu/amd/viommu.c +++ b/drivers/iommu/amd/viommu.c @@ -1072,3 +1072,39 @@ int amd_viommu_cmdbuf_update(struct amd_viommu_cmdbuf_data *data) return -EINVAL; } EXPORT_SYMBOL(amd_viommu_cmdbuf_update); + +int amd_viommu_user_gcr3_update(const void *user_data, struct iommu_domain *udom) +{ + int ret; + struct pci_dev *pdev; + unsigned long npinned; + struct page *pages[2]; + struct iommu_domain *dom; + struct iommu_hwpt_amd_v2 *hwpt = (struct iommu_hwpt_amd_v2 *)user_data; + struct amd_iommu *iommu = get_amd_iommu_from_devid(hwpt->iommu_id); + u16 hdev_id = viommu_get_hdev_id(iommu, hwpt->gid, hwpt->gdev_id); + + pr_debug("%s: gid=%u, hdev_id=%#x, gcr3_va=%#llx\n", + __func__, hwpt->gid, hdev_id, (unsigned long long) hwpt->gcr3_va); + + npinned = get_user_pages_fast(hwpt->gcr3_va, 1, FOLL_WRITE, pages); + if (!npinned) { + pr_err("Failure locking gcr3 page (%#llx).\n", hwpt->gcr3_va); + return -EINVAL; + } + + /* Allocate gcr3 table */ + pdev = pci_get_domain_bus_and_slot(0, PCI_BUS_NUM(hdev_id), + hdev_id & 0xff); + dom = 
iommu_get_domain_for_dev(&pdev->dev); + if (!dom) + return -EINVAL; + + /* TODO: Only support 1 pasid (zero) for now */ + ret = amd_iommu_setup_gcr3_table(iommu, pdev, dom, udom, 1, + iommu_feature(iommu, FEATURE_GIOSUP)); + if (ret) { + pr_err("%s: Fail to enable gcr3 (devid=%#x)\n", __func__, pci_dev_id(pdev)); return ret; } + + return amd_iommu_user_set_gcr3(iommu, dom, udom, pdev, 0, hwpt->gcr3); +} diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 4116f12d5f97..9239cd01d77c 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -236,6 +236,7 @@ union iommu_domain_user_data { #endif struct iommu_hwpt_vtd_s1 vtd; struct iommu_hwpt_arm_smmuv3 smmuv3; + struct iommu_hwpt_amd_v2 amdv2; }; /** diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h index f8ea9faf6770..4147171429e1 100644 --- a/include/uapi/linux/iommufd.h +++ b/include/uapi/linux/iommufd.h @@ -408,6 +408,23 @@ struct iommu_hwpt_arm_smmuv3 { __aligned_u64 out_event_uptr; }; +/** + * struct iommu_hwpt_amd_v2 - AMD IOMMU specific user-managed + * v2 I/O page table data + * @gcr3: GCR3 guest physical address + * @gcr3_va: GCR3 host virtual address + * @gid: Guest ID + * @iommu_id: IOMMU host device ID + * @gdev_id: Guest device ID + */ +struct iommu_hwpt_amd_v2 { + __u64 gcr3; + __u64 gcr3_va; + __u32 gid; + __u32 iommu_id; + __u32 gdev_id; +}; + /** * enum iommu_hwpt_type - IOMMU HWPT Type * @IOMMU_HWPT_TYPE_DEFAULT: default @@ -418,6 +435,7 @@ enum iommu_hwpt_type { IOMMU_HWPT_TYPE_DEFAULT, IOMMU_HWPT_TYPE_VTD_S1, IOMMU_HWPT_TYPE_ARM_SMMUV3, + IOMMU_HWPT_TYPE_AMD_V2, }; /** @@ -523,11 +541,13 @@ struct iommu_hw_info_amd { * enum iommu_hw_info_type - IOMMU Hardware Info Types * @IOMMU_HW_INFO_TYPE_INTEL_VTD: Intel VT-d iommu info type * @IOMMU_HW_INFO_TYPE_ARM_SMMUV3: ARM SMMUv3 iommu info type + * @IOMMU_HW_INFO_TYPE_AMD: AMD IOMMU info type */ enum iommu_hw_info_type { IOMMU_HW_INFO_TYPE_NONE, IOMMU_HW_INFO_TYPE_INTEL_VTD, IOMMU_HW_INFO_TYPE_ARM_SMMUV3, + 
IOMMU_HW_INFO_TYPE_AMD, }; /** -- 2.34.1