On Thu, 31 Oct 2024 at 08:21, Jason Gunthorpe <jgg@xxxxxxxxxx> wrote:
>
> For SMMUv3 a IOMMU_DOMAIN_NESTED is composed of a S2 iommu_domain acting
> as the parent and a user provided STE fragment that defines the CD table
> and related data with addresses translated by the S2 iommu_domain.
>
> The kernel only permits userspace to control certain allowed bits of the
> STE that are safe for user/guest control.
>
> IOTLB maintenance is a bit subtle here, the S1 implicitly includes the S2
> translation, but there is no way of knowing which S1 entries refer to a
> range of S2.
>
> For the IOTLB we follow ARM's guidance and issue a CMDQ_OP_TLBI_NH_ALL to
> flush all ASIDs from the VMID after flushing the S2 on any change to the
> S2.
>
> The IOMMU_DOMAIN_NESTED can only be created from inside a VIOMMU as the
> invalidation path relies on the VIOMMU to translate virtual stream ID used
> in the invalidation commands for the CD table and ATS.
>
> Reviewed-by: Nicolin Chen <nicolinc@xxxxxxxxxx>
> Reviewed-by: Kevin Tian <kevin.tian@xxxxxxxxx>
> Reviewed-by: Jerry Snitselaar <jsnitsel@xxxxxxxxxx>
> Reviewed-by: Donald Dutile <ddutile@xxxxxxxxxx>
> Signed-off-by: Nicolin Chen <nicolinc@xxxxxxxxxx>
> Signed-off-by: Jason Gunthorpe <jgg@xxxxxxxxxx>
> ---
>  .../arm/arm-smmu-v3/arm-smmu-v3-iommufd.c   | 157 ++++++++++++++++++
>  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c |  17 +-
>  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h |  26 +++
>  include/uapi/linux/iommufd.h                |  20 +++
>  4 files changed, 219 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c
> index 60dd9e90759571..0b9fffc5b2f09b 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-iommufd.c
> @@ -30,7 +30,164 @@ void *arm_smmu_hw_info(struct device *dev, u32 *length, u32 *type)
>         return info;
>  }
>
> +static void arm_smmu_make_nested_cd_table_ste(
> +       struct arm_smmu_ste *target, struct arm_smmu_master *master,
> +       struct arm_smmu_nested_domain *nested_domain, bool ats_enabled)
> +{
> +       arm_smmu_make_s2_domain_ste(
> +               target, master, nested_domain->vsmmu->s2_parent, ats_enabled);
> +
> +       target->data[0] = cpu_to_le64(STRTAB_STE_0_V |
> +                                     FIELD_PREP(STRTAB_STE_0_CFG,
> +                                                STRTAB_STE_0_CFG_NESTED));
> +       target->data[0] |= nested_domain->ste[0] &
> +                          ~cpu_to_le64(STRTAB_STE_0_CFG);
> +       target->data[1] |= nested_domain->ste[1];
> +}
> +
> +/*
> + * Create a physical STE from the virtual STE that userspace provided when it
> + * created the nested domain. Using the vSTE userspace can request:
> + * - Non-valid STE
> + * - Abort STE
> + * - Bypass STE (install the S2, no CD table)
> + * - CD table STE (install the S2 and the userspace CD table)
> + */
> +static void arm_smmu_make_nested_domain_ste(
> +       struct arm_smmu_ste *target, struct arm_smmu_master *master,
> +       struct arm_smmu_nested_domain *nested_domain, bool ats_enabled)
> +{
> +       unsigned int cfg =
> +               FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(nested_domain->ste[0]));
> +
> +       /*
> +        * Userspace can request a non-valid STE through the nesting interface.
> +        * We relay that into an abort physical STE with the intention that
> +        * C_BAD_STE for this SID can be generated to userspace.
> +        */
> +       if (!(nested_domain->ste[0] & cpu_to_le64(STRTAB_STE_0_V)))
> +               cfg = STRTAB_STE_0_CFG_ABORT;
> +
> +       switch (cfg) {
> +       case STRTAB_STE_0_CFG_S1_TRANS:
> +               arm_smmu_make_nested_cd_table_ste(target, master, nested_domain,
> +                                                 ats_enabled);
> +               break;
> +       case STRTAB_STE_0_CFG_BYPASS:
> +               arm_smmu_make_s2_domain_ste(target, master,
> +                                           nested_domain->vsmmu->s2_parent,
> +                                           ats_enabled);
> +               break;
> +       case STRTAB_STE_0_CFG_ABORT:
> +       default:
> +               arm_smmu_make_abort_ste(target);
> +               break;
> +       }
> +}
> +
> +static int arm_smmu_attach_dev_nested(struct iommu_domain *domain,
> +                                     struct device *dev)
> +{
> +       struct arm_smmu_nested_domain *nested_domain =
> +               to_smmu_nested_domain(domain);
> +       struct arm_smmu_master *master = dev_iommu_priv_get(dev);
> +       struct arm_smmu_attach_state state = {
> +               .master = master,
> +               .old_domain = iommu_get_domain_for_dev(dev),
> +               .ssid = IOMMU_NO_PASID,
> +               /* Currently invalidation of ATC is not supported */
> +               .disable_ats = true,
> +       };
> +       struct arm_smmu_ste ste;
> +       int ret;
> +
> +       if (nested_domain->vsmmu->smmu != master->smmu)
> +               return -EINVAL;
> +       if (arm_smmu_ssids_in_use(&master->cd_table))
> +               return -EBUSY;
> +
> +       mutex_lock(&arm_smmu_asid_lock);
> +       ret = arm_smmu_attach_prepare(&state, domain);
> +       if (ret) {
> +               mutex_unlock(&arm_smmu_asid_lock);
> +               return ret;
> +       }
> +
> +       arm_smmu_make_nested_domain_ste(&ste, master, nested_domain,
> +                                       state.ats_enabled);
> +       arm_smmu_install_ste_for_dev(master, &ste);
> +       arm_smmu_attach_commit(&state);
> +       mutex_unlock(&arm_smmu_asid_lock);
> +       return 0;
> +}
> +
> +static void arm_smmu_domain_nested_free(struct iommu_domain *domain)
> +{
> +       kfree(to_smmu_nested_domain(domain));
> +}
> +
> +static const struct iommu_domain_ops arm_smmu_nested_ops = {
> +       .attach_dev = arm_smmu_attach_dev_nested,
> +       .free = arm_smmu_domain_nested_free,
> +};
> +
> +static int arm_smmu_validate_vste(struct iommu_hwpt_arm_smmuv3 *arg)
> +{
> +       unsigned int cfg;
> +
> +       if (!(arg->ste[0] & cpu_to_le64(STRTAB_STE_0_V))) {
> +               memset(arg->ste, 0, sizeof(arg->ste));
> +               return 0;
> +       }
> +
> +       /* EIO is reserved for invalid STE data. */
> +       if ((arg->ste[0] & ~STRTAB_STE_0_NESTING_ALLOWED) ||
> +           (arg->ste[1] & ~STRTAB_STE_1_NESTING_ALLOWED))
> +               return -EIO;
> +
> +       cfg = FIELD_GET(STRTAB_STE_0_CFG, le64_to_cpu(arg->ste[0]));
> +       if (cfg != STRTAB_STE_0_CFG_ABORT && cfg != STRTAB_STE_0_CFG_BYPASS &&
> +           cfg != STRTAB_STE_0_CFG_S1_TRANS)
> +               return -EIO;
> +       return 0;
> +}
> +
> +static struct iommu_domain *
> +arm_vsmmu_alloc_domain_nested(struct iommufd_viommu *viommu, u32 flags,
> +                             const struct iommu_user_data *user_data)
> +{
> +       struct arm_vsmmu *vsmmu = container_of(viommu, struct arm_vsmmu, core);
> +       struct arm_smmu_nested_domain *nested_domain;
> +       struct iommu_hwpt_arm_smmuv3 arg;
> +       int ret;
> +
> +       if (flags)
> +               return ERR_PTR(-EOPNOTSUPP);

This check fails when using user page faults, with
flags = IOMMU_HWPT_FAULT_ID_VALID (4).

Strange, this check did not exist in the last version?

iommufd_viommu_alloc_hwpt_nested
 -> viommu->ops->alloc_domain_nested(viommu, flags, user_data)
 -> arm_vsmmu_alloc_domain_nested

Thanks
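For illustration only (a sketch of what I am hitting, not necessarily how it
should be fixed): for the allocation to succeed when userspace passes a fault
ID, the driver-side check would have to tolerate IOMMU_HWPT_FAULT_ID_VALID
rather than rejecting every non-zero flag, e.g.:

        /*
         * Illustrative sketch, not a tested patch: accept only the
         * fault-ID flag here and keep refusing any other flag bits
         * with -EOPNOTSUPP.
         */
        if (flags & ~IOMMU_HWPT_FAULT_ID_VALID)
                return ERR_PTR(-EOPNOTSUPP);

Alternatively the iommufd core could perhaps mask IOMMU_HWPT_FAULT_ID_VALID
out of flags before calling viommu->ops->alloc_domain_nested(), so drivers
that do not care about the fault ID never see it. Either way, with the
current "if (flags)" the nested allocation fails as soon as a valid fault ID
is requested.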