On Mon, Dec 05, 2022 at 04:34:08PM +0100, Niklas Schnelle wrote: > On Tue, 2022-11-29 at 16:09 -0400, Jason Gunthorpe wrote: > > On Tue, Nov 29, 2022 at 06:41:22PM +0000, Robin Murphy wrote: > > > On 2022-11-29 17:33, Jason Gunthorpe wrote: > > > > On Mon, Nov 28, 2022 at 09:01:43PM +0000, Robin Murphy wrote: > > > > > > > > > I'm hardly an advocate for trying to save users from themselves, but I > > > > > honestly can't see any justifiable reason for not having sysfs respect > > > > > iommu_get_def_domain_type(). > > > > > > > > We really need to rename this value if it is not actually just an > > > > advisory "default" but a functional requirement .. > > > > > > It represents a required default domain type. As in, the type for the > > > device's default domain. Not the default type for a domain. It's the > > > iommu_def_domain_type variable that holds the *default* default domain type > > > ;) > > > > I find the name "default domain" incredibly confusing at this point in > > time. > > Yes it definitely confused me as evidenced by this patch. I did some fiddling with what this rename could look like and came up with this very sketchy sketch. I think it looks a lot clearer in the end but it seems a bit tricky to break things up into nice patches. 
Jason diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index beb9f535d3d815..052caf21fee3dd 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -34,7 +34,12 @@ static struct kset *iommu_group_kset; static DEFINE_IDA(iommu_group_ida); -static unsigned int iommu_def_domain_type __read_mostly; +static enum dma_api_policy { + DMA_API_POLICY_IDENTITY, + DMA_API_POLICY_STRICT, + DMA_API_POLICY_LAZY, + DMA_API_POLICY_ANY, +} dma_api_default_policy __read_mostly; static bool iommu_dma_strict __read_mostly = IS_ENABLED(CONFIG_IOMMU_DEFAULT_DMA_STRICT); static u32 iommu_cmd_line __read_mostly; @@ -82,7 +87,7 @@ static const char * const iommu_group_resv_type_string[] = { static int iommu_bus_notifier(struct notifier_block *nb, unsigned long action, void *data); static int iommu_alloc_default_domain(struct iommu_group *group, - struct device *dev); + enum dma_api_policy policy); static struct iommu_domain *__iommu_domain_alloc(struct bus_type *bus, unsigned type); static int __iommu_attach_device(struct iommu_domain *domain, @@ -97,6 +102,9 @@ static struct iommu_group *iommu_group_get_for_dev(struct device *dev); static ssize_t iommu_group_store_type(struct iommu_group *group, const char *buf, size_t count); +static enum dma_api_policy +iommu_get_dma_api_mandatory_policy(struct device *dev); + #define IOMMU_GROUP_ATTR(_name, _mode, _show, _store) \ struct iommu_group_attribute iommu_group_attr_##_name = \ __ATTR(_name, _mode, _show, _store) @@ -125,26 +133,6 @@ static struct bus_type * const iommu_buses[] = { #endif }; -/* - * Use a function instead of an array here because the domain-type is a - * bit-field, so an array would waste memory. 
- */ -static const char *iommu_domain_type_str(unsigned int t) -{ - switch (t) { - case IOMMU_DOMAIN_BLOCKED: - return "Blocked"; - case IOMMU_DOMAIN_IDENTITY: - return "Passthrough"; - case IOMMU_DOMAIN_UNMANAGED: - return "Unmanaged"; - case IOMMU_DOMAIN_DMA: - return "Translated"; - default: - return "Unknown"; - } -} - static int __init iommu_subsys_init(void) { struct notifier_block *nb; @@ -161,19 +149,27 @@ static int __init iommu_subsys_init(void) } } - if (!iommu_default_passthrough() && !iommu_dma_strict) - iommu_def_domain_type = IOMMU_DOMAIN_DMA_FQ; + if (dma_api_default_policy == DMA_API_POLICY_LAZY && iommu_dma_strict) + dma_api_default_policy = DMA_API_POLICY_STRICT; - pr_info("Default domain type: %s %s\n", - iommu_domain_type_str(iommu_def_domain_type), - (iommu_cmd_line & IOMMU_CMD_LINE_DMA_API) ? - "(set via kernel command line)" : ""); - - if (!iommu_default_passthrough()) - pr_info("DMA domain TLB invalidation policy: %s mode %s\n", + switch (dma_api_default_policy) { + case DMA_API_POLICY_LAZY: + case DMA_API_POLICY_STRICT: + pr_info("DMA translated domain TLB invalidation policy: %s mode %s\n", iommu_dma_strict ? "strict" : "lazy", (iommu_cmd_line & IOMMU_CMD_LINE_STRICT) ? - "(set via kernel command line)" : ""); + "(set via kernel command line)" : + ""); + break; + case DMA_API_POLICY_IDENTITY: + pr_info("Default domain type: Passthrough %s\n", + (iommu_cmd_line & IOMMU_CMD_LINE_DMA_API) ? + "(set via kernel command line)" : + ""); + break; + default: + break; + } nb = kcalloc(ARRAY_SIZE(iommu_buses), sizeof(*nb), GFP_KERNEL); if (!nb) @@ -353,7 +349,8 @@ int iommu_probe_device(struct device *dev) * checked. 
*/ mutex_lock(&group->mutex); - iommu_alloc_default_domain(group, dev); + iommu_alloc_default_domain(group, + iommu_get_dma_api_mandatory_policy(dev)); /* * If device joined an existing group which has been claimed, don't @@ -436,8 +433,8 @@ early_param("iommu.strict", iommu_dma_setup); void iommu_set_dma_strict(void) { iommu_dma_strict = true; - if (iommu_def_domain_type == IOMMU_DOMAIN_DMA_FQ) - iommu_def_domain_type = IOMMU_DOMAIN_DMA; + if (dma_api_default_policy == DMA_API_POLICY_LAZY) + dma_api_default_policy = DMA_API_POLICY_STRICT; } static ssize_t iommu_group_attr_show(struct kobject *kobj, @@ -1557,53 +1554,100 @@ struct iommu_group *fsl_mc_device_group(struct device *dev) } EXPORT_SYMBOL_GPL(fsl_mc_device_group); -static int iommu_get_def_domain_type(struct device *dev) +static enum dma_api_policy +iommu_get_dma_api_mandatory_policy(struct device *dev) { const struct iommu_ops *ops = dev_iommu_ops(dev); if (dev_is_pci(dev) && to_pci_dev(dev)->untrusted) - return IOMMU_DOMAIN_DMA; + return DMA_API_POLICY_STRICT; if (ops->def_domain_type) return ops->def_domain_type(dev); - - return 0; + return DMA_API_POLICY_ANY; } -static int iommu_group_alloc_default_domain(struct bus_type *bus, - struct iommu_group *group, - unsigned int type) +static int __iommu_get_dma_api_group_mandatory_policy(struct device *dev, + void *data) { - struct iommu_domain *dom; + enum dma_api_policy *policy = data; + enum dma_api_policy dev_policy = + iommu_get_dma_api_mandatory_policy(dev); - dom = __iommu_domain_alloc(bus, type); - if (!dom && type != IOMMU_DOMAIN_DMA) { - dom = __iommu_domain_alloc(bus, IOMMU_DOMAIN_DMA); - if (dom) - pr_warn("Failed to allocate default IOMMU domain of type %u for group %s - Falling back to IOMMU_DOMAIN_DMA", - type, group->name); + if (dev_policy == DMA_API_POLICY_ANY || *policy == dev_policy) + return 0; + if (*policy == DMA_API_POLICY_ANY) { + *policy = dev_policy; + return 0; } - if (!dom) - return -ENOMEM; + dev_warn( + dev, + "IOMMU driver is 
requesting different DMA API policies for devices in the same group %s.", + dev->iommu_group->name); - group->default_domain = dom; - if (!group->domain) - group->domain = dom; + /* + * Resolve the conflict by priority, the lower numbers in the enum are + * higher preference in case of driver problems. + */ + if (dev_policy < *policy) + *policy = dev_policy; return 0; } +static enum dma_api_policy +iommu_get_dma_api_group_mandatory_policy(struct iommu_group *group) +{ + enum dma_api_policy policy = DMA_API_POLICY_ANY; + + __iommu_group_for_each_dev(group, &policy, + __iommu_get_dma_api_group_mandatory_policy); + return policy; +} + static int iommu_alloc_default_domain(struct iommu_group *group, - struct device *dev) + enum dma_api_policy policy) { - unsigned int type; + struct iommu_domain *domain; + struct group_device *grp_dev = + list_first_entry(&group->devices, struct group_device, list); + struct bus_type *bus = grp_dev->dev->bus; + + lockdep_assert_held(group->mutex); if (group->default_domain) return 0; + if (policy == DMA_API_POLICY_ANY) + policy = dma_api_default_policy; + switch (policy) { + case DMA_API_POLICY_IDENTITY: + domain = __iommu_domain_alloc(bus, IOMMU_DOMAIN_IDENTITY); + break; + + case DMA_API_POLICY_LAZY: + domain = __iommu_domain_alloc(bus, IOMMU_DOMAIN_DMA); + if (domain && !domain->ops->prefer_dma_api_fq){ + pr_warn("Failed to allocate default lazy IOMMU domain for group %s - Falling back to strict", + group->name); + } + break; - type = iommu_get_def_domain_type(dev) ? 
: iommu_def_domain_type; + case DMA_API_POLICY_STRICT: + domain = __iommu_domain_alloc(bus, IOMMU_DOMAIN_DMA); + break; - return iommu_group_alloc_default_domain(dev->bus, group, type); + default: + WARN_ON(true); + domain = NULL; + } + + if (!domain) + return -ENOMEM; + + group->default_domain = domain; + if (!group->domain) + group->domain = domain; + return 0; } /** @@ -1688,52 +1732,6 @@ static int iommu_bus_notifier(struct notifier_block *nb, return 0; } -struct __group_domain_type { - struct device *dev; - unsigned int type; -}; - -static int probe_get_default_domain_type(struct device *dev, void *data) -{ - struct __group_domain_type *gtype = data; - unsigned int type = iommu_get_def_domain_type(dev); - - if (type) { - if (gtype->type && gtype->type != type) { - dev_warn(dev, "Device needs domain type %s, but device %s in the same iommu group requires type %s - using default\n", - iommu_domain_type_str(type), - dev_name(gtype->dev), - iommu_domain_type_str(gtype->type)); - gtype->type = 0; - } - - if (!gtype->dev) { - gtype->dev = dev; - gtype->type = type; - } - } - - return 0; -} - -static void probe_alloc_default_domain(struct bus_type *bus, - struct iommu_group *group) -{ - struct __group_domain_type gtype; - - memset(&gtype, 0, sizeof(gtype)); - - /* Ask for default domain requirements of all devices in the group */ - __iommu_group_for_each_dev(group, &gtype, - probe_get_default_domain_type); - - if (!gtype.type) - gtype.type = iommu_def_domain_type; - - iommu_group_alloc_default_domain(bus, group, gtype.type); - -} - static int iommu_group_do_dma_attach(struct device *dev, void *data) { struct iommu_domain *domain = data; @@ -1804,7 +1802,8 @@ int bus_iommu_probe(struct bus_type *bus) mutex_lock(&group->mutex); /* Try to allocate default domain */ - probe_alloc_default_domain(bus, group); + iommu_alloc_default_domain( + group, iommu_get_dma_api_group_mandatory_policy(group)); if (!group->default_domain) { mutex_unlock(&group->mutex); @@ -2600,19 +2599,19 
@@ void iommu_set_default_passthrough(bool cmd_line) { if (cmd_line) iommu_cmd_line |= IOMMU_CMD_LINE_DMA_API; - iommu_def_domain_type = IOMMU_DOMAIN_IDENTITY; + dma_api_default_policy = DMA_API_POLICY_IDENTITY; } void iommu_set_default_translated(bool cmd_line) { if (cmd_line) iommu_cmd_line |= IOMMU_CMD_LINE_DMA_API; - iommu_def_domain_type = IOMMU_DOMAIN_DMA; + dma_api_default_policy = DMA_API_POLICY_STRICT; } bool iommu_default_passthrough(void) { - return iommu_def_domain_type == IOMMU_DOMAIN_IDENTITY; + return dma_api_default_policy == DMA_API_POLICY_IDENTITY; } EXPORT_SYMBOL_GPL(iommu_default_passthrough); @@ -2832,103 +2831,40 @@ EXPORT_SYMBOL_GPL(iommu_sva_get_pasid); * group's default domain type through /sys/kernel/iommu_groups/<grp_id>/type * Please take a closer look if intended to use for other purposes. */ -static int iommu_change_dev_def_domain(struct iommu_group *group, - struct device *prev_dev, int type) +static int iommu_change_group_dma_api_policy(struct iommu_group *group, + enum dma_api_policy policy) { - struct iommu_domain *prev_dom; - struct group_device *grp_dev; - int ret, dev_def_dom; - struct device *dev; - - mutex_lock(&group->mutex); - - if (group->default_domain != group->domain) { - dev_err_ratelimited(prev_dev, "Group not assigned to default domain\n"); - ret = -EBUSY; - goto out; - } - - /* - * iommu group wasn't locked while acquiring device lock in - * iommu_group_store_type(). So, make sure that the device count hasn't - * changed while acquiring device lock. - * - * Changing default domain of an iommu group with two or more devices - * isn't supported because there could be a potential deadlock. Consider - * the following scenario. 
T1 is trying to acquire device locks of all - * the devices in the group and before it could acquire all of them, - * there could be another thread T2 (from different sub-system and use - * case) that has already acquired some of the device locks and might be - * waiting for T1 to release other device locks. - */ - if (iommu_group_device_count(group) != 1) { - dev_err_ratelimited(prev_dev, "Cannot change default domain: Group has more than one device\n"); - ret = -EINVAL; - goto out; - } - - /* Since group has only one device */ - grp_dev = list_first_entry(&group->devices, struct group_device, list); - dev = grp_dev->dev; - - if (prev_dev != dev) { - dev_err_ratelimited(prev_dev, "Cannot change default domain: Device has been changed\n"); - ret = -EBUSY; - goto out; - } + enum dma_api_policy mandatory = + iommu_get_dma_api_group_mandatory_policy(group); + struct iommu_domain *old_domain; + int ret; - prev_dom = group->default_domain; - if (!prev_dom) { - ret = -EINVAL; - goto out; - } + if (mandatory != DMA_API_POLICY_ANY && policy != mandatory) + return -EINVAL; - dev_def_dom = iommu_get_def_domain_type(dev); - if (!type) { - /* - * If the user hasn't requested any specific type of domain and - * if the device supports both the domains, then default to the - * domain the device was booted with - */ - type = dev_def_dom ? 
: iommu_def_domain_type; - } else if (dev_def_dom && type != dev_def_dom) { - dev_err_ratelimited(prev_dev, "Device cannot be in %s domain\n", - iommu_domain_type_str(type)); - ret = -EINVAL; - goto out; - } + mutex_lock(&group->mutex); - /* - * Switch to a new domain only if the requested domain type is different - * from the existing default domain type - */ - if (prev_dom->type == type) { - ret = 0; - goto out; + /* Shortcut if FQ is being enabled */ + if (group->default_domain->type == IOMMU_DOMAIN_DMA && + policy == DMA_API_POLICY_LAZY) { + ret = iommu_dma_init_fq(group->default_domain); + if (!ret) { + group->default_domain->type = IOMMU_DOMAIN_DMA_FQ; + mutex_unlock(&group->mutex); + return 0; + } } - /* We can bring up a flush queue without tearing down the domain */ - if (type == IOMMU_DOMAIN_DMA_FQ && prev_dom->type == IOMMU_DOMAIN_DMA) { - ret = iommu_dma_init_fq(prev_dom); - if (!ret) - prev_dom->type = IOMMU_DOMAIN_DMA_FQ; - goto out; + /* Otherwise just replace the whole domain */ + old_domain = group->default_domain; + group->default_domain = NULL; + ret = iommu_alloc_default_domain(group, policy); + if (ret) { + group->default_domain = old_domain; + mutex_unlock(&group->mutex); + return ret; } - - /* Sets group->default_domain to the newly allocated domain */ - ret = iommu_group_alloc_default_domain(dev->bus, group, type); - if (ret) - goto out; - - ret = iommu_create_device_direct_mappings(group, dev); - if (ret) - goto free_new_domain; - - ret = __iommu_attach_device(group->default_domain, dev); - if (ret) - goto free_new_domain; - - group->domain = group->default_domain; + iommu_group_create_direct_mappings(group); /* * Release the mutex here because ops->probe_finalize() call-back of @@ -2938,20 +2874,15 @@ static int iommu_change_dev_def_domain(struct iommu_group *group, */ mutex_unlock(&group->mutex); + /* + * FIXME: How does iommu_setup_dma_ops() get called with the new domain + * on ARM? 
+ */ + /* Make sure dma_ops is appropriatley set */ - iommu_group_do_probe_finalize(dev, group->default_domain); - iommu_domain_free(prev_dom); + __iommu_group_dma_finalize(group); + iommu_domain_free(old_domain); return 0; - -free_new_domain: - iommu_domain_free(group->default_domain); - group->default_domain = prev_dom; - group->domain = prev_dom; - -out: - mutex_unlock(&group->mutex); - - return ret; } /* @@ -2966,9 +2897,8 @@ static int iommu_change_dev_def_domain(struct iommu_group *group, static ssize_t iommu_group_store_type(struct iommu_group *group, const char *buf, size_t count) { - struct group_device *grp_dev; - struct device *dev; - int ret, req_type; + enum dma_api_policy policy; + int ret; if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO)) return -EACCES; @@ -2977,77 +2907,30 @@ static ssize_t iommu_group_store_type(struct iommu_group *group, return -EINVAL; if (sysfs_streq(buf, "identity")) - req_type = IOMMU_DOMAIN_IDENTITY; + policy = DMA_API_POLICY_IDENTITY; else if (sysfs_streq(buf, "DMA")) - req_type = IOMMU_DOMAIN_DMA; + policy = DMA_API_POLICY_STRICT; else if (sysfs_streq(buf, "DMA-FQ")) - req_type = IOMMU_DOMAIN_DMA_FQ; + policy = DMA_API_POLICY_LAZY; else if (sysfs_streq(buf, "auto")) - req_type = 0; + policy = DMA_API_POLICY_ANY; else return -EINVAL; /* - * Lock/Unlock the group mutex here before device lock to - * 1. Make sure that the iommu group has only one device (this is a - * prerequisite for step 2) - * 2. 
Get struct *dev which is needed to lock device - */ - mutex_lock(&group->mutex); - if (iommu_group_device_count(group) != 1) { - mutex_unlock(&group->mutex); - pr_err_ratelimited("Cannot change default domain: Group has more than one device\n"); - return -EINVAL; - } - - /* Since group has only one device */ - grp_dev = list_first_entry(&group->devices, struct group_device, list); - dev = grp_dev->dev; - get_device(dev); - - /* - * Don't hold the group mutex because taking group mutex first and then - * the device lock could potentially cause a deadlock as below. Assume - * two threads T1 and T2. T1 is trying to change default domain of an - * iommu group and T2 is trying to hot unplug a device or release [1] VF - * of a PCIe device which is in the same iommu group. T1 takes group - * mutex and before it could take device lock assume T2 has taken device - * lock and is yet to take group mutex. Now, both the threads will be - * waiting for the other thread to release lock. Below, lock order was - * suggested. - * device_lock(dev); - * mutex_lock(&group->mutex); - * iommu_change_dev_def_domain(); - * mutex_unlock(&group->mutex); - * device_unlock(dev); - * - * [1] Typical device release path - * device_lock() from device/driver core code - * -> bus_notifier() - * -> iommu_bus_notifier() - * -> iommu_release_device() - * -> ops->release_device() vendor driver calls back iommu core code - * -> mutex_lock() from iommu core code + * Taking ownership disables the DMA API domain, prevents drivers from + * being attached, and switches to a blocking domain. Releasing + * ownership will put back the new or original DMA API domain. 
*/ - mutex_unlock(&group->mutex); - - /* Check if the device in the group still has a driver bound to it */ - device_lock(dev); - if (device_is_bound(dev) && !(req_type == IOMMU_DOMAIN_DMA_FQ && - group->default_domain->type == IOMMU_DOMAIN_DMA)) { - pr_err_ratelimited("Device is still bound to driver\n"); - ret = -EBUSY; - goto out; - } - - ret = iommu_change_dev_def_domain(group, dev, req_type); - ret = ret ?: count; - -out: - device_unlock(dev); - put_device(dev); + ret = iommu_group_claim_dma_owner(group, &ret); + if (ret) + return ret; - return ret; + ret = iommu_change_group_dma_api_policy(group, policy); + iommu_group_release_dma_owner(group); + if (ret) + return ret; + return count; } static bool iommu_is_default_domain(struct iommu_group *group)