Re: [PATCH v2 4/7] iommu: Let iommu.strict override ops->def_domain_type

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Mon, Dec 05, 2022 at 04:34:08PM +0100, Niklas Schnelle wrote:
> On Tue, 2022-11-29 at 16:09 -0400, Jason Gunthorpe wrote:
> > On Tue, Nov 29, 2022 at 06:41:22PM +0000, Robin Murphy wrote:
> > > On 2022-11-29 17:33, Jason Gunthorpe wrote:
> > > > On Mon, Nov 28, 2022 at 09:01:43PM +0000, Robin Murphy wrote:
> > > > 
> > > > > I'm hardly an advocate for trying to save users from themselves, but I
> > > > > honestly can't see any justifiable reason for not having sysfs respect
> > > > > iommu_get_def_domain_type().
> > > > 
> > > > We really need to rename this value if it is not actually just an
> > > > advisory "default" but a functional requirement ..
> > > 
> > > It represents a required default domain type. As in, the type for the
> > > device's default domain. Not the default type for a domain. It's the
> > > iommu_def_domain_type variable that holds the *default* default domain type
> > > ;)
> > 
> > I find the name "default domain" incredibly confusing at this point in
> > time.
> 
> Yes it definitely confused me as evidenced by this patch.

I did some fiddling what this could rename could look like and came
up with this very sketchy sketch. I think it looks a lot clearer in
the end but it seems a bit tricky to break things up into nice patches.

Jason

diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index beb9f535d3d815..052caf21fee3dd 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -34,7 +34,12 @@
 static struct kset *iommu_group_kset;
 static DEFINE_IDA(iommu_group_ida);
 
-static unsigned int iommu_def_domain_type __read_mostly;
+static enum dma_api_policy {
+	DMA_API_POLICY_IDENTITY,
+	DMA_API_POLICY_STRICT,
+	DMA_API_POLICY_LAZY,
+	DMA_API_POLICY_ANY,
+} dma_api_default_policy __read_mostly;
 static bool iommu_dma_strict __read_mostly = IS_ENABLED(CONFIG_IOMMU_DEFAULT_DMA_STRICT);
 static u32 iommu_cmd_line __read_mostly;
 
@@ -82,7 +87,7 @@ static const char * const iommu_group_resv_type_string[] = {
 static int iommu_bus_notifier(struct notifier_block *nb,
 			      unsigned long action, void *data);
 static int iommu_alloc_default_domain(struct iommu_group *group,
-				      struct device *dev);
+				      enum dma_api_policy policy);
 static struct iommu_domain *__iommu_domain_alloc(struct bus_type *bus,
 						 unsigned type);
 static int __iommu_attach_device(struct iommu_domain *domain,
@@ -97,6 +102,9 @@ static struct iommu_group *iommu_group_get_for_dev(struct device *dev);
 static ssize_t iommu_group_store_type(struct iommu_group *group,
 				      const char *buf, size_t count);
 
+static enum dma_api_policy
+iommu_get_dma_api_mandatory_policy(struct device *dev);
+
 #define IOMMU_GROUP_ATTR(_name, _mode, _show, _store)		\
 struct iommu_group_attribute iommu_group_attr_##_name =		\
 	__ATTR(_name, _mode, _show, _store)
@@ -125,26 +133,6 @@ static struct bus_type * const iommu_buses[] = {
 #endif
 };
 
-/*
- * Use a function instead of an array here because the domain-type is a
- * bit-field, so an array would waste memory.
- */
-static const char *iommu_domain_type_str(unsigned int t)
-{
-	switch (t) {
-	case IOMMU_DOMAIN_BLOCKED:
-		return "Blocked";
-	case IOMMU_DOMAIN_IDENTITY:
-		return "Passthrough";
-	case IOMMU_DOMAIN_UNMANAGED:
-		return "Unmanaged";
-	case IOMMU_DOMAIN_DMA:
-		return "Translated";
-	default:
-		return "Unknown";
-	}
-}
-
 static int __init iommu_subsys_init(void)
 {
 	struct notifier_block *nb;
@@ -161,19 +149,27 @@ static int __init iommu_subsys_init(void)
 		}
 	}
 
-	if (!iommu_default_passthrough() && !iommu_dma_strict)
-		iommu_def_domain_type = IOMMU_DOMAIN_DMA_FQ;
+	if (dma_api_default_policy == DMA_API_POLICY_LAZY && iommu_dma_strict)
+		dma_api_default_policy = DMA_API_POLICY_STRICT;
 
-	pr_info("Default domain type: %s %s\n",
-		iommu_domain_type_str(iommu_def_domain_type),
-		(iommu_cmd_line & IOMMU_CMD_LINE_DMA_API) ?
-			"(set via kernel command line)" : "");
-
-	if (!iommu_default_passthrough())
-		pr_info("DMA domain TLB invalidation policy: %s mode %s\n",
+	switch (dma_api_default_policy) {
+	case DMA_API_POLICY_LAZY:
+	case DMA_API_POLICY_STRICT:
+		pr_info("DMA translated domain TLB invalidation policy: %s mode %s\n",
 			iommu_dma_strict ? "strict" : "lazy",
 			(iommu_cmd_line & IOMMU_CMD_LINE_STRICT) ?
-				"(set via kernel command line)" : "");
+				"(set via kernel command line)" :
+				"");
+		break;
+	case DMA_API_POLICY_IDENTITY:
+		pr_info("Default domain type: Passthrough %s\n",
+			(iommu_cmd_line & IOMMU_CMD_LINE_DMA_API) ?
+				"(set via kernel command line)" :
+				"");
+		break;
+	default:
+		break;
+	}
 
 	nb = kcalloc(ARRAY_SIZE(iommu_buses), sizeof(*nb), GFP_KERNEL);
 	if (!nb)
@@ -353,7 +349,8 @@ int iommu_probe_device(struct device *dev)
 	 * checked.
 	 */
 	mutex_lock(&group->mutex);
-	iommu_alloc_default_domain(group, dev);
+	iommu_alloc_default_domain(group,
+				   iommu_get_dma_api_mandatory_policy(dev));
 
 	/*
 	 * If device joined an existing group which has been claimed, don't
@@ -436,8 +433,8 @@ early_param("iommu.strict", iommu_dma_setup);
 void iommu_set_dma_strict(void)
 {
 	iommu_dma_strict = true;
-	if (iommu_def_domain_type == IOMMU_DOMAIN_DMA_FQ)
-		iommu_def_domain_type = IOMMU_DOMAIN_DMA;
+	if (dma_api_default_policy == DMA_API_POLICY_LAZY)
+		dma_api_default_policy = DMA_API_POLICY_STRICT;
 }
 
 static ssize_t iommu_group_attr_show(struct kobject *kobj,
@@ -1557,53 +1554,100 @@ struct iommu_group *fsl_mc_device_group(struct device *dev)
 }
 EXPORT_SYMBOL_GPL(fsl_mc_device_group);
 
-static int iommu_get_def_domain_type(struct device *dev)
+static enum dma_api_policy
+iommu_get_dma_api_mandatory_policy(struct device *dev)
 {
 	const struct iommu_ops *ops = dev_iommu_ops(dev);
 
 	if (dev_is_pci(dev) && to_pci_dev(dev)->untrusted)
-		return IOMMU_DOMAIN_DMA;
+		return DMA_API_POLICY_STRICT;
 
 	if (ops->def_domain_type)
 		return ops->def_domain_type(dev);
-
-	return 0;
+	return DMA_API_POLICY_ANY;
 }
 
-static int iommu_group_alloc_default_domain(struct bus_type *bus,
-					    struct iommu_group *group,
-					    unsigned int type)
+static int __iommu_get_dma_api_group_mandatory_policy(struct device *dev,
+						      void *data)
 {
-	struct iommu_domain *dom;
+	enum dma_api_policy *policy = data;
+	enum dma_api_policy dev_policy =
+		iommu_get_dma_api_mandatory_policy(dev);
 
-	dom = __iommu_domain_alloc(bus, type);
-	if (!dom && type != IOMMU_DOMAIN_DMA) {
-		dom = __iommu_domain_alloc(bus, IOMMU_DOMAIN_DMA);
-		if (dom)
-			pr_warn("Failed to allocate default IOMMU domain of type %u for group %s - Falling back to IOMMU_DOMAIN_DMA",
-				type, group->name);
+	if (dev_policy == DMA_API_POLICY_ANY || *policy == dev_policy)
+		return 0;
+	if (*policy == DMA_API_POLICY_ANY) {
+		*policy = dev_policy;
+		return 0;
 	}
 
-	if (!dom)
-		return -ENOMEM;
+	dev_warn(
+		dev,
+		"IOMMU driver is requesting different DMA API policies for devices in the same group %s.",
+		dev->iommu_group->name);
 
-	group->default_domain = dom;
-	if (!group->domain)
-		group->domain = dom;
+	/*
+	 * Resolve the conflict by priority, the lower numbers in the enum are
+	 * higher preference in case of driver problems.
+	 */
+	if (dev_policy < *policy)
+		*policy = dev_policy;
 	return 0;
 }
 
+static enum dma_api_policy
+iommu_get_dma_api_group_mandatory_policy(struct iommu_group *group)
+{
+	enum dma_api_policy policy = DMA_API_POLICY_ANY;
+
+	__iommu_group_for_each_dev(group, &policy,
+				   __iommu_get_dma_api_group_mandatory_policy);
+	return policy;
+}
+
 static int iommu_alloc_default_domain(struct iommu_group *group,
-				      struct device *dev)
+				      enum dma_api_policy policy)
 {
-	unsigned int type;
+	struct iommu_domain *domain;
+	struct group_device *grp_dev =
+		list_first_entry(&group->devices, struct group_device, list);
+	struct bus_type *bus = grp_dev->dev->bus;
+
+	lockdep_assert_held(group->mutex);
 
 	if (group->default_domain)
 		return 0;
+	if (policy == DMA_API_POLICY_ANY)
+		policy = dma_api_default_policy;
+	switch (policy) {
+		case DMA_API_POLICY_IDENTITY:
+		domain = __iommu_domain_alloc(bus, IOMMU_DOMAIN_IDENTITY);
+		break;
+
+		case DMA_API_POLICY_LAZY:
+		domain = __iommu_domain_alloc(bus, IOMMU_DOMAIN_DMA);
+		if (domain && !domain->ops->prefer_dma_api_fq){
+			pr_warn("Failed to allocate default lazy IOMMU domain for group %s - Falling back to strict",
+				group->name);
+		}
+		break;
 
-	type = iommu_get_def_domain_type(dev) ? : iommu_def_domain_type;
+		case DMA_API_POLICY_STRICT:
+		domain = __iommu_domain_alloc(bus, IOMMU_DOMAIN_DMA);
+		break;
 
-	return iommu_group_alloc_default_domain(dev->bus, group, type);
+		default:
+		WARN_ON(true);
+		domain = NULL;
+	}
+
+	if (!domain)
+		return -ENOMEM;
+
+	group->default_domain = domain;
+	if (!group->domain)
+		group->domain = domain;
+	return 0;
 }
 
 /**
@@ -1688,52 +1732,6 @@ static int iommu_bus_notifier(struct notifier_block *nb,
 	return 0;
 }
 
-struct __group_domain_type {
-	struct device *dev;
-	unsigned int type;
-};
-
-static int probe_get_default_domain_type(struct device *dev, void *data)
-{
-	struct __group_domain_type *gtype = data;
-	unsigned int type = iommu_get_def_domain_type(dev);
-
-	if (type) {
-		if (gtype->type && gtype->type != type) {
-			dev_warn(dev, "Device needs domain type %s, but device %s in the same iommu group requires type %s - using default\n",
-				 iommu_domain_type_str(type),
-				 dev_name(gtype->dev),
-				 iommu_domain_type_str(gtype->type));
-			gtype->type = 0;
-		}
-
-		if (!gtype->dev) {
-			gtype->dev  = dev;
-			gtype->type = type;
-		}
-	}
-
-	return 0;
-}
-
-static void probe_alloc_default_domain(struct bus_type *bus,
-				       struct iommu_group *group)
-{
-	struct __group_domain_type gtype;
-
-	memset(&gtype, 0, sizeof(gtype));
-
-	/* Ask for default domain requirements of all devices in the group */
-	__iommu_group_for_each_dev(group, &gtype,
-				   probe_get_default_domain_type);
-
-	if (!gtype.type)
-		gtype.type = iommu_def_domain_type;
-
-	iommu_group_alloc_default_domain(bus, group, gtype.type);
-
-}
-
 static int iommu_group_do_dma_attach(struct device *dev, void *data)
 {
 	struct iommu_domain *domain = data;
@@ -1804,7 +1802,8 @@ int bus_iommu_probe(struct bus_type *bus)
 		mutex_lock(&group->mutex);
 
 		/* Try to allocate default domain */
-		probe_alloc_default_domain(bus, group);
+		iommu_alloc_default_domain(
+			group, iommu_get_dma_api_group_mandatory_policy(group));
 
 		if (!group->default_domain) {
 			mutex_unlock(&group->mutex);
@@ -2600,19 +2599,19 @@ void iommu_set_default_passthrough(bool cmd_line)
 {
 	if (cmd_line)
 		iommu_cmd_line |= IOMMU_CMD_LINE_DMA_API;
-	iommu_def_domain_type = IOMMU_DOMAIN_IDENTITY;
+	dma_api_default_policy = DMA_API_POLICY_IDENTITY;
 }
 
 void iommu_set_default_translated(bool cmd_line)
 {
 	if (cmd_line)
 		iommu_cmd_line |= IOMMU_CMD_LINE_DMA_API;
-	iommu_def_domain_type = IOMMU_DOMAIN_DMA;
+	dma_api_default_policy = DMA_API_POLICY_STRICT;
 }
 
 bool iommu_default_passthrough(void)
 {
-	return iommu_def_domain_type == IOMMU_DOMAIN_IDENTITY;
+	return dma_api_default_policy == DMA_API_POLICY_IDENTITY;
 }
 EXPORT_SYMBOL_GPL(iommu_default_passthrough);
 
@@ -2832,103 +2831,40 @@ EXPORT_SYMBOL_GPL(iommu_sva_get_pasid);
  *    group's default domain type through /sys/kernel/iommu_groups/<grp_id>/type
  *    Please take a closer look if intended to use for other purposes.
  */
-static int iommu_change_dev_def_domain(struct iommu_group *group,
-				       struct device *prev_dev, int type)
+static int iommu_change_group_dma_api_policy(struct iommu_group *group,
+					     enum dma_api_policy policy)
 {
-	struct iommu_domain *prev_dom;
-	struct group_device *grp_dev;
-	int ret, dev_def_dom;
-	struct device *dev;
-
-	mutex_lock(&group->mutex);
-
-	if (group->default_domain != group->domain) {
-		dev_err_ratelimited(prev_dev, "Group not assigned to default domain\n");
-		ret = -EBUSY;
-		goto out;
-	}
-
-	/*
-	 * iommu group wasn't locked while acquiring device lock in
-	 * iommu_group_store_type(). So, make sure that the device count hasn't
-	 * changed while acquiring device lock.
-	 *
-	 * Changing default domain of an iommu group with two or more devices
-	 * isn't supported because there could be a potential deadlock. Consider
-	 * the following scenario. T1 is trying to acquire device locks of all
-	 * the devices in the group and before it could acquire all of them,
-	 * there could be another thread T2 (from different sub-system and use
-	 * case) that has already acquired some of the device locks and might be
-	 * waiting for T1 to release other device locks.
-	 */
-	if (iommu_group_device_count(group) != 1) {
-		dev_err_ratelimited(prev_dev, "Cannot change default domain: Group has more than one device\n");
-		ret = -EINVAL;
-		goto out;
-	}
-
-	/* Since group has only one device */
-	grp_dev = list_first_entry(&group->devices, struct group_device, list);
-	dev = grp_dev->dev;
-
-	if (prev_dev != dev) {
-		dev_err_ratelimited(prev_dev, "Cannot change default domain: Device has been changed\n");
-		ret = -EBUSY;
-		goto out;
-	}
+	enum dma_api_policy mandatory =
+		iommu_get_dma_api_group_mandatory_policy(group);
+	struct iommu_domain *old_domain;
+	int ret;
 
-	prev_dom = group->default_domain;
-	if (!prev_dom) {
-		ret = -EINVAL;
-		goto out;
-	}
+	if (mandatory != DMA_API_POLICY_ANY && policy != mandatory)
+		return -EINVAL;
 
-	dev_def_dom = iommu_get_def_domain_type(dev);
-	if (!type) {
-		/*
-		 * If the user hasn't requested any specific type of domain and
-		 * if the device supports both the domains, then default to the
-		 * domain the device was booted with
-		 */
-		type = dev_def_dom ? : iommu_def_domain_type;
-	} else if (dev_def_dom && type != dev_def_dom) {
-		dev_err_ratelimited(prev_dev, "Device cannot be in %s domain\n",
-				    iommu_domain_type_str(type));
-		ret = -EINVAL;
-		goto out;
-	}
+	mutex_lock(&group->mutex);
 
-	/*
-	 * Switch to a new domain only if the requested domain type is different
-	 * from the existing default domain type
-	 */
-	if (prev_dom->type == type) {
-		ret = 0;
-		goto out;
+	/* Shortcut if FQ is being enabled */
+	if (group->default_domain->type == IOMMU_DOMAIN_DMA &&
+	    policy == DMA_API_POLICY_LAZY) {
+		ret = iommu_dma_init_fq(group->default_domain);
+		if (!ret) {
+			group->default_domain->type = IOMMU_DOMAIN_DMA_FQ;
+			mutex_unlock(&group->mutex);
+			return 0;
+		}
 	}
 
-	/* We can bring up a flush queue without tearing down the domain */
-	if (type == IOMMU_DOMAIN_DMA_FQ && prev_dom->type == IOMMU_DOMAIN_DMA) {
-		ret = iommu_dma_init_fq(prev_dom);
-		if (!ret)
-			prev_dom->type = IOMMU_DOMAIN_DMA_FQ;
-		goto out;
+	/* Otherwise just replace the whole domain */
+	old_domain = group->default_domain;
+	group->default_domain = NULL;
+	ret = iommu_alloc_default_domain(group, policy);
+	if (ret) {
+		group->default_domain = old_domain;
+		mutex_unlock(&group->mutex);
+		return ret;
 	}
-
-	/* Sets group->default_domain to the newly allocated domain */
-	ret = iommu_group_alloc_default_domain(dev->bus, group, type);
-	if (ret)
-		goto out;
-
-	ret = iommu_create_device_direct_mappings(group, dev);
-	if (ret)
-		goto free_new_domain;
-
-	ret = __iommu_attach_device(group->default_domain, dev);
-	if (ret)
-		goto free_new_domain;
-
-	group->domain = group->default_domain;
+	iommu_group_create_direct_mappings(group);
 
 	/*
 	 * Release the mutex here because ops->probe_finalize() call-back of
@@ -2938,20 +2874,15 @@ static int iommu_change_dev_def_domain(struct iommu_group *group,
 	 */
 	mutex_unlock(&group->mutex);
 
+	/*
+	 * FIXME: How does iommu_setup_dma_ops() get called with the new domain
+	 * on ARM?
+	 */
+
 	/* Make sure dma_ops is appropriatley set */
-	iommu_group_do_probe_finalize(dev, group->default_domain);
-	iommu_domain_free(prev_dom);
+	__iommu_group_dma_finalize(group);
+	iommu_domain_free(old_domain);
 	return 0;
-
-free_new_domain:
-	iommu_domain_free(group->default_domain);
-	group->default_domain = prev_dom;
-	group->domain = prev_dom;
-
-out:
-	mutex_unlock(&group->mutex);
-
-	return ret;
 }
 
 /*
@@ -2966,9 +2897,8 @@ static int iommu_change_dev_def_domain(struct iommu_group *group,
 static ssize_t iommu_group_store_type(struct iommu_group *group,
 				      const char *buf, size_t count)
 {
-	struct group_device *grp_dev;
-	struct device *dev;
-	int ret, req_type;
+	enum dma_api_policy policy;
+	int ret;
 
 	if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO))
 		return -EACCES;
@@ -2977,77 +2907,30 @@ static ssize_t iommu_group_store_type(struct iommu_group *group,
 		return -EINVAL;
 
 	if (sysfs_streq(buf, "identity"))
-		req_type = IOMMU_DOMAIN_IDENTITY;
+		policy = DMA_API_POLICY_IDENTITY;
 	else if (sysfs_streq(buf, "DMA"))
-		req_type = IOMMU_DOMAIN_DMA;
+		policy = DMA_API_POLICY_STRICT;
 	else if (sysfs_streq(buf, "DMA-FQ"))
-		req_type = IOMMU_DOMAIN_DMA_FQ;
+		policy = DMA_API_POLICY_LAZY;
 	else if (sysfs_streq(buf, "auto"))
-		req_type = 0;
+		policy = DMA_API_POLICY_ANY;
 	else
 		return -EINVAL;
 
 	/*
-	 * Lock/Unlock the group mutex here before device lock to
-	 * 1. Make sure that the iommu group has only one device (this is a
-	 *    prerequisite for step 2)
-	 * 2. Get struct *dev which is needed to lock device
-	 */
-	mutex_lock(&group->mutex);
-	if (iommu_group_device_count(group) != 1) {
-		mutex_unlock(&group->mutex);
-		pr_err_ratelimited("Cannot change default domain: Group has more than one device\n");
-		return -EINVAL;
-	}
-
-	/* Since group has only one device */
-	grp_dev = list_first_entry(&group->devices, struct group_device, list);
-	dev = grp_dev->dev;
-	get_device(dev);
-
-	/*
-	 * Don't hold the group mutex because taking group mutex first and then
-	 * the device lock could potentially cause a deadlock as below. Assume
-	 * two threads T1 and T2. T1 is trying to change default domain of an
-	 * iommu group and T2 is trying to hot unplug a device or release [1] VF
-	 * of a PCIe device which is in the same iommu group. T1 takes group
-	 * mutex and before it could take device lock assume T2 has taken device
-	 * lock and is yet to take group mutex. Now, both the threads will be
-	 * waiting for the other thread to release lock. Below, lock order was
-	 * suggested.
-	 * device_lock(dev);
-	 *	mutex_lock(&group->mutex);
-	 *		iommu_change_dev_def_domain();
-	 *	mutex_unlock(&group->mutex);
-	 * device_unlock(dev);
-	 *
-	 * [1] Typical device release path
-	 * device_lock() from device/driver core code
-	 *  -> bus_notifier()
-	 *   -> iommu_bus_notifier()
-	 *    -> iommu_release_device()
-	 *     -> ops->release_device() vendor driver calls back iommu core code
-	 *      -> mutex_lock() from iommu core code
+	 * Taking ownership disables the DMA API domain, prevents drivers from
+	 * being attached, and switches to a blocking domain. Releasing
+	 * ownership will put back the new or original DMA API domain.
 	 */
-	mutex_unlock(&group->mutex);
-
-	/* Check if the device in the group still has a driver bound to it */
-	device_lock(dev);
-	if (device_is_bound(dev) && !(req_type == IOMMU_DOMAIN_DMA_FQ &&
-	    group->default_domain->type == IOMMU_DOMAIN_DMA)) {
-		pr_err_ratelimited("Device is still bound to driver\n");
-		ret = -EBUSY;
-		goto out;
-	}
-
-	ret = iommu_change_dev_def_domain(group, dev, req_type);
-	ret = ret ?: count;
-
-out:
-	device_unlock(dev);
-	put_device(dev);
+	ret = iommu_group_claim_dma_owner(group, &ret);
+	if (ret)
+		return ret;
 
-	return ret;
+	ret = iommu_change_group_dma_api_policy(group, policy);
+	iommu_group_release_dma_owner(group);
+	if (ret)
+		return ret;
+	return count;
 }
 
 static bool iommu_is_default_domain(struct iommu_group *group)



[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Index of Archives]     [Kernel Development]     [Kernel Newbies]     [IDE]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite Info]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux ATA RAID]     [Samba]     [Linux Media]     [Device Mapper]

  Powered by Linux