Re: [PATCH v3 19/19] iommu/intel: Access/Dirty bit support for SL domains

On 17/10/2023 03:08, Baolu Lu wrote:
> On 10/17/23 12:00 AM, Joao Martins wrote:
>>>> The iommu_dirty_bitmap is defined in iommu core. The iommu driver has no
>>>> need to understand it and check its member anyway.
>>>>
>>> (...) The iommu driver has no need to understand it. iommu_dirty_bitmap_record()
>>> already makes those checks in case there's no iova_bitmap to set bits to.
>>>
>> This is all true, but the reason I am checking iommu_dirty_bitmap::bitmap is to
>> essentially not record anything in the iova bitmap and just clear the dirty bits
>> from the IOPTEs, all while dirty tracking is technically disabled. This is done
>> internally only when starting dirty tracking, to ensure that we clean up
>> all dirty bits before we enable dirty tracking and get a consistent snapshot,
>> as opposed to inheriting dirty bits from the past.
> 
> It's okay since it serves a functional purpose. Can you please add some
> comments around the code to explain the rationale?
> 

I added this comment below:

+       /*
+        * IOMMUFD core calls into a dirty tracking disabled domain without an
+        * IOVA bitmap set in order to clean dirty bits in all PTEs that might
+        * have occurred when we stopped dirty tracking. This ensures that we
+        * never inherit dirtied bits from a previous cycle.
+        */
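
For reference, below is a hedged sketch (not part of the patch; the helper name
clear_stale_dirty() is made up for illustration) of the caller-side sequence
that comment describes, using the iommu_dirty_bitmap_init() helper and the
dirty_ops hook as added by this series. Passing a NULL iova_bitmap is what
turns iommu_dirty_bitmap_record() into a no-op, so the walk only clears the
IOPTE dirty bits:

static int clear_stale_dirty(struct iommu_domain *domain,
			     unsigned long iova, size_t size)
{
	struct iommu_iotlb_gather gather;
	struct iommu_dirty_bitmap dirty;

	iommu_iotlb_gather_init(&gather);
	/* NULL iova_bitmap: record nothing, just clear the dirty bits */
	iommu_dirty_bitmap_init(&dirty, NULL, &gather);

	/* flags == 0: dirty bits are cleared as they are read */
	return domain->dirty_ops->read_and_clear_dirty(domain, iova, size,
						       0, &dirty);
}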

Also fixed an issue where I could theoretically clear the bit even when
IOMMU_DIRTY_NO_CLEAR was set. Essentially, I now pass the read_and_clear_dirty()
flags through and let dma_sl_pte_test_and_clear_dirty() decide between test and
test-and-clear, similar to AMD:

@@ -781,6 +788,16 @@ static inline bool dma_pte_present(struct dma_pte *pte)
        return (pte->val & 3) != 0;
 }

+static inline bool dma_sl_pte_test_and_clear_dirty(struct dma_pte *pte,
+                                                  unsigned long flags)
+{
+       if (flags & IOMMU_DIRTY_NO_CLEAR)
+               return (pte->val & DMA_SL_PTE_DIRTY) != 0;
+
+       return test_and_clear_bit(DMA_SL_PTE_DIRTY_BIT,
+                                 (unsigned long *)&pte->val);
+}
+
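
As a usage note (illustrative only, not part of the diff), the flags argument
selects between peeking at the dirty bit and atomically clearing it:

	/* Peek: report the dirty bit but leave it set in the PTE */
	dirty = dma_sl_pte_test_and_clear_dirty(pte, IOMMU_DIRTY_NO_CLEAR);

	/* Read and clear: report the dirty bit and clear it atomically */
	dirty = dma_sl_pte_test_and_clear_dirty(pte, 0);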

Anyhow, see below for the full diff compared to this patch. Some things in my
tree differ from what was submitted in this patch.

diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig
index 2e56bd79f589..dedb8ae3cba8 100644
--- a/drivers/iommu/intel/Kconfig
+++ b/drivers/iommu/intel/Kconfig
@@ -15,6 +15,7 @@ config INTEL_IOMMU
 	select DMA_OPS
 	select IOMMU_API
 	select IOMMU_IOVA
+	select IOMMUFD_DRIVER
 	select NEED_DMA_MAP_STATE
 	select DMAR_TABLE
 	select SWIOTLB
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index fabfe363f1f9..0e3f532f3bca 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -4075,11 +4075,6 @@ static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
 	return NULL;
 }

-static bool intel_iommu_slads_supported(struct intel_iommu *iommu)
-{
-	return sm_supported(iommu) && ecap_slads(iommu->ecap);
-}
-
 static struct iommu_domain *
 intel_iommu_domain_alloc_user(struct device *dev, u32 flags)
 {
@@ -4087,6 +4082,10 @@ intel_iommu_domain_alloc_user(struct device *dev, u32 flags)
 	struct iommu_domain *domain;
 	struct intel_iommu *iommu;

+	if (flags & (~(IOMMU_HWPT_ALLOC_NEST_PARENT |
+		       IOMMU_HWPT_ALLOC_ENFORCE_DIRTY)))
+		return ERR_PTR(-EOPNOTSUPP);
+
 	iommu = device_to_iommu(dev, NULL, NULL);
 	if (!iommu)
 		return ERR_PTR(-ENODEV);
@@ -4094,15 +4093,26 @@ intel_iommu_domain_alloc_user(struct device *dev, u32 flags)
 	if ((flags & IOMMU_HWPT_ALLOC_NEST_PARENT) && !ecap_nest(iommu->ecap))
 		return ERR_PTR(-EOPNOTSUPP);

-	if (enforce_dirty &&
-	    !intel_iommu_slads_supported(iommu))
+	if (enforce_dirty && !slads_supported(iommu))
 		return ERR_PTR(-EOPNOTSUPP);

+	/*
+	 * The domain_alloc_user op needs to fully initialize a domain
+	 * before returning, so use iommu_domain_alloc() here for
+	 * simplicity.
+	 */
 	domain = iommu_domain_alloc(dev->bus);
 	if (!domain)
 		domain = ERR_PTR(-ENOMEM);
-	if (domain && enforce_dirty)
+
+	if (!IS_ERR(domain) && enforce_dirty) {
+		if (to_dmar_domain(domain)->use_first_level) {
+			iommu_domain_free(domain);
+			return ERR_PTR(-EOPNOTSUPP);
+		}
 		domain->dirty_ops = &intel_dirty_ops;
+	}
+
 	return domain;
 }

@@ -4113,7 +4123,7 @@ static void intel_iommu_domain_free(struct iommu_domain *domain)
 }

 static int prepare_domain_attach_device(struct iommu_domain *domain,
-					struct device *dev)
+					struct device *dev, ioasid_t pasid)
 {
 	struct dmar_domain *dmar_domain = to_dmar_domain(domain);
 	struct intel_iommu *iommu;
@@ -4126,7 +4136,8 @@ static int prepare_domain_attach_device(struct iommu_domain *domain,
 	if (dmar_domain->force_snooping && !ecap_sc_support(iommu->ecap))
 		return -EINVAL;

-	if (domain->dirty_ops && !intel_iommu_slads_supported(iommu))
+	if (domain->dirty_ops &&
+	    (!slads_supported(iommu) || pasid != IOMMU_NO_PASID))
 		return -EINVAL;

 	/* check if this iommu agaw is sufficient for max mapped address */
@@ -4164,7 +4175,7 @@ static int intel_iommu_attach_device(struct iommu_domain *domain,
 	if (info->domain)
 		device_block_translation(dev);

-	ret = prepare_domain_attach_device(domain, dev);
+	ret = prepare_domain_attach_device(domain, dev, IOMMU_NO_PASID);
 	if (ret)
 		return ret;

@@ -4384,7 +4395,7 @@ static bool intel_iommu_capable(struct device *dev, enum iommu_cap cap)
 	case IOMMU_CAP_ENFORCE_CACHE_COHERENCY:
 		return ecap_sc_support(info->iommu->ecap);
 	case IOMMU_CAP_DIRTY:
-		return intel_iommu_slads_supported(info->iommu);
+		return slads_supported(info->iommu);
 	default:
 		return false;
 	}
@@ -4785,7 +4796,7 @@ static int intel_iommu_set_dev_pasid(struct iommu_domain *domain,
 	if (context_copied(iommu, info->bus, info->devfn))
 		return -EBUSY;

-	ret = prepare_domain_attach_device(domain, dev);
+	ret = prepare_domain_attach_device(domain, dev, pasid);
 	if (ret)
 		return ret;

@@ -4848,31 +4859,31 @@ static int intel_iommu_set_dirty_tracking(struct iommu_domain *domain,
 	int ret = -EINVAL;

 	spin_lock(&dmar_domain->lock);
-	if (!(dmar_domain->dirty_tracking ^ enable) ||
-	    list_empty(&dmar_domain->devices)) {
-		spin_unlock(&dmar_domain->lock);
-		return 0;
-	}
+	if (dmar_domain->dirty_tracking == enable)
+		goto out_unlock;

 	list_for_each_entry(info, &dmar_domain->devices, link) {
-		/* First-level page table always enables dirty bit*/
-		if (dmar_domain->use_first_level) {
-			ret = 0;
-			break;
-		}
-
 		ret = intel_pasid_setup_dirty_tracking(info->iommu, info->domain,
 						     info->dev, IOMMU_NO_PASID,
 						     enable);
 		if (ret)
-			break;
+			goto err_unwind;

 	}

-	if (!ret)
-		dmar_domain->dirty_tracking = enable;
+	dmar_domain->dirty_tracking = enable;
+out_unlock:
 	spin_unlock(&dmar_domain->lock);

+	return 0;
+
+err_unwind:
+	list_for_each_entry(info, &dmar_domain->devices, link)
+		intel_pasid_setup_dirty_tracking(info->iommu, dmar_domain,
+						 info->dev, IOMMU_NO_PASID,
+						 dmar_domain->dirty_tracking);
+	spin_unlock(&dmar_domain->lock);
 	return ret;
 }

@@ -4886,14 +4897,16 @@ static int intel_iommu_read_and_clear_dirty(struct iommu_domain *domain,
 	unsigned long pgsize;
 	bool ad_enabled;

-	spin_lock(&dmar_domain->lock);
+	/*
+	 * IOMMUFD core calls into a dirty tracking disabled domain without an
+	 * IOVA bitmap set in order to clean dirty bits in all PTEs that might
+	 * have occurred when we stopped dirty tracking. This ensures that we
+	 * never inherit dirtied bits from a previous cycle.
+	 */
 	ad_enabled = dmar_domain->dirty_tracking;
-	spin_unlock(&dmar_domain->lock);
-
 	if (!ad_enabled && dirty->bitmap)
 		return -EINVAL;

-	rcu_read_lock();
 	do {
 		struct dma_pte *pte;
 		int lvl = 0;
@@ -4906,14 +4919,10 @@ static int intel_iommu_read_and_clear_dirty(struct iommu_domain *domain,
 			continue;
 		}

-		/* It is writable, set the bitmap */
-		if (((flags & IOMMU_DIRTY_NO_CLEAR) &&
-				dma_sl_pte_dirty(pte)) ||
-		    dma_sl_pte_test_and_clear_dirty(pte))
+		if (dma_sl_pte_test_and_clear_dirty(pte, flags))
 			iommu_dirty_bitmap_record(dirty, iova, pgsize);
 		iova += pgsize;
 	} while (iova < end);
-	rcu_read_unlock();

 	return 0;
 }
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index bccd44db3316..0b390d9e669b 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -542,6 +542,9 @@ enum {
 #define sm_supported(iommu)	(intel_iommu_sm && ecap_smts((iommu)->ecap))
 #define pasid_supported(iommu)	(sm_supported(iommu) &&			\
 				 ecap_pasid((iommu)->ecap))
+#define slads_supported(iommu)	(sm_supported(iommu) &&			\
+				 ecap_slads((iommu)->ecap))
+

 struct pasid_entry;
 struct pasid_state_entry;
@@ -785,13 +788,12 @@ static inline bool dma_pte_present(struct dma_pte *pte)
 	return (pte->val & 3) != 0;
 }

-static inline bool dma_sl_pte_dirty(struct dma_pte *pte)
+static inline bool dma_sl_pte_test_and_clear_dirty(struct dma_pte *pte,
+						   unsigned long flags)
 {
-	return (pte->val & DMA_SL_PTE_DIRTY) != 0;
-}
+	if (flags & IOMMU_DIRTY_NO_CLEAR)
+		return (pte->val & DMA_SL_PTE_DIRTY) != 0;

-static inline bool dma_sl_pte_test_and_clear_dirty(struct dma_pte *pte)
-{
 	return test_and_clear_bit(DMA_SL_PTE_DIRTY_BIT,
 				  (unsigned long *)&pte->val);
 }
diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c
index 03814942d59c..785384a59d55 100644
--- a/drivers/iommu/intel/pasid.c
+++ b/drivers/iommu/intel/pasid.c
@@ -686,15 +686,29 @@ int intel_pasid_setup_dirty_tracking(struct intel_iommu *iommu,

 	spin_lock(&iommu->lock);

-	did = domain_id_iommu(domain, iommu);
 	pte = intel_pasid_get_entry(dev, pasid);
 	if (!pte) {
 		spin_unlock(&iommu->lock);
-		dev_err(dev, "Failed to get pasid entry of PASID %d\n", pasid);
+		dev_err_ratelimited(dev,
+				    "Failed to get pasid entry of PASID %d\n",
+				    pasid);
 		return -ENODEV;
 	}

+	did = domain_id_iommu(domain, iommu);
 	pgtt = pasid_pte_get_pgtt(pte);
+	if (pgtt != PASID_ENTRY_PGTT_SL_ONLY && pgtt != PASID_ENTRY_PGTT_NESTED) {
+		spin_unlock(&iommu->lock);
+		dev_err_ratelimited(dev,
+				    "Dirty tracking not supported on translation type %d\n",
+				    pgtt);
+		return -EOPNOTSUPP;
+	}
+
+	if (pasid_get_ssade(pte) == enabled) {
+		spin_unlock(&iommu->lock);
+		return 0;
+	}

 	if (enabled)
 		pasid_set_ssade(pte);
@@ -702,6 +716,9 @@ int intel_pasid_setup_dirty_tracking(struct intel_iommu *iommu,
 		pasid_clear_ssade(pte);
 	spin_unlock(&iommu->lock);

+	if (!ecap_coherent(iommu->ecap))
+		clflush_cache_range(pte, sizeof(*pte));
+
 	/*
 	 * From VT-d spec table 25 "Guidance to Software for Invalidations":
 	 *
@@ -720,8 +737,6 @@ int intel_pasid_setup_dirty_tracking(struct intel_iommu *iommu,

 	if (pgtt == PASID_ENTRY_PGTT_SL_ONLY || pgtt == PASID_ENTRY_PGTT_NESTED)
 		iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH);
-	else
-		qi_flush_piotlb(iommu, did, pasid, 0, -1, 0);

 	/* Device IOTLB doesn't need to be flushed in caching mode. */
 	if (!cap_caching_mode(iommu->cap))


