Hi Joao,
On 9/23/2023 8:25 AM, Joao Martins wrote:
...
diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c
index 2892aa1b4dc1..099ccb04f52f 100644
--- a/drivers/iommu/amd/io_pgtable.c
+++ b/drivers/iommu/amd/io_pgtable.c
@@ -486,6 +486,89 @@ static phys_addr_t iommu_v1_iova_to_phys(struct io_pgtable_ops *ops, unsigned lo
 	return (__pte & ~offset_mask) | (iova & offset_mask);
 }
+static bool pte_test_dirty(u64 *ptep, unsigned long size)
+{
+	bool dirty = false;
+	int i, count;
+
+	/*
+	 * 2.2.3.2 Host Dirty Support
+	 * When a non-default page size is used, software must OR the
+	 * Dirty bits in all of the replicated host PTEs used to map
+	 * the page. The IOMMU does not guarantee the Dirty bits are
+	 * set in all of the replicated PTEs. Any portion of the page
+	 * may have been written even if the Dirty bit is set in only
+	 * one of the replicated PTEs.
+	 */
+	count = PAGE_SIZE_PTE_COUNT(size);
+	for (i = 0; i < count; i++) {
+		if (test_bit(IOMMU_PTE_HD_BIT, (unsigned long *) &ptep[i])) {
+			dirty = true;
+			break;
+		}
+	}
+
+	return dirty;
+}
+
+static bool pte_test_and_clear_dirty(u64 *ptep, unsigned long size)
+{
+	bool dirty = false;
+	int i, count;
+
+	/*
+	 * 2.2.3.2 Host Dirty Support
+	 * When a non-default page size is used, software must OR the
+	 * Dirty bits in all of the replicated host PTEs used to map
+	 * the page. The IOMMU does not guarantee the Dirty bits are
+	 * set in all of the replicated PTEs. Any portion of the page
+	 * may have been written even if the Dirty bit is set in only
+	 * one of the replicated PTEs.
+	 */
+	count = PAGE_SIZE_PTE_COUNT(size);
+	for (i = 0; i < count; i++)
+		if (test_and_clear_bit(IOMMU_PTE_HD_BIT,
+				       (unsigned long *) &ptep[i]))
+			dirty = true;
+
+	return dirty;
+}
Can we consolidate the two functions above into a single helper that takes
the flags and checks whether IOMMU_DIRTY_NO_CLEAR is set?
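
Something along these lines, perhaps? (A rough, untested sketch reusing the
names from this patch, just to illustrate the idea.)

	static bool pte_test_and_clear_dirty(u64 *ptep, unsigned long size,
					     unsigned long flags)
	{
		bool test_only = flags & IOMMU_DIRTY_NO_CLEAR;
		bool dirty = false;
		int i, count;

		/*
		 * 2.2.3.2 Host Dirty Support: OR the Dirty bits across
		 * all of the replicated host PTEs used to map the page.
		 */
		count = PAGE_SIZE_PTE_COUNT(size);
		for (i = 0; i < count; i++) {
			if (test_only) {
				/* Read-only query: stop at the first set bit. */
				if (test_bit(IOMMU_PTE_HD_BIT,
					     (unsigned long *) &ptep[i])) {
					dirty = true;
					break;
				}
			} else if (test_and_clear_bit(IOMMU_PTE_HD_BIT,
						      (unsigned long *) &ptep[i])) {
				/* Clearing: every replicated PTE must be visited. */
				dirty = true;
			}
		}

		return dirty;
	}

The caller in iommu_v1_read_and_clear_dirty() would then collapse into a
single pte_test_and_clear_dirty(ptep, pgsize, flags) call.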
+
+static int iommu_v1_read_and_clear_dirty(struct io_pgtable_ops *ops,
+					 unsigned long iova, size_t size,
+					 unsigned long flags,
+					 struct iommu_dirty_bitmap *dirty)
+{
+	struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
+	unsigned long end = iova + size - 1;
+
+	do {
+		unsigned long pgsize = 0;
+		u64 *ptep, pte;
+
+		ptep = fetch_pte(pgtable, iova, &pgsize);
+		if (ptep)
+			pte = READ_ONCE(*ptep);
+		if (!ptep || !IOMMU_PTE_PRESENT(pte)) {
+			pgsize = pgsize ?: PTE_LEVEL_PAGE_SIZE(0);
+			iova += pgsize;
+			continue;
+		}
+
+		/*
+		 * Mark the whole IOVA range as dirty even if only one of
+		 * the replicated PTEs is marked dirty.
+		 */
+		if (((flags & IOMMU_DIRTY_NO_CLEAR) &&
+		     pte_test_dirty(ptep, pgsize)) ||
+		    pte_test_and_clear_dirty(ptep, pgsize))
+			iommu_dirty_bitmap_record(dirty, iova, pgsize);
+		iova += pgsize;
+	} while (iova < end);
+
+	return 0;
+}
+
/*
* ----------------------------------------------------
*/
@@ -527,6 +610,7 @@ static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *coo
 	pgtable->iop.ops.map_pages    = iommu_v1_map_pages;
 	pgtable->iop.ops.unmap_pages  = iommu_v1_unmap_pages;
 	pgtable->iop.ops.iova_to_phys = iommu_v1_iova_to_phys;
+	pgtable->iop.ops.read_and_clear_dirty = iommu_v1_read_and_clear_dirty;
 	return &pgtable->iop;
}
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index af36c627022f..31b333cc6fe1 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
....
@@ -2156,11 +2160,17 @@ static inline u64 dma_max_address(void)
 	return ((1ULL << PM_LEVEL_SHIFT(amd_iommu_gpt_level)) - 1);
 }
+static bool amd_iommu_hd_support(struct amd_iommu *iommu)
+{
+	return iommu && (iommu->features & FEATURE_HDSUP);
+}
+
You can use the newly introduced check_feature(u64 mask) helper to check
for HD support.
(See
https://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git/commit/?h=next&id=7b7563a93437ef945c829538da28f0095f1603ec)
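
i.e. something like this (untested; since check_feature() tests the
system-wide EFR, the per-IOMMU argument and NULL check would no longer
be needed):

	static bool amd_iommu_hd_support(struct amd_iommu *iommu)
	{
		/* HD support is reported in the global EFR. */
		return check_feature(FEATURE_HDSUP);
	}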
...
@@ -2252,6 +2268,9 @@ static int amd_iommu_attach_device(struct iommu_domain *dom,
 		return 0;
 	dev_data->defer_attach = false;
+	if (dom->dirty_ops && iommu &&
+	    !(iommu->features & FEATURE_HDSUP))
+		return -EINVAL;

With check_feature() this can simply become:

	if (dom->dirty_ops && !check_feature(FEATURE_HDSUP))
		return -EINVAL;

 	if (dev_data->domain)
 		detach_device(dev);
@@ -2371,6 +2390,11 @@ static bool amd_iommu_capable(struct device *dev, enum iommu_cap cap)
 		return true;
 	case IOMMU_CAP_DEFERRED_FLUSH:
 		return true;
+	case IOMMU_CAP_DIRTY: {
+		struct amd_iommu *iommu = rlookup_amd_iommu(dev);
+
+		return amd_iommu_hd_support(iommu);

Similarly, this could simply be:

		return check_feature(FEATURE_HDSUP);
Thanks,
Suravee