Hi Joao,
On 10/17/2023 4:54 PM, Joao Martins wrote:
On 17/10/2023 09:18, Suthikulpanit, Suravee wrote:
Hi Joao,
On 9/23/2023 8:25 AM, Joao Martins wrote:
...
diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c
index 2892aa1b4dc1..099ccb04f52f 100644
--- a/drivers/iommu/amd/io_pgtable.c
+++ b/drivers/iommu/amd/io_pgtable.c
@@ -486,6 +486,89 @@ static phys_addr_t iommu_v1_iova_to_phys(struct
io_pgtable_ops *ops, unsigned lo
return (__pte & ~offset_mask) | (iova & offset_mask);
}
+static bool pte_test_dirty(u64 *ptep, unsigned long size)
+{
+ bool dirty = false;
+ int i, count;
+
+ /*
+ * 2.2.3.2 Host Dirty Support
+ * When a non-default page size is used , software must OR the
+ * Dirty bits in all of the replicated host PTEs used to map
+ * the page. The IOMMU does not guarantee the Dirty bits are
+ * set in all of the replicated PTEs. Any portion of the page
+ * may have been written even if the Dirty bit is set in only
+ * one of the replicated PTEs.
+ */
+ count = PAGE_SIZE_PTE_COUNT(size);
+ for (i = 0; i < count; i++) {
+ if (test_bit(IOMMU_PTE_HD_BIT, (unsigned long *) &ptep[i])) {
+ dirty = true;
+ break;
+ }
+ }
+
+ return dirty;
+}
+
+static bool pte_test_and_clear_dirty(u64 *ptep, unsigned long size)
+{
+ bool dirty = false;
+ int i, count;
+
+ /*
+ * 2.2.3.2 Host Dirty Support
+ * When a non-default page size is used , software must OR the
+ * Dirty bits in all of the replicated host PTEs used to map
+ * the page. The IOMMU does not guarantee the Dirty bits are
+ * set in all of the replicated PTEs. Any portion of the page
+ * may have been written even if the Dirty bit is set in only
+ * one of the replicated PTEs.
+ */
+ count = PAGE_SIZE_PTE_COUNT(size);
+ for (i = 0; i < count; i++)
+ if (test_and_clear_bit(IOMMU_PTE_HD_BIT,
+ (unsigned long *) &ptep[i]))
+ dirty = true;
+
+ return dirty;
+}
Can we consolidate the two functions above where we can pass the flag and check
if IOMMU_DIRTY_NO_CLEAR is set?
I guess so yes -- it was initially to have an efficient tight loop to check all
replicated PTEs, but I think I found a way to merge everything e.g.
diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c
index 099ccb04f52f..953f867b4943 100644
--- a/drivers/iommu/amd/io_pgtable.c
+++ b/drivers/iommu/amd/io_pgtable.c
@@ -486,8 +486,10 @@ static phys_addr_t iommu_v1_iova_to_phys(struct
io_pgtable_ops *ops, unsigned lo
return (__pte & ~offset_mask) | (iova & offset_mask);
}
-static bool pte_test_dirty(u64 *ptep, unsigned long size)
+static bool pte_test_and_clear_dirty(u64 *ptep, unsigned long size,
+ unsigned long flags)
{
+ bool test_only = flags & IOMMU_DIRTY_NO_CLEAR;
bool dirty = false;
int i, count;
@@ -501,35 +503,20 @@ static bool pte_test_dirty(u64 *ptep, unsigned long size)
* one of the replicated PTEs.
*/
count = PAGE_SIZE_PTE_COUNT(size);
- for (i = 0; i < count; i++) {
- if (test_bit(IOMMU_PTE_HD_BIT, (unsigned long *) &ptep[i])) {
+ for (i = 0; i < count && test_only; i++) {
+ if (test_bit(IOMMU_PTE_HD_BIT,
+ (unsigned long *) &ptep[i])) {
dirty = true;
break;
}
}
- return dirty;
-}
-
-static bool pte_test_and_clear_dirty(u64 *ptep, unsigned long size)
-{
- bool dirty = false;
- int i, count;
-
- /*
- * 2.2.3.2 Host Dirty Support
- * When a non-default page size is used , software must OR the
- * Dirty bits in all of the replicated host PTEs used to map
- * the page. The IOMMU does not guarantee the Dirty bits are
- * set in all of the replicated PTEs. Any portion of the page
- * may have been written even if the Dirty bit is set in only
- * one of the replicated PTEs.
- */
- count = PAGE_SIZE_PTE_COUNT(size);
- for (i = 0; i < count; i++)
+ for (i = 0; i < count && !test_only; i++) {
if (test_and_clear_bit(IOMMU_PTE_HD_BIT,
- (unsigned long *) &ptep[i]))
+ (unsigned long *) &ptep[i])) {
dirty = true;
+ }
+ }
return dirty;
}
@@ -559,9 +546,7 @@ static int iommu_v1_read_and_clear_dirty(struct
io_pgtable_ops *ops,
* Mark the whole IOVA range as dirty even if only one of
* the replicated PTEs were marked dirty.
*/
- if (((flags & IOMMU_DIRTY_NO_CLEAR) &&
- pte_test_dirty(ptep, pgsize)) ||
- pte_test_and_clear_dirty(ptep, pgsize))
+ if (pte_test_and_clear_dirty(ptep, pgsize, flags))
iommu_dirty_bitmap_record(dirty, iova, pgsize);
iova += pgsize;
} while (iova < end);
+
+static int iommu_v1_read_and_clear_dirty(struct io_pgtable_ops *ops,
+ unsigned long iova, size_t size,
+ unsigned long flags,
+ struct iommu_dirty_bitmap *dirty)
+{
+ struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
+ unsigned long end = iova + size - 1;
+
+ do {
+ unsigned long pgsize = 0;
+ u64 *ptep, pte;
+
+ ptep = fetch_pte(pgtable, iova, &pgsize);
+ if (ptep)
+ pte = READ_ONCE(*ptep);
+ if (!ptep || !IOMMU_PTE_PRESENT(pte)) {
+ pgsize = pgsize ?: PTE_LEVEL_PAGE_SIZE(0);
+ iova += pgsize;
+ continue;
+ }
+
+ /*
+ * Mark the whole IOVA range as dirty even if only one of
+ * the replicated PTEs were marked dirty.
+ */
+ if (((flags & IOMMU_DIRTY_NO_CLEAR) &&
+ pte_test_dirty(ptep, pgsize)) ||
+ pte_test_and_clear_dirty(ptep, pgsize))
+ iommu_dirty_bitmap_record(dirty, iova, pgsize);
+ iova += pgsize;
+ } while (iova < end);
+
Your earlier point made me discover that the test-only case might end up clearing
the PTE unnecessarily. But I have addressed it in the previous comment.
Reviewed-by: Suravee Suthikulpanit <suravee.suthikulpanit@xxxxxxx>
Thanks,
Suravee