Re: [PATCH v3 17/19] iommu/amd: Access/Dirty bit support in IOPTEs

Hi Joao,

On 10/17/2023 4:54 PM, Joao Martins wrote:
On 17/10/2023 09:18, Suthikulpanit, Suravee wrote:
Hi Joao,

On 9/23/2023 8:25 AM, Joao Martins wrote:
...
diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c
index 2892aa1b4dc1..099ccb04f52f 100644
--- a/drivers/iommu/amd/io_pgtable.c
+++ b/drivers/iommu/amd/io_pgtable.c
@@ -486,6 +486,89 @@ static phys_addr_t iommu_v1_iova_to_phys(struct io_pgtable_ops *ops, unsigned lo
       return (__pte & ~offset_mask) | (iova & offset_mask);
   }
+static bool pte_test_dirty(u64 *ptep, unsigned long size)
+{
+    bool dirty = false;
+    int i, count;
+
+    /*
+     * 2.2.3.2 Host Dirty Support
+     * When a non-default page size is used, software must OR the
+     * Dirty bits in all of the replicated host PTEs used to map
+     * the page. The IOMMU does not guarantee the Dirty bits are
+     * set in all of the replicated PTEs. Any portion of the page
+     * may have been written even if the Dirty bit is set in only
+     * one of the replicated PTEs.
+     */
+    count = PAGE_SIZE_PTE_COUNT(size);
+    for (i = 0; i < count; i++) {
+        if (test_bit(IOMMU_PTE_HD_BIT, (unsigned long *) &ptep[i])) {
+            dirty = true;
+            break;
+        }
+    }
+
+    return dirty;
+}
+
+static bool pte_test_and_clear_dirty(u64 *ptep, unsigned long size)
+{
+    bool dirty = false;
+    int i, count;
+
+    /*
+     * 2.2.3.2 Host Dirty Support
+     * When a non-default page size is used, software must OR the
+     * Dirty bits in all of the replicated host PTEs used to map
+     * the page. The IOMMU does not guarantee the Dirty bits are
+     * set in all of the replicated PTEs. Any portion of the page
+     * may have been written even if the Dirty bit is set in only
+     * one of the replicated PTEs.
+     */
+    count = PAGE_SIZE_PTE_COUNT(size);
+    for (i = 0; i < count; i++)
+        if (test_and_clear_bit(IOMMU_PTE_HD_BIT,
+                    (unsigned long *) &ptep[i]))
+            dirty = true;
+
+    return dirty;
+}

Can we consolidate the two functions above into one that takes the flags and
checks whether IOMMU_DIRTY_NO_CLEAR is set?

I guess so, yes -- the split was initially there to keep an efficient tight loop
when checking all replicated PTEs, but I think I found a way to merge everything, e.g.:

diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c
index 099ccb04f52f..953f867b4943 100644
--- a/drivers/iommu/amd/io_pgtable.c
+++ b/drivers/iommu/amd/io_pgtable.c
@@ -486,8 +486,10 @@ static phys_addr_t iommu_v1_iova_to_phys(struct io_pgtable_ops *ops, unsigned lo
         return (__pte & ~offset_mask) | (iova & offset_mask);
  }

-static bool pte_test_dirty(u64 *ptep, unsigned long size)
+static bool pte_test_and_clear_dirty(u64 *ptep, unsigned long size,
+                                    unsigned long flags)
  {
+       bool test_only = flags & IOMMU_DIRTY_NO_CLEAR;
         bool dirty = false;
         int i, count;

@@ -501,35 +503,20 @@ static bool pte_test_dirty(u64 *ptep, unsigned long size)
          * one of the replicated PTEs.
          */
         count = PAGE_SIZE_PTE_COUNT(size);
-       for (i = 0; i < count; i++) {
-               if (test_bit(IOMMU_PTE_HD_BIT, (unsigned long *) &ptep[i])) {
+       for (i = 0; i < count && test_only; i++) {
+               if (test_bit(IOMMU_PTE_HD_BIT,
+                            (unsigned long *) &ptep[i])) {
                         dirty = true;
                         break;
                 }
         }

-       return dirty;
-}
-
-static bool pte_test_and_clear_dirty(u64 *ptep, unsigned long size)
-{
-       bool dirty = false;
-       int i, count;
-
-       /*
-        * 2.2.3.2 Host Dirty Support
-        * When a non-default page size is used, software must OR the
-        * Dirty bits in all of the replicated host PTEs used to map
-        * the page. The IOMMU does not guarantee the Dirty bits are
-        * set in all of the replicated PTEs. Any portion of the page
-        * may have been written even if the Dirty bit is set in only
-        * one of the replicated PTEs.
-        */
-       count = PAGE_SIZE_PTE_COUNT(size);
-       for (i = 0; i < count; i++)
+       for (i = 0; i < count && !test_only; i++) {
                 if (test_and_clear_bit(IOMMU_PTE_HD_BIT,
-                                       (unsigned long *) &ptep[i]))
+                                      (unsigned long *) &ptep[i])) {
                         dirty = true;
+               }
+       }

         return dirty;
  }
@@ -559,9 +546,7 @@ static int iommu_v1_read_and_clear_dirty(struct io_pgtable_ops *ops,
                  * Mark the whole IOVA range as dirty even if only one of
                  * the replicated PTEs were marked dirty.
                  */
-               if (((flags & IOMMU_DIRTY_NO_CLEAR) &&
-                               pte_test_dirty(ptep, pgsize)) ||
-                   pte_test_and_clear_dirty(ptep, pgsize))
+               if (pte_test_and_clear_dirty(ptep, pgsize, flags))
                         iommu_dirty_bitmap_record(dirty, iova, pgsize);
                 iova += pgsize;
         } while (iova < end);
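
For readability, here is roughly how the consolidated helper reads once the diff
above is applied (a sketch only; it assumes the IOMMU_PTE_HD_BIT,
PAGE_SIZE_PTE_COUNT() and IOMMU_DIRTY_NO_CLEAR definitions from this series):

static bool pte_test_and_clear_dirty(u64 *ptep, unsigned long size,
				     unsigned long flags)
{
	bool test_only = flags & IOMMU_DIRTY_NO_CLEAR;
	bool dirty = false;
	int i, count;

	/*
	 * 2.2.3.2 Host Dirty Support
	 * When a non-default page size is used, software must OR the
	 * Dirty bits in all of the replicated host PTEs used to map
	 * the page. The IOMMU does not guarantee the Dirty bits are
	 * set in all of the replicated PTEs. Any portion of the page
	 * may have been written even if the Dirty bit is set in only
	 * one of the replicated PTEs.
	 */
	count = PAGE_SIZE_PTE_COUNT(size);

	/* Test-only: report dirty as soon as one replicated PTE has HD set. */
	for (i = 0; i < count && test_only; i++) {
		if (test_bit(IOMMU_PTE_HD_BIT, (unsigned long *) &ptep[i])) {
			dirty = true;
			break;
		}
	}

	/* Clearing: visit every replicated PTE so all HD bits get reset. */
	for (i = 0; i < count && !test_only; i++) {
		if (test_and_clear_bit(IOMMU_PTE_HD_BIT,
				       (unsigned long *) &ptep[i]))
			dirty = true;
	}

	return dirty;
}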

+
+static int iommu_v1_read_and_clear_dirty(struct io_pgtable_ops *ops,
+                     unsigned long iova, size_t size,
+                     unsigned long flags,
+                     struct iommu_dirty_bitmap *dirty)
+{
+    struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
+    unsigned long end = iova + size - 1;
+
+    do {
+        unsigned long pgsize = 0;
+        u64 *ptep, pte;
+
+        ptep = fetch_pte(pgtable, iova, &pgsize);
+        if (ptep)
+            pte = READ_ONCE(*ptep);
+        if (!ptep || !IOMMU_PTE_PRESENT(pte)) {
+            pgsize = pgsize ?: PTE_LEVEL_PAGE_SIZE(0);
+            iova += pgsize;
+            continue;
+        }
+
+        /*
+         * Mark the whole IOVA range as dirty even if only one of
+         * the replicated PTEs were marked dirty.
+         */
+        if (((flags & IOMMU_DIRTY_NO_CLEAR) &&
+                pte_test_dirty(ptep, pgsize)) ||
+            pte_test_and_clear_dirty(ptep, pgsize))
+            iommu_dirty_bitmap_record(dirty, iova, pgsize);
+        iova += pgsize;
+    } while (iova < end);
+

Your earlier point made me discover that the test-only case might end up clearing
the PTE unnecessarily, but I have addressed that in the previous comment.
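
To illustrate why the test-only path should leave the PTEs untouched, a minimal
self-contained user-space sketch of the replicated-PTE Dirty semantics from
2.2.3.2 (hypothetical names and a stand-in bit value, not the kernel helpers):
a peek with IOMMU_DIRTY_NO_CLEAR must leave every Dirty bit in place so a later
clearing pass can still collect it.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define HD_BIT (1ULL << 6) /* stand-in for the Host Dirty (HD) PTE bit */

static bool replicated_test_dirty(const uint64_t *ptep, int count)
{
	/* Test-only: report dirty if any replicated PTE has HD set. */
	for (int i = 0; i < count; i++)
		if (ptep[i] & HD_BIT)
			return true;
	return false;
}

static bool replicated_test_and_clear_dirty(uint64_t *ptep, int count)
{
	bool dirty = false;

	/* OR the Dirty bits of all replicated PTEs and reset each one. */
	for (int i = 0; i < count; i++) {
		if (ptep[i] & HD_BIT) {
			dirty = true;
			ptep[i] &= ~HD_BIT;
		}
	}
	return dirty;
}

int main(void)
{
	/* Four replicated PTEs mapping one non-default page size;
	 * the IOMMU set Dirty in only one of them. */
	uint64_t ptes[4] = { 0, HD_BIT, 0, 0 };

	printf("peek:  %d\n", replicated_test_dirty(ptes, 4));           /* 1 */
	printf("clear: %d\n", replicated_test_and_clear_dirty(ptes, 4)); /* 1 */
	printf("after: %d\n", replicated_test_dirty(ptes, 4));           /* 0 */
	return 0;
}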

Reviewed-by: Suravee Suthikulpanit <suravee.suthikulpanit@xxxxxxx>

Thanks,
Suravee


