The AMD implementation of unmap_read_dirty() is fairly simple, as it
mostly reuses the unmap code, with the addition of marshalling the
dirty bit into the bitmap as it walks the to-be-unmapped IOPTEs.
Extra care is taken, though, to clear each PTE with cmpxchg rather
than a non-serialized store, testing the dirty bit of the value
returned until the cmpxchg succeeds in setting the PTE to 0, so that
a dirty bit set by hardware concurrently with the unmap is not lost.

Signed-off-by: Joao Martins <joao.m.martins@xxxxxxxxxx>
---
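Not part of the commit itself, just review context: below is a
minimal stand-alone sketch of the clear-and-harvest idiom that
free_pte_dirty() implements, written against C11 atomics instead of
the kernel's cmpxchg64(); the helper name and the dirty-bit position
are made up for illustration. The point is that the dirty bit is
tested on the value the compare-exchange returns, so a bit set by the
IOMMU between the caller's read of the PTE and the exchange is still
accounted for, which a plain "pte[i] = 0ULL" store would lose.

/* Illustrative user-space analogue, not kernel code. */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PTE_DIRTY_BIT	(1ULL << 6)	/* hypothetical bit position */

/*
 * Atomically clear a PTE-like word, reporting whether the dirty bit
 * was observed in any value the word held on the way to 0. On
 * failure, atomic_compare_exchange_weak() reloads 'expected' with
 * the current contents, so a dirty bit set after the caller's
 * snapshot is still seen before the word finally goes to zero.
 */
static bool pte_clear_harvest_dirty(_Atomic uint64_t *pte,
				    uint64_t expected)
{
	bool dirty = false;

	do {
		dirty |= !!(expected & PTE_DIRTY_BIT);
	} while (!atomic_compare_exchange_weak(pte, &expected, 0));

	return dirty;
}

int main(void)
{
	_Atomic uint64_t pte = 0xdeadb000ULL | PTE_DIRTY_BIT;
	uint64_t snapshot = atomic_load(&pte);

	printf("dirty=%d pte=%#llx\n",
	       pte_clear_harvest_dirty(&pte, snapshot),
	       (unsigned long long)atomic_load(&pte));
	return 0;
}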
 drivers/iommu/amd/io_pgtable.c | 44 +++++++++++++++++++++++++++++-----
 drivers/iommu/amd/iommu.c      | 22 +++++++++++++++++
 2 files changed, 60 insertions(+), 6 deletions(-)

diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c
index 8325ef193093..1868c3b58e6d 100644
--- a/drivers/iommu/amd/io_pgtable.c
+++ b/drivers/iommu/amd/io_pgtable.c
@@ -355,6 +355,16 @@ static void free_clear_pte(u64 *pte, u64 pteval, struct list_head *freelist)
 	free_sub_pt(pt, mode, freelist);
 }
 
+static bool free_pte_dirty(u64 *pte, u64 pteval)
+{
+	bool dirty = false;
+
+	while (IOMMU_PTE_DIRTY(cmpxchg64(pte, pteval, 0)))
+		dirty = true;
+
+	return dirty;
+}
+
 /*
  * Generic mapping functions. It maps a physical address into a DMA
  * address space. It allocates the page table pages if necessary.
@@ -428,10 +438,11 @@ static int iommu_v1_map_page(struct io_pgtable_ops *ops, unsigned long iova,
 	return ret;
 }
 
-static unsigned long iommu_v1_unmap_page(struct io_pgtable_ops *ops,
-					 unsigned long iova,
-					 size_t size,
-					 struct iommu_iotlb_gather *gather)
+static unsigned long __iommu_v1_unmap_page(struct io_pgtable_ops *ops,
+					   unsigned long iova,
+					   size_t size,
+					   struct iommu_iotlb_gather *gather,
+					   struct iommu_dirty_bitmap *dirty)
 {
 	struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
 	unsigned long long unmapped;
@@ -445,11 +456,15 @@ static unsigned long iommu_v1_unmap_page(struct io_pgtable_ops *ops,
 	while (unmapped < size) {
 		pte = fetch_pte(pgtable, iova, &unmap_size);
 		if (pte) {
-			int i, count;
+			unsigned long i, count;
+			bool pte_dirty = false;
 
 			count = PAGE_SIZE_PTE_COUNT(unmap_size);
 			for (i = 0; i < count; i++)
-				pte[i] = 0ULL;
+				pte_dirty |= free_pte_dirty(&pte[i], pte[i]);
+
+			if (unlikely(pte_dirty && dirty))
+				iommu_dirty_bitmap_record(dirty, iova, unmap_size);
 		}
 
 		iova = (iova & ~(unmap_size - 1)) + unmap_size;
@@ -461,6 +476,22 @@ static unsigned long iommu_v1_unmap_page(struct io_pgtable_ops *ops,
 	return unmapped;
 }
 
+static unsigned long iommu_v1_unmap_page(struct io_pgtable_ops *ops,
+					 unsigned long iova,
+					 size_t size,
+					 struct iommu_iotlb_gather *gather)
+{
+	return __iommu_v1_unmap_page(ops, iova, size, gather, NULL);
+}
+
+static unsigned long iommu_v1_unmap_page_read_dirty(struct io_pgtable_ops *ops,
+						    unsigned long iova, size_t size,
+						    struct iommu_iotlb_gather *gather,
+						    struct iommu_dirty_bitmap *dirty)
+{
+	return __iommu_v1_unmap_page(ops, iova, size, gather, dirty);
+}
+
 static phys_addr_t iommu_v1_iova_to_phys(struct io_pgtable_ops *ops, unsigned long iova)
 {
 	struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
@@ -575,6 +606,7 @@ static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *coo
 	pgtable->iop.ops.unmap          = iommu_v1_unmap_page;
 	pgtable->iop.ops.iova_to_phys   = iommu_v1_iova_to_phys;
 	pgtable->iop.ops.read_and_clear_dirty = iommu_v1_read_and_clear_dirty;
+	pgtable->iop.ops.unmap_read_dirty = iommu_v1_unmap_page_read_dirty;
 
 	return &pgtable->iop;
 }
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 0a86392b2367..a8fcb6e9a684 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -2144,6 +2144,27 @@ static size_t amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova,
 	return r;
 }
 
+static size_t amd_iommu_unmap_read_dirty(struct iommu_domain *dom,
+					 unsigned long iova, size_t page_size,
+					 struct iommu_iotlb_gather *gather,
+					 struct iommu_dirty_bitmap *dirty)
+{
+	struct protection_domain *domain = to_pdomain(dom);
+	struct io_pgtable_ops *ops = &domain->iop.iop.ops;
+	size_t r;
+
+	if ((amd_iommu_pgtable == AMD_IOMMU_V1) &&
+	    (domain->iop.mode == PAGE_MODE_NONE))
+		return 0;
+
+	r = (ops->unmap_read_dirty) ?
+		ops->unmap_read_dirty(ops, iova, page_size, gather, dirty) : 0;
+
+	amd_iommu_iotlb_gather_add_page(dom, gather, iova, page_size);
+
+	return r;
+}
+
 static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom,
 					  dma_addr_t iova)
 {
@@ -2370,6 +2391,7 @@ const struct iommu_ops amd_iommu_ops = {
 		.free = amd_iommu_domain_free,
 		.set_dirty_tracking = amd_iommu_set_dirty_tracking,
 		.read_and_clear_dirty = amd_iommu_read_and_clear_dirty,
+		.unmap_read_dirty = amd_iommu_unmap_read_dirty,
 	}
 };
-- 
2.17.2