From: Keqian Zhu <zhukeqian1@xxxxxxxxxx>

The .read_and_clear_dirty() IOMMU domain op takes care of reading the
dirty bits (i.e. PTE has DBM set and AP[2] clear) and marshalling them
into a bitmap of a given page size. While reading the dirty bits we
also set the PTE AP[2] bit to mark it as writable-clean, depending on
the read_and_clear_dirty() flags.

Structure it in a way that the IOPTE walker is generic, and so we pass
a function pointer over what to do on a per-PTE basis.

[Link below points to the original version that this was based on]

Link: https://lore.kernel.org/lkml/20210413085457.25400-11-zhukeqian1@xxxxxxxxxx/
Co-developed-by: Keqian Zhu <zhukeqian1@xxxxxxxxxx>
Co-developed-by: Kunkun Jiang <jiangkunkun@xxxxxxxxxx>
Signed-off-by: Kunkun Jiang <jiangkunkun@xxxxxxxxxx>
[joaomart: Massage commit message]
Co-developed-by: Joao Martins <joao.m.martins@xxxxxxxxxx>
Signed-off-by: Joao Martins <joao.m.martins@xxxxxxxxxx>
---
 drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c |  23 +++++
 drivers/iommu/io-pgtable-arm.c              | 104 ++++++++++++++++++++
 2 files changed, 127 insertions(+)

diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index e2b98a6a6b74..2cde14003469 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -2765,6 +2765,28 @@ static int arm_smmu_enable_nesting(struct iommu_domain *domain)
 	return ret;
 }
 
+static int arm_smmu_read_and_clear_dirty(struct iommu_domain *domain,
+					 unsigned long iova, size_t size,
+					 unsigned long flags,
+					 struct iommu_dirty_bitmap *dirty)
+{
+	struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+	struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
+	int ret;
+
+	if (smmu_domain->stage != ARM_SMMU_DOMAIN_S1)
+		return -EINVAL;
+
+	if (!ops || !ops->read_and_clear_dirty) {
+		pr_err_once("io-pgtable doesn't support dirty tracking\n");
+		return -ENODEV;
+	}
+
+	ret = ops->read_and_clear_dirty(ops, iova, size, flags, dirty);
+
+	return ret;
+}
+
 static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
 {
 	return iommu_fwspec_add_ids(dev, args->args, 1);
@@ -2893,6 +2915,7 @@ static struct iommu_ops arm_smmu_ops = {
 		.iova_to_phys		= arm_smmu_iova_to_phys,
 		.enable_nesting		= arm_smmu_enable_nesting,
 		.free			= arm_smmu_domain_free,
+		.read_and_clear_dirty	= arm_smmu_read_and_clear_dirty,
 	}
 };
 
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index b2f470529459..de9e61f8452d 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -717,6 +717,109 @@ static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops,
 	return iopte_to_paddr(pte, data) | iova;
 }
 
+struct arm_lpae_iopte_read_dirty {
+	unsigned long flags;
+	struct iommu_dirty_bitmap *dirty;
+};
+
+static int __arm_lpae_read_and_clear_dirty(unsigned long iova, size_t size,
+					   arm_lpae_iopte *ptep, void *opaque)
+{
+	struct arm_lpae_iopte_read_dirty *arg = opaque;
+	struct iommu_dirty_bitmap *dirty = arg->dirty;
+	arm_lpae_iopte pte;
+
+	pte = READ_ONCE(*ptep);
+	if (WARN_ON(!pte))
+		return -EINVAL;
+
+	if ((pte & ARM_LPAE_PTE_AP_WRITABLE) == ARM_LPAE_PTE_AP_WRITABLE)
+		return 0;
+
+	iommu_dirty_bitmap_record(dirty, iova, size);
+	if (!(arg->flags & IOMMU_DIRTY_NO_CLEAR))
+		set_bit(ARM_LPAE_PTE_AP_RDONLY_BIT, (unsigned long *)ptep);
+	return 0;
+}
+
+static int __arm_lpae_iopte_walk(struct arm_lpae_io_pgtable *data,
+				 unsigned long iova, size_t size,
+				 int lvl, arm_lpae_iopte *ptep,
+				 int (*fn)(unsigned long iova, size_t size,
+					   arm_lpae_iopte *pte, void *opaque),
+				 void *opaque)
+{
+	arm_lpae_iopte pte;
+	struct io_pgtable *iop = &data->iop;
+	size_t base, next_size;
+	int ret;
+
+	if (WARN_ON_ONCE(!fn))
+		return -EINVAL;
+
+	if (WARN_ON(lvl == ARM_LPAE_MAX_LEVELS))
+		return -EINVAL;
+
+	ptep += ARM_LPAE_LVL_IDX(iova, lvl, data);
+	pte = READ_ONCE(*ptep);
+	if (WARN_ON(!pte))
+		return -EINVAL;
+
+	if (size == ARM_LPAE_BLOCK_SIZE(lvl, data)) {
+		if (iopte_leaf(pte, lvl, iop->fmt))
+			return fn(iova, size, ptep, opaque);
+
+		/* Current level is table, traverse next level */
+		next_size = ARM_LPAE_BLOCK_SIZE(lvl + 1, data);
+		ptep = iopte_deref(pte, data);
+		for (base = 0; base < size; base += next_size) {
+			ret = __arm_lpae_iopte_walk(data, iova + base,
+						    next_size, lvl + 1, ptep,
+						    fn, opaque);
+			if (ret)
+				return ret;
+		}
+		return 0;
+	} else if (iopte_leaf(pte, lvl, iop->fmt)) {
+		return fn(iova, size, ptep, opaque);
+	}
+
+	/* Keep on walkin' */
+	ptep = iopte_deref(pte, data);
+	return __arm_lpae_iopte_walk(data, iova, size, lvl + 1, ptep,
+				     fn, opaque);
+}
+
+static int arm_lpae_read_and_clear_dirty(struct io_pgtable_ops *ops,
+					 unsigned long iova, size_t size,
+					 unsigned long flags,
+					 struct iommu_dirty_bitmap *dirty)
+{
+	struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
+	struct io_pgtable_cfg *cfg = &data->iop.cfg;
+	struct arm_lpae_iopte_read_dirty arg = {
+		.flags = flags, .dirty = dirty,
+	};
+	arm_lpae_iopte *ptep = data->pgd;
+	int lvl = data->start_level;
+	long iaext = (s64)iova >> cfg->ias;
+
+	if (WARN_ON(!size || (size & cfg->pgsize_bitmap) != size))
+		return -EINVAL;
+
+	if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1)
+		iaext = ~iaext;
+	if (WARN_ON(iaext))
+		return -EINVAL;
+
+	if (data->iop.fmt != ARM_64_LPAE_S1 &&
+	    data->iop.fmt != ARM_32_LPAE_S1)
+		return -EINVAL;
+
+	return __arm_lpae_iopte_walk(data, iova, size, lvl, ptep,
+				     __arm_lpae_read_and_clear_dirty, &arg);
+}
+
 static void arm_lpae_restrict_pgsizes(struct io_pgtable_cfg *cfg)
 {
 	unsigned long granule, page_sizes;
@@ -795,6 +898,7 @@ arm_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg)
 		.map_pages	= arm_lpae_map_pages,
 		.unmap_pages	= arm_lpae_unmap_pages,
 		.iova_to_phys	= arm_lpae_iova_to_phys,
+		.read_and_clear_dirty = arm_lpae_read_and_clear_dirty,
 	};
 
 	return data;
-- 
2.17.2
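
Not part of the patch, just an illustrative user-space sketch of the
writable-clean / writable-dirty PTE encoding the walker above inspects.
Bit positions mirror io-pgtable-arm.c (AP[2]/RDONLY at bit 7, DBM at
bit 51); the helper names here are made up for illustration only, and
the "hardware" write is simulated rather than performed by an SMMU.

/* pte_dirty_sketch.c: build with `cc -o pte_dirty_sketch pte_dirty_sketch.c` */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define PTE_AP_RDONLY	(UINT64_C(1) << 7)	/* AP[2]: set => read-only */
#define PTE_DBM		(UINT64_C(1) << 51)	/* dirty bit modifier */
#define PTE_AP_WRITABLE	(PTE_AP_RDONLY | PTE_DBM)

/* Dirty means DBM is set and hardware has dropped AP[2] on a write. */
static bool pte_is_dirty(uint64_t pte)
{
	return (pte & PTE_AP_WRITABLE) != PTE_AP_WRITABLE;
}

/* Simulate the HTTU update on a DMA write: clear AP[2] (writable-dirty). */
static void hw_dma_write(uint64_t *pte)
{
	*pte &= ~PTE_AP_RDONLY;
}

/* Mirror of the per-PTE callback: record dirty, then re-arm as writable-clean. */
static void read_and_clear_dirty(uint64_t *pte, uint64_t iova)
{
	if (!pte_is_dirty(*pte))
		return;
	printf("dirty IOVA 0x%llx\n", (unsigned long long)iova);
	*pte |= PTE_AP_RDONLY;
}

int main(void)
{
	uint64_t pte = PTE_AP_WRITABLE;		/* start writable-clean */

	read_and_clear_dirty(&pte, 0x1000);	/* clean: nothing reported */
	hw_dma_write(&pte);			/* device writes the page */
	read_and_clear_dirty(&pte, 0x1000);	/* reports dirty, re-arms AP[2] */
	read_and_clear_dirty(&pte, 0x1000);	/* clean again: nothing reported */
	return 0;
}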