On 16/06/2023 17:46, Shameerali Kolothum Thodi wrote:
> Hi Joao,
>
>> -----Original Message-----
>> From: Joao Martins [mailto:joao.m.martins@xxxxxxxxxx]
>> Sent: 18 May 2023 21:47
>> To: iommu@xxxxxxxxxxxxxxx
>> Cc: Jason Gunthorpe <jgg@xxxxxxxxxx>; Kevin Tian <kevin.tian@xxxxxxxxx>;
>> Shameerali Kolothum Thodi <shameerali.kolothum.thodi@xxxxxxxxxx>;
>> Lu Baolu <baolu.lu@xxxxxxxxxxxxxxx>; Yi Liu <yi.l.liu@xxxxxxxxx>;
>> Yi Y Sun <yi.y.sun@xxxxxxxxx>; Eric Auger <eric.auger@xxxxxxxxxx>;
>> Nicolin Chen <nicolinc@xxxxxxxxxx>; Joerg Roedel <joro@xxxxxxxxxx>;
>> Jean-Philippe Brucker <jean-philippe@xxxxxxxxxx>;
>> Suravee Suthikulpanit <suravee.suthikulpanit@xxxxxxx>;
>> Will Deacon <will@xxxxxxxxxx>; Robin Murphy <robin.murphy@xxxxxxx>;
>> Alex Williamson <alex.williamson@xxxxxxxxxx>; kvm@xxxxxxxxxxxxxxx;
>> Joao Martins <joao.m.martins@xxxxxxxxxx>
>> Subject: [PATCH RFCv2 22/24] iommu/arm-smmu-v3: Add read_and_clear_dirty() support
>>
>> From: Keqian Zhu <zhukeqian1@xxxxxxxxxx>
>>
>> .read_and_clear_dirty() IOMMU domain op takes care of reading the dirty
>> bits (i.e. PTE has both DBM and AP[2] set) and marshalling into a bitmap
>> of a given page size.
>>
>> While reading the dirty bits we also clear the PTE AP[2] bit to mark it
>> as writable-clean, depending on read_and_clear_dirty() flags.
>>
>> Structure it in a way that the IOPTE walker is generic, and so we pass a
>> function pointer over what to do on a per-PTE basis.
>>
>> [Link below points to the original version this was based on]
>>
>> Link: https://lore.kernel.org/lkml/20210413085457.25400-11-zhukeqian1@huawei.com/
>> Co-developed-by: Keqian Zhu <zhukeqian1@xxxxxxxxxx>
>> Co-developed-by: Kunkun Jiang <jiangkunkun@xxxxxxxxxx>
>> Signed-off-by: Kunkun Jiang <jiangkunkun@xxxxxxxxxx>
>> [joaomart: Massage commit message]
>> Co-developed-by: Joao Martins <joao.m.martins@xxxxxxxxxx>
>> Signed-off-by: Joao Martins <joao.m.martins@xxxxxxxxxx>
>> ---
>>  drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c |  23 +++++
>>  drivers/iommu/io-pgtable-arm.c              | 104 ++++++++++++++++++++
>>  2 files changed, 127 insertions(+)
>>
>> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
>> index e2b98a6a6b74..2cde14003469 100644
>> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
>> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
>> @@ -2765,6 +2765,28 @@ static int arm_smmu_enable_nesting(struct iommu_domain *domain)
>>          return ret;
>>  }
>>
>> +static int arm_smmu_read_and_clear_dirty(struct iommu_domain *domain,
>> +                                         unsigned long iova, size_t size,
>> +                                         unsigned long flags,
>> +                                         struct iommu_dirty_bitmap *dirty)
>> +{
>> +        struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
>> +        struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops;
>> +        int ret;
>> +
>> +        if (smmu_domain->stage != ARM_SMMU_DOMAIN_S1)
>> +                return -EINVAL;
>> +
>> +        if (!ops || !ops->read_and_clear_dirty) {
>> +                pr_err_once("io-pgtable don't support dirty tracking\n");
>> +                return -ENODEV;
>> +        }
>> +
>> +        ret = ops->read_and_clear_dirty(ops, iova, size, flags, dirty);
>> +
>> +        return ret;
>> +}
>> +
>>  static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args)
>>  {
>>          return iommu_fwspec_add_ids(dev, args->args, 1);
>> @@ -2893,6 +2915,7 @@ static struct iommu_ops arm_smmu_ops = {
>>                  .iova_to_phys           = arm_smmu_iova_to_phys,
>>                  .enable_nesting         = arm_smmu_enable_nesting,
>>                  .free                   = arm_smmu_domain_free,
>> +                .read_and_clear_dirty   = arm_smmu_read_and_clear_dirty,
>>          }
>> };
>>
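For anyone following along: the caller side of this new op isn't in this
patch; the core wiring is added earlier in the series. Just to illustrate
the calling convention, a rough sketch of what a core-side wrapper would
look like (the wrapper name and error code here are made up for the
example, not the series' actual core code):

/* Hypothetical core-side wrapper, for illustration only. */
static int iommu_read_and_clear_dirty(struct iommu_domain *domain,
                                      unsigned long iova, size_t size,
                                      unsigned long flags,
                                      struct iommu_dirty_bitmap *dirty)
{
        const struct iommu_domain_ops *ops = domain->ops;

        /* Drivers that don't implement the op simply don't support it */
        if (!ops || !ops->read_and_clear_dirty)
                return -EOPNOTSUPP;

        return ops->read_and_clear_dirty(domain, iova, size, flags, dirty);
}
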
>> diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
>> index b2f470529459..de9e61f8452d 100644
>> --- a/drivers/iommu/io-pgtable-arm.c
>> +++ b/drivers/iommu/io-pgtable-arm.c
>> @@ -717,6 +717,109 @@ static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops,
>>          return iopte_to_paddr(pte, data) | iova;
>>  }
>>
>> +struct arm_lpae_iopte_read_dirty {
>> +        unsigned long flags;
>> +        struct iommu_dirty_bitmap *dirty;
>> +};
>> +
>> +static int __arm_lpae_read_and_clear_dirty(unsigned long iova, size_t size,
>> +                                           arm_lpae_iopte *ptep, void *opaque)
>> +{
>> +        struct arm_lpae_iopte_read_dirty *arg = opaque;
>> +        struct iommu_dirty_bitmap *dirty = arg->dirty;
>> +        arm_lpae_iopte pte;
>> +
>> +        pte = READ_ONCE(*ptep);
>> +        if (WARN_ON(!pte))
>> +                return -EINVAL;
>> +
>> +        if ((pte & ARM_LPAE_PTE_AP_WRITABLE) == ARM_LPAE_PTE_AP_WRITABLE)
>> +                return 0;
>> +
>> +        iommu_dirty_bitmap_record(dirty, iova, size);
>> +        if (!(arg->flags & IOMMU_DIRTY_NO_CLEAR))
>> +                set_bit(ARM_LPAE_PTE_AP_RDONLY_BIT, (unsigned long *)ptep);
>> +        return 0;
>> +}
>> +
>> +static int __arm_lpae_iopte_walk(struct arm_lpae_io_pgtable *data,
>> +                                 unsigned long iova, size_t size,
>> +                                 int lvl, arm_lpae_iopte *ptep,
>> +                                 int (*fn)(unsigned long iova, size_t size,
>> +                                           arm_lpae_iopte *pte, void *opaque),
>> +                                 void *opaque)
>> +{
>> +        arm_lpae_iopte pte;
>> +        struct io_pgtable *iop = &data->iop;
>> +        size_t base, next_size;
>> +        int ret;
>> +
>> +        if (WARN_ON_ONCE(!fn))
>> +                return -EINVAL;
>> +
>> +        if (WARN_ON(lvl == ARM_LPAE_MAX_LEVELS))
>> +                return -EINVAL;
>> +
>> +        ptep += ARM_LPAE_LVL_IDX(iova, lvl, data);
>> +        pte = READ_ONCE(*ptep);
>> +        if (WARN_ON(!pte))
>> +                return -EINVAL;
>> +
>> +        if (size == ARM_LPAE_BLOCK_SIZE(lvl, data)) {
>> +                if (iopte_leaf(pte, lvl, iop->fmt))
>> +                        return fn(iova, size, ptep, opaque);
>> +
>> +                /* Current level is table, traverse next level */
>> +                next_size = ARM_LPAE_BLOCK_SIZE(lvl + 1, data);
>> +                ptep = iopte_deref(pte, data);
>> +                for (base = 0; base < size; base += next_size) {
>> +                        ret = __arm_lpae_iopte_walk(data, iova + base,
>> +                                                    next_size, lvl + 1, ptep,
>> +                                                    fn, opaque);
>> +                        if (ret)
>> +                                return ret;
>> +                }
>> +                return 0;
>> +        } else if (iopte_leaf(pte, lvl, iop->fmt)) {
>> +                return fn(iova, size, ptep, opaque);
>> +        }
>> +
>> +        /* Keep on walkin */
>> +        ptep = iopte_deref(pte, data);
>> +        return __arm_lpae_iopte_walk(data, iova, size, lvl + 1, ptep,
>> +                                     fn, opaque);
>> +}
>> +
>> +static int arm_lpae_read_and_clear_dirty(struct io_pgtable_ops *ops,
>> +                                         unsigned long iova, size_t size,
>> +                                         unsigned long flags,
>> +                                         struct iommu_dirty_bitmap *dirty)
>> +{
>> +        struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
>> +        struct io_pgtable_cfg *cfg = &data->iop.cfg;
>> +        struct arm_lpae_iopte_read_dirty arg = {
>> +                .flags = flags, .dirty = dirty,
>> +        };
>> +        arm_lpae_iopte *ptep = data->pgd;
>> +        int lvl = data->start_level;
>> +        long iaext = (s64)iova >> cfg->ias;
>> +
>> +        if (WARN_ON(!size || (size & cfg->pgsize_bitmap) != size))
>> +                return -EINVAL;
>
> I guess the size here is supposed to be one of the page sizes that the
> IOMMU supports. But looking at the code, it looks like we are passing the
> iova mapped length, and it fails here in my test setup. Could you please
> check and confirm?
>
I think this check came from the original patch this was based on; it's meant
to test that the length is aligned to the page size, but I failed to remove it
in this snippet. We should remove it.
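To make the failure mode concrete: the check above only passes when @size is
exactly one of the sizes in cfg->pgsize_bitmap, never a multiple of one, so
any multi-page mapped length trips it. A standalone sketch you can compile in
userspace (the bitmap value assumes a 4K granule; the alignment test at the
end is what was presumably intended):

#include <stdio.h>

int main(void)
{
        /* 4K | 2M | 1G, as for a 4K-granule LPAE table */
        unsigned long pgsize_bitmap = (1UL << 12) | (1UL << 21) | (1UL << 30);
        unsigned long size = 2UL << 12; /* two 4K pages, i.e. a mapped length */

        /* The check in the patch: fails for anything but a single page size */
        printf("exact page-size check: %s\n",
               (size & pgsize_bitmap) == size ? "pass" : "fail");

        /* Alignment to the smallest supported page size passes instead */
        unsigned long min_pagesz = 1UL << __builtin_ctzl(pgsize_bitmap);
        printf("alignment check: %s\n",
               (size & (min_pagesz - 1)) == 0 ? "pass" : "fail");
        return 0;
}

which prints "fail" for the first check and "pass" for the second.
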
> Thanks,
> Shameer
>
>
>> +
>> +        if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_TTBR1)
>> +                iaext = ~iaext;
>> +        if (WARN_ON(iaext))
>> +                return -EINVAL;
>> +
>> +        if (data->iop.fmt != ARM_64_LPAE_S1 &&
>> +            data->iop.fmt != ARM_32_LPAE_S1)
>> +                return -EINVAL;
>> +
>> +        return __arm_lpae_iopte_walk(data, iova, size, lvl, ptep,
>> +                                     __arm_lpae_read_and_clear_dirty, &arg);
>> +}
>> +
>>  static void arm_lpae_restrict_pgsizes(struct io_pgtable_cfg *cfg)
>>  {
>>          unsigned long granule, page_sizes;
>> @@ -795,6 +898,7 @@ arm_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg)
>>                  .map_pages      = arm_lpae_map_pages,
>>                  .unmap_pages    = arm_lpae_unmap_pages,
>>                  .iova_to_phys   = arm_lpae_iova_to_phys,
>> +                .read_and_clear_dirty = arm_lpae_read_and_clear_dirty,
>>          };
>>
>>          return data;
>> --
>> 2.17.2
>
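As an aside, for reference: the dirty encoding the walker keys off follows the
Arm VMSAv8-64 descriptor format, where DBM is descriptor bit 51 and AP[2]
(read-only) is bit 7. With HTTU dirty tracking enabled, hardware clears AP[2]
on the first write to a DBM-enabled page, and clearing the dirty state means
setting AP[2] again. A standalone sketch of those semantics (names are
illustrative, not the driver's):

#include <stdbool.h>
#include <stdint.h>

#define PTE_AP_RDONLY   (1ULL << 7)     /* AP[2]: 1 = read-only */
#define PTE_DBM         (1ULL << 51)    /* Dirty Bit Modifier */

/* "DBM set, AP[2] clear" reads as writable-dirty */
bool pte_is_writable_dirty(uint64_t pte)
{
        return (pte & (PTE_DBM | PTE_AP_RDONLY)) == PTE_DBM;
}

/* Marking the page clean again means re-setting AP[2] (writable-clean) */
uint64_t pte_mk_writable_clean(uint64_t pte)
{
        return pte | PTE_AP_RDONLY;
}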