Hi Rob,

On Wed, Jul 17, 2024 at 09:36:21AM -0700, Rob Clark wrote:
> From: Rob Clark <robdclark@xxxxxxxxxxxx>
> 
> Add an io-pgtable method to walk the pgtable returning the raw PTEs that
> would be traversed for a given iova access.
> 
> Signed-off-by: Rob Clark <robdclark@xxxxxxxxxxxx>
> Acked-by: Joerg Roedel <jroedel@xxxxxxx>
> 
> ---
>  drivers/iommu/io-pgtable-arm.c | 36 +++++++++++++++++++++++++---------
>  include/linux/io-pgtable.h     | 17 ++++++++++++++++
>  2 files changed, 44 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
> index 3d23b924cec1..e70803940b46 100644
> --- a/drivers/iommu/io-pgtable-arm.c
> +++ b/drivers/iommu/io-pgtable-arm.c
> @@ -690,9 +690,11 @@ static size_t arm_lpae_unmap_pages(struct io_pgtable_ops *ops, unsigned long iov
>  				data->start_level, ptep);
>  }
>  
> -static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops,
> -					 unsigned long iova)
> +static int arm_lpae_pgtable_walk(struct io_pgtable_ops *ops,
> +				 unsigned long iova,
> +				 void *_wd)
>  {
> +	struct arm_lpae_io_pgtable_walk_data *wd = _wd;
>  	struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
>  	arm_lpae_iopte pte, *ptep = data->pgd;
>  	int lvl = data->start_level;
> @@ -700,7 +702,7 @@ static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops,
>  	do {
>  		/* Valid IOPTE pointer? */
>  		if (!ptep)
> -			return 0;
> +			return -ENOENT;
>  
>  		/* Grab the IOPTE we're interested in */
>  		ptep += ARM_LPAE_LVL_IDX(iova, lvl, data);
> @@ -708,22 +710,37 @@ static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops,
>  
>  		/* Valid entry? */
>  		if (!pte)
> -			return 0;
> +			return -ENOENT;
>  
> -		/* Leaf entry? */
> +		wd->ptes[wd->level++] = pte;
> +
> +		/* Leaf entry? If so, we've found the translation */
>  		if (iopte_leaf(pte, lvl, data->iop.fmt))
> -			goto found_translation;
> +			return 0;
>  
> 		/* Take it to the next level */
> 		ptep = iopte_deref(pte, data);
> 	} while (++lvl < ARM_LPAE_MAX_LEVELS);
>  
> 	/* Ran out of page tables to walk */
> -	return 0;
> +	return -ENOENT;
> +}
> +
> +static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops,
> +					 unsigned long iova)
> +{
> +	struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops);
> +	struct arm_lpae_io_pgtable_walk_data wd = {};
> +	int ret, lvl;
> +
> +	ret = arm_lpae_pgtable_walk(ops, iova, &wd);
> +	if (ret)
> +		return 0;
> +
> +	lvl = wd.level + data->start_level;
>  
> -found_translation:
> 	iova &= (ARM_LPAE_BLOCK_SIZE(lvl, data) - 1);
> -	return iopte_to_paddr(pte, data) | iova;
> +	return iopte_to_paddr(wd.ptes[wd.level - 1], data) | iova;
>  }
>  
>  static void arm_lpae_restrict_pgsizes(struct io_pgtable_cfg *cfg)
> @@ -804,6 +821,7 @@ arm_lpae_alloc_pgtable(struct io_pgtable_cfg *cfg)
> 		.map_pages	= arm_lpae_map_pages,
> 		.unmap_pages	= arm_lpae_unmap_pages,
> 		.iova_to_phys	= arm_lpae_iova_to_phys,
> +		.pgtable_walk	= arm_lpae_pgtable_walk,
> 	};
>  
> 	return data;
> diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
> index 86cf1f7ae389..df6f6e58310c 100644
> --- a/include/linux/io-pgtable.h
> +++ b/include/linux/io-pgtable.h
> @@ -171,12 +171,28 @@ struct io_pgtable_cfg {
> 	};
>  };
>  
> +/**
> + * struct arm_lpae_io_pgtable_walk_data - information from a pgtable walk
> + *
> + * @ptes: The recorded PTE values from the walk
> + * @level: The level of the last PTE
> + *
> + * @level also specifies the last valid index in @ptes
> + */
> +struct arm_lpae_io_pgtable_walk_data {
> +	u64 ptes[4];

I don't see a reason to save the whole walk for iova_to_phys. I see that
the DRM driver uses those PTEs next, but I am worried that won't scale;
a callback mechanism sounds better.

Also, there is a page table walker recently added to io-pgtable-arm, for
dirty bit tracking:
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/drivers/iommu/io-pgtable-arm.c?id=4fe88fd8b4aecb7f9680bf898811db76b94095a9

I'd suggest consolidating those into one walker where each caller has its
own logic in a callback.
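Very roughly, something along these lines (untested sketch, only to show the
shape I mean; the context struct and callback names are made up, and it just
reuses the helpers that already exist in io-pgtable-arm.c):

struct arm_lpae_walk_ctx {
	/* called for each valid PTE; return non-zero to stop the walk */
	int (*visit)(struct arm_lpae_walk_ctx *ctx, int lvl,
		     arm_lpae_iopte pte, size_t size);
	void *priv;
};

static int __arm_lpae_walk(struct arm_lpae_io_pgtable *data,
			   unsigned long iova,
			   struct arm_lpae_walk_ctx *ctx)
{
	arm_lpae_iopte pte, *ptep = data->pgd;
	int lvl = data->start_level, ret;

	do {
		/* Valid IOPTE pointer? */
		if (!ptep)
			return -ENOENT;

		/* Grab the IOPTE we're interested in */
		ptep += ARM_LPAE_LVL_IDX(iova, lvl, data);
		pte = READ_ONCE(*ptep);

		/* Valid entry? */
		if (!pte)
			return -ENOENT;

		/* Let the caller decide what to do with this level */
		ret = ctx->visit(ctx, lvl, pte, ARM_LPAE_BLOCK_SIZE(lvl, data));
		if (ret)
			return ret;

		/* Leaf entry? Then the walk is done */
		if (iopte_leaf(pte, lvl, data->iop.fmt))
			return 0;

		/* Take it to the next level */
		ptep = iopte_deref(pte, data);
	} while (++lvl < ARM_LPAE_MAX_LEVELS);

	/* Ran out of page tables to walk */
	return -ENOENT;
}

Then iova_to_phys only needs a visitor that remembers the last PTE and
level, the DRM fault decoding can record every level it sees, and the
dirty-bit walk can test/clear bits from its own visitor, without the core
loop knowing about any of them.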
Thanks,
Mostafa

> +	int level;
> +};
> +
>  /**
>   * struct io_pgtable_ops - Page table manipulation API for IOMMU drivers.
>   *
>   * @map_pages:     Map a physically contiguous range of pages of the same size.
>   * @unmap_pages:   Unmap a range of virtually contiguous pages of the same size.
>   * @iova_to_phys:  Translate iova to physical address.
> + * @pgtable_walk:  (optional) Perform a page table walk for a given iova. The
> + *                 type for the wd parameter is specific to pgtable type, as
> + *                 the PTE size and number of levels differs per pgtable type.
>   *
>   * These functions map directly onto the iommu_ops member functions with
>   * the same names.
> @@ -190,6 +206,7 @@ struct io_pgtable_ops {
> 			     struct iommu_iotlb_gather *gather);
> 	phys_addr_t (*iova_to_phys)(struct io_pgtable_ops *ops,
> 				    unsigned long iova);
> +	int (*pgtable_walk)(struct io_pgtable_ops *ops, unsigned long iova, void *wd);
> 	int (*read_and_clear_dirty)(struct io_pgtable_ops *ops,
> 				    unsigned long iova, size_t size,
> 				    unsigned long flags,
> -- 
> 2.45.2
> 