On Mon, Mar 15, 2021 at 01:36:31PM -0700, Nicolin Chen wrote: > This patch dumps all active mapping entries from pagetable > to a debugfs directory named "mappings". > > Attaching an example: > > SWGROUP: hc > ASID: 0 > reg: 0x250 > PTB_ASID: 0xe0080004 > as->pd_dma: 0x80004000 > { > [1023] 0xf008000b (1) > { > PTE RANGE | ATTR | PHYS | IOVA | SIZE > [#1023, #1023] | 0x5 | 0x0000000111a8d000 | 0x00000000fffff000 | 0x1000 > } > } > Total PDE count: 1 > Total PTE count: 1 > > Tested-by: Dmitry Osipenko <digetx@xxxxxxxxx> > Reviewed-by: Dmitry Osipenko <digetx@xxxxxxxxx> > Signed-off-by: Nicolin Chen <nicoleotsuka@xxxxxxxxx> > --- > > Changelog > v5: > * Fixed a typo in commit message > * Splitted a long line into two lines > * Rearranged variable defines by length > * Added Tested-by and Reviewed-by from Dmitry > v4: https://lkml.org/lkml/2021/3/14/429 > * Changed %d to %u for unsigned variables > * Fixed print format mismatch warnings on ARM32 > v3: https://lkml.org/lkml/2021/3/14/30 > * Fixed PHYS and IOVA print formats > * Changed variables to unsigned int type > * Changed the table outputs to be compact > v2: https://lkml.org/lkml/2021/3/9/1382 > * Expanded mutex range to the entire function > * Added as->lock to protect pagetable walkthrough > * Replaced devm_kzalloc with devm_kcalloc for group_debug > * Added "PTE RANGE" and "SIZE" columns to group contiguous mappings > * Dropped as->count check; added WARN_ON when as->count mismatches pd[pd_index] > v1: https://lkml.org/lkml/2020/9/26/70 > > drivers/iommu/tegra-smmu.c | 181 ++++++++++++++++++++++++++++++++++++- > 1 file changed, 176 insertions(+), 5 deletions(-) > > diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c > index 97eb62f667d2..b728cae63314 100644 > --- a/drivers/iommu/tegra-smmu.c > +++ b/drivers/iommu/tegra-smmu.c > @@ -19,6 +19,11 @@ > #include <soc/tegra/ahb.h> > #include <soc/tegra/mc.h> > > +struct tegra_smmu_group_debug { > + const struct tegra_smmu_swgroup *group; > + void *priv; This always stores the address space, so why not make this: struct tegra_smmu_as *as; ? While at it, perhaps throw in a const to make sure we don't modify this structure in the debugfs code. > +}; > + > struct tegra_smmu_group { > struct list_head list; > struct tegra_smmu *smmu; > @@ -47,6 +52,8 @@ struct tegra_smmu { > struct dentry *debugfs; > > struct iommu_device iommu; /* IOMMU Core code handle */ > + > + struct tegra_smmu_group_debug *group_debug; > }; > > struct tegra_smmu_as { > @@ -152,6 +159,9 @@ static inline u32 smmu_readl(struct tegra_smmu *smmu, unsigned long offset) > > #define SMMU_PDE_ATTR (SMMU_PDE_READABLE | SMMU_PDE_WRITABLE | \ > SMMU_PDE_NONSECURE) > +#define SMMU_PTE_ATTR (SMMU_PTE_READABLE | SMMU_PTE_WRITABLE | \ > + SMMU_PTE_NONSECURE) > +#define SMMU_PTE_ATTR_SHIFT (29) No need for the parentheses here. > > static unsigned int iova_pd_index(unsigned long iova) > { > @@ -163,6 +173,12 @@ static unsigned int iova_pt_index(unsigned long iova) > return (iova >> SMMU_PTE_SHIFT) & (SMMU_NUM_PTE - 1); > } > > +static unsigned long pd_pt_index_iova(unsigned int pd_index, unsigned int pt_index) > +{ > + return ((dma_addr_t)pd_index & (SMMU_NUM_PDE - 1)) << SMMU_PDE_SHIFT | > + ((dma_addr_t)pt_index & (SMMU_NUM_PTE - 1)) << SMMU_PTE_SHIFT; > +} > + > static bool smmu_dma_addr_valid(struct tegra_smmu *smmu, dma_addr_t addr) > { > addr >>= 12; > @@ -334,7 +350,7 @@ static void tegra_smmu_domain_free(struct iommu_domain *domain) > } > > static const struct tegra_smmu_swgroup * > -tegra_smmu_find_swgroup(struct tegra_smmu *smmu, unsigned int swgroup) > +tegra_smmu_find_swgroup(struct tegra_smmu *smmu, unsigned int swgroup, int *index) > { > const struct tegra_smmu_swgroup *group = NULL; > unsigned int i; > @@ -342,6 +358,8 @@ tegra_smmu_find_swgroup(struct tegra_smmu *smmu, unsigned int swgroup) > for (i = 0; i < smmu->soc->num_swgroups; i++) { > if (smmu->soc->swgroups[i].swgroup == swgroup) { > group = &smmu->soc->swgroups[i]; > + if (index) > + *index = i; This doesn't look like the right place for this. And this also makes things hard to follow because it passes out-of-band data in the index parameter. I'm thinking that this could benefit from a bit of refactoring where we could for example embed struct tegra_smmu_group_debug into struct tegra_smmu_group and then reference that when necessary instead of carrying all that data in an orthogonal array. That should also make it easier to track this. Come to think of it, everything that's currently in your new struct tegra_smmu_group_debug would be useful in struct tegra_smmu_group, irrespective of debugfs support. > break; > } > } > @@ -350,19 +368,22 @@ tegra_smmu_find_swgroup(struct tegra_smmu *smmu, unsigned int swgroup) > } > > static void tegra_smmu_enable(struct tegra_smmu *smmu, unsigned int swgroup, > - unsigned int asid) > + struct tegra_smmu_as *as) > { > const struct tegra_smmu_swgroup *group; > + unsigned int asid = as->id; > unsigned int i; > u32 value; > > - group = tegra_smmu_find_swgroup(smmu, swgroup); > + group = tegra_smmu_find_swgroup(smmu, swgroup, &i); > if (group) { > value = smmu_readl(smmu, group->reg); > value &= ~SMMU_ASID_MASK; > value |= SMMU_ASID_VALUE(asid); > value |= SMMU_ASID_ENABLE; > smmu_writel(smmu, value, group->reg); > + if (smmu->group_debug) > + smmu->group_debug[i].priv = as; Logically I think this would also be better located in tegra_smmu_attach_dev() because we're setting up the relationship between a group and an address space here and this is not in fact part of enabling the SMMU. > } else { > pr_warn("%s group from swgroup %u not found\n", __func__, > swgroup); > @@ -389,13 +410,15 @@ static void tegra_smmu_disable(struct tegra_smmu *smmu, unsigned int swgroup, > unsigned int i; > u32 value; > > - group = tegra_smmu_find_swgroup(smmu, swgroup); > + group = tegra_smmu_find_swgroup(smmu, swgroup, &i); > if (group) { > value = smmu_readl(smmu, group->reg); > value &= ~SMMU_ASID_MASK; > value |= SMMU_ASID_VALUE(asid); > value &= ~SMMU_ASID_ENABLE; > smmu_writel(smmu, value, group->reg); > + if (smmu->group_debug) > + smmu->group_debug[i].priv = NULL; > } > > for (i = 0; i < smmu->soc->num_clients; i++) { > @@ -499,7 +522,7 @@ static int tegra_smmu_attach_dev(struct iommu_domain *domain, > if (err) > goto disable; > > - tegra_smmu_enable(smmu, fwspec->ids[index], as->id); > + tegra_smmu_enable(smmu, fwspec->ids[index], as); > } > > if (index == 0) > @@ -1058,8 +1081,141 @@ static int tegra_smmu_clients_show(struct seq_file *s, void *data) > > DEFINE_SHOW_ATTRIBUTE(tegra_smmu_clients); > > +static int tegra_smmu_mappings_show(struct seq_file *s, void *data) > +{ > + struct tegra_smmu_group_debug *group_debug = s->private; > + const struct tegra_smmu_swgroup *group; > + struct tegra_smmu_as *as; > + struct tegra_smmu *smmu; > + unsigned int pd_index; > + unsigned int pt_index; > + unsigned long flags; > + u64 pte_count = 0; > + u32 pde_count = 0; > + u32 val, ptb_reg; > + u32 *pd; > + > + if (!group_debug || !group_debug->priv || !group_debug->group) > + return 0; > + > + group = group_debug->group; > + as = group_debug->priv; > + smmu = as->smmu; > + > + mutex_lock(&smmu->lock); > + > + val = smmu_readl(smmu, group->reg) & SMMU_ASID_ENABLE; > + if (!val) > + goto unlock; > + > + pd = page_address(as->pd); > + if (!pd) > + goto unlock; > + > + seq_printf(s, "\nSWGROUP: %s\nASID: %d\nreg: 0x%x\n", > + group->name, as->id, group->reg); Is group->reg really that useful here? > + > + smmu_writel(smmu, as->id & 0x7f, SMMU_PTB_ASID); > + ptb_reg = smmu_readl(smmu, SMMU_PTB_DATA); > + > + seq_printf(s, "PTB_ASID: 0x%x\nas->pd_dma: %pad\n", > + ptb_reg, &as->pd_dma); This looks somewhat redundant because as->pd_dma is already part of the PTB_ASID register value. Instead, perhaps decode the upper bits of that register and simply output as->pdma so that we don't duplicate the base address of the page table? > + seq_puts(s, "{\n"); > + > + spin_lock_irqsave(&as->lock, flags); > + > + for (pd_index = 0; pd_index < SMMU_NUM_PDE; pd_index++) { > + struct page *pt_page; > + unsigned int i; > + u32 *addr; > + > + /* An empty PDE should not have a pte use count */ > + WARN_ON_ONCE(!pd[pd_index] ^ !as->count[pd_index]); > + > + /* Skip this empty PDE */ > + if (!pd[pd_index]) > + continue; > + > + pde_count++; > + pte_count += as->count[pd_index]; > + seq_printf(s, "\t[%u] 0x%x (%d)\n", > + pd_index, pd[pd_index], as->count[pd_index]); > + pt_page = as->pts[pd_index]; > + addr = page_address(pt_page); > + > + seq_puts(s, "\t{\n"); > + seq_printf(s, "\t\t%-14s | %-4s | %-10s%s | %-10s%s | %-11s\n", > + "PTE RANGE", "ATTR", > + "PHYS", sizeof(phys_addr_t) > 4 ? " " : "", > + "IOVA", sizeof(dma_addr_t) > 4 ? " " : "", > + "SIZE"); > + for (pt_index = 0; pt_index < SMMU_NUM_PTE; pt_index += i) { > + size_t size = SMMU_SIZE_PT; > + dma_addr_t iova; > + phys_addr_t pa; > + > + i = 1; > + > + if (!addr[pt_index]) > + continue; > + > + iova = pd_pt_index_iova(pd_index, pt_index); > + pa = SMMU_PFN_PHYS(addr[pt_index] & ~SMMU_PTE_ATTR); > + > + /* Check contiguous mappings and increase size */ > + while (pt_index + i < SMMU_NUM_PTE) { > + dma_addr_t next_iova; > + phys_addr_t next_pa; > + > + if (!addr[pt_index + i]) > + break; > + > + next_iova = pd_pt_index_iova(pd_index, pt_index + i); > + next_pa = SMMU_PFN_PHYS(addr[pt_index + i] & ~SMMU_PTE_ATTR); > + > + /* Break at the end of a linear mapping */ > + if ((next_iova - iova != SMMU_SIZE_PT * i) || > + (next_pa - pa != SMMU_SIZE_PT * i)) > + break; > + > + i++; > + } > + > + seq_printf(s, "\t\t[#%-4u, #%-4u] | 0x%-2x | %pa | %pad | 0x%-9zx\n", > + pt_index, pt_index + i - 1, > + addr[pt_index] >> SMMU_PTE_ATTR_SHIFT, > + &pa, &iova, size * i); > + } > + seq_puts(s, "\t}\n"); > + } > + > + spin_unlock_irqrestore(&as->lock, flags); > + > + seq_puts(s, "}\n"); > + seq_printf(s, "Total PDE count: %u\n", pde_count); > + seq_printf(s, "Total PTE count: %llu\n", pte_count); Some of the above looks like it wouldn't be very easily consumed by scripts. Is that something we want to do? Or is this targetted primarily at human consumption? > + > +unlock: > + mutex_unlock(&smmu->lock); > + > + return 0; > +} > + > +DEFINE_SHOW_ATTRIBUTE(tegra_smmu_mappings); > + > static void tegra_smmu_debugfs_init(struct tegra_smmu *smmu) > { > + const struct tegra_smmu_soc *soc = smmu->soc; > + struct tegra_smmu_group_debug *group_debug; > + struct device *dev = smmu->dev; > + struct dentry *d; > + unsigned int i; > + > + group_debug = devm_kcalloc(dev, soc->num_swgroups, > + sizeof(*group_debug), GFP_KERNEL); > + if (!group_debug) > + return; Memory allocation also becomes slightly easier if this is just embedded inside struct tegra_smmu_group. Thierry
Attachment:
signature.asc
Description: PGP signature