On Friday, 08.11.2024 at 22:36 +0800, Sui Jingfeng wrote:
> dma_direct_alloc() allocates at least one page, whose size is the CPU
> PAGE_SIZE, while etnaviv_iommuv2_ensure_stlb() only asks for 4KiB. The
> memory beyond the first 4KiB is wasted on systems with a bigger page
> size: on 16KiB CPU page size systems we waste the remaining 12KiB, and
> on 64KiB CPU page size systems we waste the remaining 60KiB.
>
> Since addresses within one page are always contiguous, the remaining
> space can be used to store adjacent slave TLB entries. When a
> neighbouring STLB is hit the next time, we don't have to ask the
> system for another page, saving both memory and time.
>
While this isn't adding a lot of code to etnaviv, I wonder if this
couldn't be handled by using a dma_pool for the pagetable allocations
(rough sketch below the quoted diff).

Regards,
Lucas

> Signed-off-by: Sui Jingfeng <sui.jingfeng@xxxxxxxxx>
> ---
>  drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c | 64 +++++++++++++++++++---
>  1 file changed, 56 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c b/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c
> index d664ae29ae20..fa6eed1ae1be 100644
> --- a/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c
> +++ b/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c
> @@ -44,19 +44,66 @@ to_v2_context(struct etnaviv_iommu_context *context)
>  	return container_of(context, struct etnaviv_iommuv2_context, base);
>  }
>
> +static int etnaviv_iommuv2_stlb_free(struct etnaviv_iommuv2_context *context)
> +{
> +	struct device *dev = context->base.global->dev;
> +	unsigned int i;
> +
> +	for (i = 0; i < MMUv2_MAX_STLB_ENTRIES; ++i) {
> +		u32 *vaddr = context->stlb_cpu[i];
> +
> +		if (!vaddr)
> +			continue;
> +
> +		context->stlb_cpu[i] = NULL;
> +
> +		if (i % (PAGE_SIZE / SZ_4K))
> +			continue;
> +
> +		dma_free_wc(dev, PAGE_SIZE, vaddr, context->stlb_dma[i]);
> +	}
> +
> +	return 0;
> +}
> +
> +static int
> +etnaviv_iommuv2_ensure_stlb_new(struct etnaviv_iommuv2_context *context,
> +				unsigned int stlb)
> +{
> +	struct device *dev = context->base.global->dev;
> +	void *vaddr;
> +	dma_addr_t daddr;
> +	unsigned int i;
> +
> +	if (context->stlb_cpu[stlb])
> +		return 0;
> +
> +	vaddr = dma_alloc_wc(dev, PAGE_SIZE, &daddr, GFP_KERNEL);
> +	if (!vaddr)
> +		return -ENOMEM;
> +
> +	memset32(vaddr, MMUv2_PTE_EXCEPTION, PAGE_SIZE / sizeof(u32));
> +
> +	stlb &= ~(PAGE_SIZE / SZ_4K - 1);
> +
> +	for (i = 0; i < PAGE_SIZE / SZ_4K; ++i) {
> +		context->stlb_cpu[stlb + i] = vaddr;
> +		context->stlb_dma[stlb + i] = daddr;
> +		context->mtlb_cpu[stlb + i] = daddr | MMUv2_PTE_PRESENT;
> +		vaddr += SZ_4K;
> +		daddr += SZ_4K;
> +	}
> +
> +	return 0;
> +}
> +
>  static void etnaviv_iommuv2_free(struct etnaviv_iommu_context *context)
>  {
>  	struct etnaviv_iommuv2_context *v2_context = to_v2_context(context);
> -	int i;
>
>  	drm_mm_takedown(&context->mm);
>
> -	for (i = 0; i < MMUv2_MAX_STLB_ENTRIES; i++) {
> -		if (v2_context->stlb_cpu[i])
> -			dma_free_wc(context->global->dev, SZ_4K,
> -				    v2_context->stlb_cpu[i],
> -				    v2_context->stlb_dma[i]);
> -	}
> +	etnaviv_iommuv2_stlb_free(v2_context);
>
>  	dma_free_wc(context->global->dev, SZ_4K, v2_context->mtlb_cpu,
>  		    v2_context->mtlb_dma);
> @@ -65,6 +112,7 @@ static void etnaviv_iommuv2_free(struct etnaviv_iommu_context *context)
>
>  	vfree(v2_context);
>  }
> +
>  static int
>  etnaviv_iommuv2_ensure_stlb(struct etnaviv_iommuv2_context *v2_context,
>  			    int stlb)
> @@ -109,7 +157,7 @@ static int etnaviv_iommuv2_map(struct etnaviv_iommu_context *context,
>  	mtlb_entry = (iova & MMUv2_MTLB_MASK) >> MMUv2_MTLB_SHIFT;
>  	stlb_entry = (iova & MMUv2_STLB_MASK) >> MMUv2_STLB_SHIFT;
>
> -	ret = etnaviv_iommuv2_ensure_stlb(v2_context, mtlb_entry);
> +	ret = etnaviv_iommuv2_ensure_stlb_new(v2_context, mtlb_entry);
>  	if (ret)
>  		return ret;
>
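Just to illustrate the dma_pool idea, here is a rough, untested sketch of
what the allocation side could look like. The stlb_pool member and the
function name are made up for the example (they don't exist in the driver
today); the stlb_cpu/stlb_dma/mtlb_cpu fields and the MMUv2_* constants are
the existing ones. Note that dma_pool memory is coherent rather than
write-combined like dma_alloc_wc(), so whether that is acceptable here
would need checking.

#include <linux/dmapool.h>

/*
 * Sketch only: assumes a new "struct dma_pool *stlb_pool" member in
 * struct etnaviv_iommuv2_context (hypothetical, not in the driver).
 */
static int
etnaviv_iommuv2_ensure_stlb_pool(struct etnaviv_iommuv2_context *context,
				 int stlb)
{
	struct device *dev = context->base.global->dev;
	void *vaddr;
	dma_addr_t daddr;

	if (context->stlb_cpu[stlb])
		return 0;

	/*
	 * Create the pool lazily: 4KiB blocks, 4KiB aligned, so several
	 * STLBs share one backing page on 16KiB/64KiB PAGE_SIZE systems.
	 */
	if (!context->stlb_pool) {
		context->stlb_pool = dma_pool_create("etnaviv-stlb", dev,
						     SZ_4K, SZ_4K, 0);
		if (!context->stlb_pool)
			return -ENOMEM;
	}

	vaddr = dma_pool_alloc(context->stlb_pool, GFP_KERNEL, &daddr);
	if (!vaddr)
		return -ENOMEM;

	/* Same initialization as today, just for a single 4KiB STLB. */
	memset32(vaddr, MMUv2_PTE_EXCEPTION, SZ_4K / sizeof(u32));

	context->stlb_cpu[stlb] = vaddr;
	context->stlb_dma[stlb] = daddr;
	context->mtlb_cpu[stlb] = daddr | MMUv2_PTE_PRESENT;

	return 0;
}

The free path would then be a dma_pool_free() per allocated STLB followed
by dma_pool_destroy() on context teardown, instead of the open-coded
grouping in etnaviv_iommuv2_stlb_free().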