On Fri, Mar 1, 2019 at 2:39 PM Jordan Crouse <jcrouse@xxxxxxxxxxxxxx> wrote: > > Add support for per-instance pagetables for 5XX targets. Create a support > buffer for preemption to hold the SMMU pagetable information for a > preempted ring, enable TTBR1 to support split pagetables and add the > necessary PM4 commands to trigger a pagetable switch at the beginning > of a user command. > > Signed-off-by: Jordan Crouse <jcrouse@xxxxxxxxxxxxxx> > --- > > drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 120 +++++++++++++++++++++++++++++- > drivers/gpu/drm/msm/adreno/a5xx_gpu.h | 19 +++++ > drivers/gpu/drm/msm/adreno/a5xx_preempt.c | 70 +++++++++++++---- > 3 files changed, 192 insertions(+), 17 deletions(-) > > diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c > index 3d6f414..9294fd5 100644 > --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c > +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c > @@ -202,6 +202,59 @@ static void a5xx_submit_in_rb(struct msm_gpu *gpu, struct msm_gem_submit *submit > msm_gpu_retire(gpu); > } > > +static void a5xx_set_pagetable(struct msm_gpu *gpu, struct msm_ringbuffer *ring, > + struct msm_file_private *ctx) > +{ > + u64 ttbr; > + u32 asid; > + > + if (!msm_iommu_get_ptinfo(ctx->aspace->mmu, &ttbr, &asid)) > + return; > + > + ttbr = ttbr | ((u64) asid) << 48; > + > + /* Turn off protected mode */ > + OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1); > + OUT_RING(ring, 0); > + > + /* Turn on APRIV mode to access critical regions */ > + OUT_PKT4(ring, REG_A5XX_CP_CNTL, 1); > + OUT_RING(ring, 1); > + > + /* Make sure the ME is synchronized before starting the update */ > + OUT_PKT7(ring, CP_WAIT_FOR_ME, 0); > + > + /* Execute the table update */ > + OUT_PKT7(ring, CP_SMMU_TABLE_UPDATE, 3); > + OUT_RING(ring, lower_32_bits(ttbr)); > + OUT_RING(ring, upper_32_bits(ttbr)); > + OUT_RING(ring, 0); > + > + /* > + * Write the new TTBR0 to the preemption records - this will be used to > + * reload the pagetable if the current ring gets 
preempted out. > + */ > + OUT_PKT7(ring, CP_MEM_WRITE, 4); > + OUT_RING(ring, lower_32_bits(rbmemptr(ring, ttbr0))); > + OUT_RING(ring, upper_32_bits(rbmemptr(ring, ttbr0))); > + OUT_RING(ring, lower_32_bits(ttbr)); > + OUT_RING(ring, upper_32_bits(ttbr)); > + > + /* Invalidate the draw state so we start off fresh */ > + OUT_PKT7(ring, CP_SET_DRAW_STATE, 3); > + OUT_RING(ring, 0x40000); CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS? > + OUT_RING(ring, 1); was that really meant to be 1, not 0, for the low 32b of the state object address? BR, -R > + OUT_RING(ring, 0); > + > + /* Turn off APRIV */ > + OUT_PKT4(ring, REG_A5XX_CP_CNTL, 1); > + OUT_RING(ring, 0); > + > + /* Turn on protected mode */ > + OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1); > + OUT_RING(ring, 1); > +} > + > static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, > struct msm_file_private *ctx) > { > @@ -217,6 +270,8 @@ static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, > return; > } > > + a5xx_set_pagetable(gpu, ring, ctx); > + > OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1); > OUT_RING(ring, 0x02); > > @@ -1456,21 +1511,77 @@ static unsigned long a5xx_gpu_busy(struct msm_gpu *gpu) > return (unsigned long)busy_time; > } > > +static struct msm_gem_address_space *a5xx_new_address_space(struct msm_gpu *gpu) > +{ > + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); > + struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); > + struct msm_gem_address_space *aspace; > + int ret; > + > + /* Return the default pagetable if per instance tables don't work */ > + if (!a5xx_gpu->per_instance_tables) > + return gpu->aspace; > + > + aspace = msm_gem_address_space_create_instance(&gpu->pdev->dev, > + "gpu", 0x100000000ULL, 0x1ffffffffULL); > + if (IS_ERR(aspace)) > + return aspace; > + > + ret = aspace->mmu->funcs->attach(aspace->mmu, NULL, 0); > + if (ret) { > + /* -ENODEV means that aux domains aren't supported */ > + if (ret == -ENODEV) > + return gpu->aspace; > + > + 
return ERR_PTR(ret); > + } > + > + return aspace; > +} > + > static struct msm_gem_address_space * > a5xx_create_address_space(struct msm_gpu *gpu) > { > + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); > + struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); > + struct device *dev = &gpu->pdev->dev; > struct msm_gem_address_space *aspace; > struct iommu_domain *iommu; > - int ret; > + int ret, val = 1; > + > + a5xx_gpu->per_instance_tables = false; > > iommu = iommu_domain_alloc(&platform_bus_type); > if (!iommu) > return NULL; > > - iommu->geometry.aperture_start = 0x100000000ULL; > - iommu->geometry.aperture_end = 0x1ffffffffULL; > + /* Try to enable split pagetables */ > + if (iommu_domain_set_attr(iommu, DOMAIN_ATTR_SPLIT_TABLES, &val)) { > + /* > + * If split pagetables aren't available we won't be able to do > + * per-instance pagetables so set up the global va space at our > + * usual location > + */ > + iommu->geometry.aperture_start = 0x100000000ULL; > + iommu->geometry.aperture_end = 0x1ffffffffULL; > + } else { > + /* > + * If split pagetables are available then we might be able to do > + * per-instance pagetables. 
Put the default va-space in TTBR1 to > + * prepare > + */ > + iommu->geometry.aperture_start = 0xfffffff100000000ULL; > + iommu->geometry.aperture_end = 0xfffffff1ffffffffULL; > + > + /* > + * If both split pagetables and aux domains are supported we can > + * do per_instance pagetables > + */ > + a5xx_gpu->per_instance_tables = > + iommu_dev_has_feature(dev, IOMMU_DEV_FEAT_AUX); > + } > > - aspace = msm_gem_address_space_create(&gpu->pdev->dev, iommu, "gpu"); > + aspace = msm_gem_address_space_create(dev, iommu, "gpu"); > if (IS_ERR(aspace)) { > iommu_domain_free(iommu); > DRM_DEV_ERROR(gpu->dev->dev, "failed to init mmu: %ld\n", > @@ -1510,6 +1621,7 @@ static const struct adreno_gpu_funcs funcs = { > .gpu_state_get = a5xx_gpu_state_get, > .gpu_state_put = a5xx_gpu_state_put, > .create_address_space = a5xx_create_address_space, > + .new_address_space = a5xx_new_address_space, > }, > .get_timestamp = a5xx_get_timestamp, > }; > diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.h b/drivers/gpu/drm/msm/adreno/a5xx_gpu.h > index 7d71860..82ceb9b 100644 > --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.h > +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.h > @@ -45,6 +45,11 @@ struct a5xx_gpu { > > atomic_t preempt_state; > struct timer_list preempt_timer; > + struct a5xx_smmu_info *smmu_info; > + struct drm_gem_object *smmu_info_bo; > + uint64_t smmu_info_iova; > + > + bool per_instance_tables; > }; > > #define to_a5xx_gpu(x) container_of(x, struct a5xx_gpu, base) > @@ -132,6 +137,20 @@ struct a5xx_preempt_record { > */ > #define A5XX_PREEMPT_COUNTER_SIZE (16 * 4) > > +/* > + * This is a global structure that the preemption code uses to switch in the > + * pagetable for the preempted process - the code switches in whatever we > + * after preempting in a new ring. 
> + */ > +struct a5xx_smmu_info { > + uint32_t magic; > + uint32_t _pad4; > + uint64_t ttbr0; > + uint32_t asid; > + uint32_t contextidr; > +}; > + > +#define A5XX_SMMU_INFO_MAGIC 0x3618CDA3UL > > int a5xx_power_init(struct msm_gpu *gpu); > void a5xx_gpmu_ucode_init(struct msm_gpu *gpu); > diff --git a/drivers/gpu/drm/msm/adreno/a5xx_preempt.c b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c > index 3d62310..1050409 100644 > --- a/drivers/gpu/drm/msm/adreno/a5xx_preempt.c > +++ b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c > @@ -12,6 +12,7 @@ > */ > > #include "msm_gem.h" > +#include "msm_mmu.h" > #include "a5xx_gpu.h" > > /* > @@ -145,6 +146,15 @@ void a5xx_preempt_trigger(struct msm_gpu *gpu) > a5xx_gpu->preempt[ring->id]->wptr = get_wptr(ring); > spin_unlock_irqrestore(&ring->lock, flags); > > + /* Do read barrier to make sure we have updated pagetable info */ > + rmb(); > + > + /* Set the SMMU info for the preemption */ > + if (a5xx_gpu->smmu_info) { > + a5xx_gpu->smmu_info->ttbr0 = ring->memptrs->ttbr0; > + a5xx_gpu->smmu_info->contextidr = 0; > + } > + > /* Set the address of the incoming preemption record */ > gpu_write64(gpu, REG_A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_LO, > REG_A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_HI, > @@ -221,9 +231,10 @@ void a5xx_preempt_hw_init(struct msm_gpu *gpu) > a5xx_gpu->preempt[i]->rbase = gpu->rb[i]->iova; > } > > - /* Write a 0 to signal that we aren't switching pagetables */ > + /* Tell the CP where to find the smmu_info buffer*/ > gpu_write64(gpu, REG_A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_LO, > - REG_A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_HI, 0); > + REG_A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_HI, > + a5xx_gpu->smmu_info_iova); > > /* Reset the preemption state */ > set_preempt_state(a5xx_gpu, PREEMPT_NONE); > @@ -271,6 +282,34 @@ void a5xx_preempt_fini(struct msm_gpu *gpu) > > for (i = 0; i < gpu->nr_rings; i++) > msm_gem_kernel_put(a5xx_gpu->preempt_bo[i], gpu->aspace, true); > + > + msm_gem_kernel_put(a5xx_gpu->smmu_info_bo, gpu->aspace, true); > +} 
> + > +static int a5xx_smmu_info_init(struct msm_gpu *gpu) > +{ > + struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); > + struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); > + struct a5xx_smmu_info *ptr; > + struct drm_gem_object *bo; > + u64 iova; > + > + if (!a5xx_gpu->per_instance_tables) > + return 0; > + > + ptr = msm_gem_kernel_new(gpu->dev, sizeof(struct a5xx_smmu_info), > + MSM_BO_UNCACHED, gpu->aspace, &bo, &iova); > + > + if (IS_ERR(ptr)) > + return PTR_ERR(ptr); > + > + ptr->magic = A5XX_SMMU_INFO_MAGIC; > + > + a5xx_gpu->smmu_info_bo = bo; > + a5xx_gpu->smmu_info_iova = iova; > + a5xx_gpu->smmu_info = ptr; > + > + return 0; > } > > void a5xx_preempt_init(struct msm_gpu *gpu) > @@ -284,17 +323,22 @@ void a5xx_preempt_init(struct msm_gpu *gpu) > return; > > for (i = 0; i < gpu->nr_rings; i++) { > - if (preempt_init_ring(a5xx_gpu, gpu->rb[i])) { > - /* > - * On any failure our adventure is over. Clean up and > - * set nr_rings to 1 to force preemption off > - */ > - a5xx_preempt_fini(gpu); > - gpu->nr_rings = 1; > - > - return; > - } > + if (preempt_init_ring(a5xx_gpu, gpu->rb[i])) > + goto fail; > } > > - timer_setup(&a5xx_gpu->preempt_timer, a5xx_preempt_timer, 0); > + if (a5xx_smmu_info_init(gpu)) > + goto fail; > + > + timer_setup(&a5xx_gpu->preempt_timer, a5xx_preempt_timer, > + (unsigned long) a5xx_gpu); > + > + return; > +fail: > + /* > + * On any failure our adventure is over. Clean up and > + * set nr_rings to 1 to force preemption off > + */ > + a5xx_preempt_fini(gpu); > + gpu->nr_rings = 1; > } > -- > 2.7.4 >