From: Rob Clark <robdclark@xxxxxxxxxxxx> Add PRR (Partial Resident Region) is a bypass address which make GPU writes go to /dev/null and reads return zero. This is used to implement vulkan sparse residency. To support PRR/NULL mappings, we allocate a page to reserve a physical address which we know will not be used as part of a GEM object, and configure the SMMU to use this address for PRR/NULL mappings. Signed-off-by: Rob Clark <robdclark@xxxxxxxxxxxx> --- drivers/gpu/drm/msm/adreno/adreno_gpu.c | 10 ++++ drivers/gpu/drm/msm/msm_iommu.c | 62 ++++++++++++++++++++++++- include/uapi/drm/msm_drm.h | 2 + 3 files changed, 73 insertions(+), 1 deletion(-) diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index 90848852ee50..140b4e54bc96 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -308,6 +308,13 @@ int adreno_fault_handler(struct msm_gpu *gpu, unsigned long iova, int flags, return 0; } +static bool +adreno_smmu_has_prr(struct msm_gpu *gpu) +{ + struct adreno_smmu_priv *adreno_smmu = dev_get_drvdata(&gpu->pdev->dev); + return adreno_smmu && adreno_smmu->set_prr_addr; +} + int adreno_get_param(struct msm_gpu *gpu, struct msm_context *ctx, uint32_t param, uint64_t *value, uint32_t *len) { @@ -392,6 +399,9 @@ int adreno_get_param(struct msm_gpu *gpu, struct msm_context *ctx, case MSM_PARAM_UCHE_TRAP_BASE: *value = adreno_gpu->uche_trap_base; return 0; + case MSM_PARAM_HAS_PRR: + *value = adreno_smmu_has_prr(gpu); + return 0; default: return UERR(EINVAL, drm, "%s: invalid param: %u", gpu->name, param); } diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c index 41cb629e25f3..bb65be95f7db 100644 --- a/drivers/gpu/drm/msm/msm_iommu.c +++ b/drivers/gpu/drm/msm/msm_iommu.c @@ -13,6 +13,7 @@ struct msm_iommu { struct msm_mmu base; struct iommu_domain *domain; atomic_t pagetables; + struct page *prr_page; }; #define to_msm_iommu(x) container_of(x, struct msm_iommu, base) @@ -112,6 +113,36 @@ static int msm_iommu_pagetable_unmap(struct msm_mmu *mmu, u64 iova, return (size == 0) ? 0 : -EINVAL; } +static int msm_iommu_pagetable_map_prr(struct msm_mmu *mmu, u64 iova, size_t len, int prot) +{ + struct msm_iommu_pagetable *pagetable = to_pagetable(mmu); + struct io_pgtable_ops *ops = pagetable->pgtbl_ops; + struct msm_iommu *iommu = to_msm_iommu(pagetable->parent); + phys_addr_t phys = page_to_phys(iommu->prr_page); + u64 addr = iova; + + while (len) { + size_t mapped = 0; + size_t size = PAGE_SIZE; + int ret; + + ret = ops->map_pages(ops, addr, phys, size, 1, prot, GFP_KERNEL, &mapped); + + /* map_pages could fail after mapping some of the pages, + * so update the counters before error handling. + */ + addr += mapped; + len -= mapped; + + if (ret) { + msm_iommu_pagetable_unmap(mmu, iova, addr - iova); + return -EINVAL; + } + } + + return 0; +} + static int msm_iommu_pagetable_map(struct msm_mmu *mmu, u64 iova, struct sg_table *sgt, size_t off, size_t len, int prot) @@ -122,6 +153,9 @@ static int msm_iommu_pagetable_map(struct msm_mmu *mmu, u64 iova, u64 addr = iova; unsigned int i; + if (!sgt) + return msm_iommu_pagetable_map_prr(mmu, iova, len, prot); + for_each_sgtable_sg(sgt, sg, i) { size_t size = sg->length; phys_addr_t phys = sg_phys(sg); @@ -177,9 +211,16 @@ static void msm_iommu_pagetable_destroy(struct msm_mmu *mmu) * If this is the last attached pagetable for the parent, * disable TTBR0 in the arm-smmu driver */ - if (atomic_dec_return(&iommu->pagetables) == 0) + if (atomic_dec_return(&iommu->pagetables) == 0) { adreno_smmu->set_ttbr0_cfg(adreno_smmu->cookie, NULL); + if (adreno_smmu->set_prr_bit) { + adreno_smmu->set_prr_bit(adreno_smmu->cookie, false); + __free_page(iommu->prr_page); + iommu->prr_page = NULL; + } + } + free_io_pgtable_ops(pagetable->pgtbl_ops); kfree(pagetable); } @@ -314,6 +355,25 @@ struct msm_mmu *msm_iommu_pagetable_create(struct msm_mmu *parent) kfree(pagetable); return ERR_PTR(ret); } + + BUG_ON(iommu->prr_page); + if (adreno_smmu->set_prr_bit) { + /* + * We need a zero'd page for two reasons: + * + * 1) Reserve a known physical address to use when + * mapping NULL / sparsely resident regions + * 2) Read back zero + * + * It appears the hw drops writes to the PRR region + * on the floor, but reads actually return whatever + * is in the PRR page. + */ + iommu->prr_page = alloc_page(GFP_KERNEL | __GFP_ZERO); + adreno_smmu->set_prr_addr(adreno_smmu->cookie, + page_to_phys(iommu->prr_page)); + adreno_smmu->set_prr_bit(adreno_smmu->cookie, true); + } } /* Needed later for TLB flush */ diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h index a7e48ee1dd95..48bc0374e2ae 100644 --- a/include/uapi/drm/msm_drm.h +++ b/include/uapi/drm/msm_drm.h @@ -115,6 +115,8 @@ struct drm_msm_timespec { * ioctl will throw -EPIPE. */ #define MSM_PARAM_EN_VM_BIND 0x15 /* WO, once */ +/* PRR (Partially Resident Region) is required for sparse residency: */ +#define MSM_PARAM_HAS_PRR 0x16 /* RO */ /* For backwards compat. The original support for preemption was based on * a single ring per priority level so # of priority levels equals the # -- 2.47.1