[PATCH 6/6] drm/msm: a5xx: Support per-instance pagetables

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Support per-instance pagetables for 5XX targets. Per-instance
pagetables allow each open DRM instance to have its own VM memory
space to prevent accidentally or maliciously copying or overwriting
buffers from other instances. It also opens the door for SVM since
any given CPU side address can be more reliably mapped into the
instance's GPU VM space without conflict.

To support this, create a new dynamic domain (pagetable) for each open
DRM file and map buffer objects for each instance into that pagetable.
Use the GPU to switch to the pagetable for the instance while doing a
submit.

Signed-off-by: Jordan Crouse <jcrouse@xxxxxxxxxxxxxx>
---
 arch/arm64/boot/dts/qcom/msm8996.dtsi     |  2 +
 drivers/gpu/drm/msm/adreno/a5xx_gpu.c     | 64 ++++++++++++++++++++++++++++++-
 drivers/gpu/drm/msm/adreno/a5xx_gpu.h     | 17 ++++++++
 drivers/gpu/drm/msm/adreno/a5xx_preempt.c | 61 +++++++++++++++++++++++------
 drivers/gpu/drm/msm/adreno/adreno_gpu.h   |  2 +
 drivers/gpu/drm/msm/msm_drv.c             | 60 ++++++++++++++++++++++-------
 drivers/gpu/drm/msm/msm_drv.h             |  3 ++
 drivers/gpu/drm/msm/msm_gem_vma.c         | 38 +++++++++++++++---
 8 files changed, 216 insertions(+), 31 deletions(-)

diff --git a/arch/arm64/boot/dts/qcom/msm8996.dtsi b/arch/arm64/boot/dts/qcom/msm8996.dtsi
index 2903020..6372f3a 100644
--- a/arch/arm64/boot/dts/qcom/msm8996.dtsi
+++ b/arch/arm64/boot/dts/qcom/msm8996.dtsi
@@ -867,7 +867,9 @@
 
 			qcom,skip-init;
 			qcom,register-save;
+
 			arm,smmu-enable-stall;
+			qcom,dynamic;
 
 			status = "okay";
 		};
diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
index 06238b7..65cd3ef 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
@@ -18,7 +18,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/of_reserved_mem.h>
 #include "msm_gem.h"
-#include "msm_mmu.h"
+#include "msm_iommu.h"
 #include "a5xx_gpu.h"
 
 extern bool hang_debug;
@@ -209,6 +209,66 @@ static void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
 		gpu_write(gpu, REG_A5XX_CP_RB_WPTR, wptr);
 }
 
+static void a5xx_set_pagetable(struct msm_gpu *gpu, struct msm_ringbuffer *ring,
+	struct msm_file_private *ctx)
+{
+	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+	struct msm_mmu *mmu = ctx->aspace->mmu;
+	struct msm_iommu *iommu = to_msm_iommu(mmu);
+
+	if (!iommu->ttbr0)
+		return;
+
+	/* Turn off protected mode */
+	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
+	OUT_RING(ring, 0);
+
+	/* Turn on APRIV mode to access critical regions */
+	OUT_PKT4(ring, REG_A5XX_CP_CNTL, 1);
+	OUT_RING(ring, 1);
+
+	/* Make sure the ME is synchronized before starting the update */
+	OUT_PKT7(ring, CP_WAIT_FOR_ME, 0);
+
+	/* Execute the table update */
+	OUT_PKT7(ring, CP_SMMU_TABLE_UPDATE, 3);
+	OUT_RING(ring, lower_32_bits(iommu->ttbr0));
+	OUT_RING(ring, upper_32_bits(iommu->ttbr0));
+	OUT_RING(ring, iommu->contextidr);
+
+	/*
+	 * Write the new TTBR0 to the preemption records - this will be used to
+	 * reload the pagetable if the current ring gets preempted out.
+	 */
+	OUT_PKT7(ring, CP_MEM_WRITE, 4);
+	OUT_RING(ring, lower_32_bits(rbmemptr(adreno_gpu, ring->id, ttbr0)));
+	OUT_RING(ring, upper_32_bits(rbmemptr(adreno_gpu, ring->id, ttbr0)));
+	OUT_RING(ring, lower_32_bits(iommu->ttbr0));
+	OUT_RING(ring, upper_32_bits(iommu->ttbr0));
+
+	/* Also write the current contextidr (ASID) */
+	OUT_PKT7(ring, CP_MEM_WRITE, 3);
+	OUT_RING(ring, lower_32_bits(rbmemptr(adreno_gpu, ring->id,
+		contextidr)));
+	OUT_RING(ring, upper_32_bits(rbmemptr(adreno_gpu, ring->id,
+		contextidr)));
+	OUT_RING(ring, iommu->contextidr);
+
+	/* Invalidate the draw state so we start off fresh */
+	OUT_PKT7(ring, CP_SET_DRAW_STATE, 3);
+	OUT_RING(ring, 0x40000);
+	OUT_RING(ring, 1);
+	OUT_RING(ring, 0);
+
+	/* Turn off APRIV */
+	OUT_PKT4(ring, REG_A5XX_CP_CNTL, 1);
+	OUT_RING(ring, 0);
+
+	/* Turn protected mode back on */
+	OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1);
+	OUT_RING(ring, 1);
+}
+
 static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
 	struct msm_file_private *ctx)
 {
@@ -219,6 +279,8 @@ static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
 	struct msm_ringbuffer *ring = submit->ring;
 	unsigned int i, ibs = 0;
 
+	a5xx_set_pagetable(gpu, ring, ctx);
+
 	OUT_PKT7(ring, CP_PREEMPT_ENABLE_GLOBAL, 1);
 	OUT_RING(ring, 0x02);
 
diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.h b/drivers/gpu/drm/msm/adreno/a5xx_gpu.h
index f042a78..19deea0 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.h
+++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.h
@@ -47,6 +47,9 @@ struct a5xx_gpu {
 	atomic_t preempt_state;
 	struct timer_list preempt_timer;
 
+	struct a5xx_smmu_info *smmu_info;
+	struct drm_gem_object *smmu_info_bo;
+	uint64_t smmu_info_iova;
 };
 
 #define to_a5xx_gpu(x) container_of(x, struct a5xx_gpu, base)
@@ -127,6 +130,20 @@ struct a5xx_preempt_record {
  */
 #define A5XX_PREEMPT_COUNTER_SIZE (16 * 4)
 
+/*
+ * This is a global structure that the preemption code uses to switch in the
+ * pagetable for the preempted process - the code switches in whatever we
+ * have set here after preempting in a new ring.
+ */
+struct a5xx_smmu_info {
+	uint32_t  magic;
+	uint32_t  _pad4;
+	uint64_t  ttbr0;
+	uint32_t  asid;
+	uint32_t  contextidr;
+};
+
+#define A5XX_SMMU_INFO_MAGIC 0x3618CDA3UL
 
 int a5xx_power_init(struct msm_gpu *gpu);
 void a5xx_gpmu_ucode_init(struct msm_gpu *gpu);
diff --git a/drivers/gpu/drm/msm/adreno/a5xx_preempt.c b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c
index 582ba9b..1b29049 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_preempt.c
+++ b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c
@@ -12,6 +12,7 @@
  */
 
 #include "msm_gem.h"
+#include "msm_iommu.h"
 #include "a5xx_gpu.h"
 
 static void *alloc_kernel_bo(struct drm_device *drm, struct msm_gpu *gpu,
@@ -172,6 +173,17 @@ void a5xx_preempt_trigger(struct msm_gpu *gpu)
 	a5xx_gpu->preempt[ring->id]->wptr = get_wptr(ring);
 	spin_unlock_irqrestore(&ring->lock, flags);
 
+	/* Do read barrier to make sure we have updated pagetable info */
+	rmb();
+
+	/* Set the SMMU info for the preemption */
+	if (a5xx_gpu->smmu_info) {
+		a5xx_gpu->smmu_info->ttbr0 =
+			adreno_gpu->memptrs->ttbr0[ring->id];
+		a5xx_gpu->smmu_info->contextidr =
+			adreno_gpu->memptrs->contextidr[ring->id];
+	}
+
 	/* Set the address of the incoming preemption record */
 	gpu_write64(gpu, REG_A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_LO,
 		REG_A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_HI,
@@ -247,9 +259,10 @@ void a5xx_preempt_hw_init(struct msm_gpu *gpu)
 		}
 	}
 
-	/* Write a 0 to signal that we aren't switching pagetables */
+	/* Tell the CP where to find the smmu_info buffer */
 	gpu_write64(gpu, REG_A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_LO,
-		REG_A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_HI, 0);
+		REG_A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_HI,
+		a5xx_gpu->smmu_info_iova);
 
 	/* Reset the preemption state */
 	set_preempt_state(a5xx_gpu, PREEMPT_NONE);
@@ -311,6 +324,13 @@ void a5xx_preempt_fini(struct msm_gpu *gpu)
 
 		a5xx_gpu->preempt_bo[i] = NULL;
 	}
+
+	if (a5xx_gpu->smmu_info_bo) {
+		if (a5xx_gpu->smmu_info_iova)
+			msm_gem_put_iova(a5xx_gpu->smmu_info_bo, gpu->aspace);
+		drm_gem_object_unreference_unlocked(a5xx_gpu->smmu_info_bo);
+		a5xx_gpu->smmu_info_bo = NULL;
+	}
 }
 
 void a5xx_preempt_init(struct msm_gpu *gpu)
@@ -318,6 +338,9 @@ void a5xx_preempt_init(struct msm_gpu *gpu)
 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 	struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
 	struct msm_ringbuffer *ring;
+	struct a5xx_smmu_info *ptr;
+	struct drm_gem_object *bo;
+	uint64_t iova;
 	int i;
 
 	/* No preemption if we only have one ring */
@@ -328,18 +351,34 @@ void a5xx_preempt_init(struct msm_gpu *gpu)
 		if (!ring)
 			continue;
 
-		if (preempt_init_ring(a5xx_gpu, ring)) {
-			/*
-			 * On any failure our adventure is over. Clean up and
-			 * set nr_rings to 1 to force preemption off
-			 */
-			a5xx_preempt_fini(gpu);
-			gpu->nr_rings = 1;
+		if (preempt_init_ring(a5xx_gpu, ring))
+			goto fail;
+	}
+
+	if (msm_iommu_allow_dynamic(gpu->aspace->mmu)) {
+		ptr = alloc_kernel_bo(gpu->dev, gpu,
+			sizeof(struct a5xx_smmu_info),
+			MSM_BO_UNCACHED, &bo, &iova);
 
-			return;
-		}
+		if (IS_ERR(ptr))
+			goto fail;
+
+		ptr->magic = A5XX_SMMU_INFO_MAGIC;
+
+		a5xx_gpu->smmu_info_bo = bo;
+		a5xx_gpu->smmu_info_iova = iova;
+		a5xx_gpu->smmu_info = ptr;
 	}
 
 	setup_timer(&a5xx_gpu->preempt_timer, a5xx_preempt_timer,
 		(unsigned long) a5xx_gpu);
+
+	return;
+fail:
+	/*
+	 * On any failure our adventure is over. Clean up and
+	 * set nr_rings to 1 to force preemption off
+	 */
+	a5xx_preempt_fini(gpu);
+	gpu->nr_rings = 1;
 }
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
index 841ec30..b33cbf0 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
@@ -93,6 +93,8 @@ struct adreno_info {
 struct adreno_rbmemptrs {
 	volatile uint32_t rptr[MSM_GPU_MAX_RINGS];
 	volatile uint32_t fence[MSM_GPU_MAX_RINGS];
+	volatile uint64_t ttbr0[MSM_GPU_MAX_RINGS];
+	volatile unsigned int contextidr[MSM_GPU_MAX_RINGS];
 };
 
 struct adreno_gpu {
diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index 7b7a2e7..1ae4823 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -22,6 +22,8 @@
 #include "msm_fence.h"
 #include "msm_gpu.h"
 #include "msm_kms.h"
+#include "msm_gem.h"
+#include "msm_mmu.h"
 
 
 /*
@@ -515,11 +517,37 @@ static int msm_open(struct drm_device *dev, struct drm_file *file)
 	 */
 	load_gpu(dev);
 
+	if (!priv->gpu)
+		return 0;
+
 	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
 	if (!ctx)
 		return -ENOMEM;
 
-	ctx->aspace = priv->gpu->aspace;
+	/*
+	 * FIXME: we will want to use a dynamic name of some sort
+	 * FIXME: we will need a smarter way to set the range based on target
+	 */
+	ctx->aspace = msm_gem_address_space_create_instance(
+		priv->gpu->aspace->mmu, "gpu", 0x100000000, 0x1ffffffff);
+
+	if (IS_ERR(ctx->aspace)) {
+		int ret = PTR_ERR(ctx->aspace);
+
+		/*
+		 * If dynamic domains are not supported, everybody uses the same
+		 * pagetable
+		 */
+		if (ret == -EOPNOTSUPP)
+			ctx->aspace = priv->gpu->aspace;
+		else {
+			kfree(ctx);
+			return ret;
+		}
+	} else {
+		ctx->aspace->mmu->funcs->attach(ctx->aspace->mmu, NULL, 0);
+	}
+
 	file->driver_priv = ctx;
 
 	return 0;
@@ -534,10 +562,25 @@ static void msm_preclose(struct drm_device *dev, struct drm_file *file)
 	if (ctx == priv->lastctx)
 		priv->lastctx = NULL;
 	mutex_unlock(&dev->struct_mutex);
+}
+
+static void msm_postclose(struct drm_device *dev, struct drm_file *file)
+{
+	struct msm_drm_private *priv = dev->dev_private;
+	struct msm_file_private *ctx = file->driver_priv;
+
+
+	mutex_lock(&dev->struct_mutex);
+	if (ctx && ctx->aspace && ctx->aspace != priv->gpu->aspace) {
+		ctx->aspace->mmu->funcs->detach(ctx->aspace->mmu, NULL, 0);
+		msm_gem_address_space_put(ctx->aspace);
+	}
+	mutex_unlock(&dev->struct_mutex);
 
 	kfree(ctx);
 }
 
+
 static void msm_lastclose(struct drm_device *dev)
 {
 	struct msm_drm_private *priv = dev->dev_private;
@@ -684,17 +727,6 @@ static int msm_ioctl_gem_cpu_fini(struct drm_device *dev, void *data,
 	return ret;
 }
 
-static int msm_ioctl_gem_info_iova(struct drm_device *dev,
-		struct drm_gem_object *obj, uint64_t *iova)
-{
-	struct msm_drm_private *priv = dev->dev_private;
-
-	if (!priv->gpu)
-		return -EINVAL;
-
-	return msm_gem_get_iova(obj, priv->gpu->aspace, iova);
-}
-
 static int msm_ioctl_gem_info(struct drm_device *dev, void *data,
 		struct drm_file *file)
 {
@@ -710,9 +742,10 @@ static int msm_ioctl_gem_info(struct drm_device *dev, void *data,
 		return -ENOENT;
 
 	if (args->flags & MSM_INFO_IOVA) {
+		struct msm_file_private *ctx = file->driver_priv;
 		uint64_t iova;
 
-		ret = msm_ioctl_gem_info_iova(dev, obj, &iova);
+		ret = msm_gem_get_iova(obj, ctx->aspace, &iova);
 		if (!ret)
 			args->offset = iova;
 	} else {
@@ -818,6 +851,7 @@ static int msm_ioctl_gem_madvise(struct drm_device *dev, void *data,
 				DRIVER_MODESET,
 	.open               = msm_open,
 	.preclose           = msm_preclose,
+	.postclose	    = msm_postclose,
 	.lastclose          = msm_lastclose,
 	.irq_handler        = msm_irq,
 	.irq_preinstall     = msm_irq_preinstall,
diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
index bbad6c7..3efb3f1 100644
--- a/drivers/gpu/drm/msm/msm_drv.h
+++ b/drivers/gpu/drm/msm/msm_drv.h
@@ -191,6 +191,9 @@ int msm_gem_map_vma(struct msm_gem_address_space *aspace,
 struct msm_gem_address_space *
 msm_gem_address_space_create(struct device *dev, struct iommu_domain *domain,
 		const char *name);
+struct msm_gem_address_space *
+msm_gem_address_space_create_instance(struct msm_mmu *parent, const char *name,
+		uint64_t start, uint64_t end);
 
 void msm_gem_submit_free(struct msm_gem_submit *submit);
 int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
diff --git a/drivers/gpu/drm/msm/msm_gem_vma.c b/drivers/gpu/drm/msm/msm_gem_vma.c
index 2b1d2cb..0f65b19 100644
--- a/drivers/gpu/drm/msm/msm_gem_vma.c
+++ b/drivers/gpu/drm/msm/msm_gem_vma.c
@@ -85,9 +85,9 @@ void msm_gem_address_space_put(struct msm_gem_address_space *aspace)
 	return ret;
 }
 
-struct msm_gem_address_space *
-msm_gem_address_space_create(struct device *dev, struct iommu_domain *domain,
-		const char *name)
+static struct msm_gem_address_space *
+msm_gem_address_space_new(struct msm_mmu *mmu, const char *name,
+		uint64_t start, uint64_t end)
 {
 	struct msm_gem_address_space *aspace;
 
@@ -96,12 +96,38 @@ struct msm_gem_address_space *
 		return ERR_PTR(-ENOMEM);
 
 	aspace->name = name;
-	aspace->mmu = msm_iommu_new(dev, domain);
+	aspace->mmu = mmu;
 
-	drm_mm_init(&aspace->mm, (domain->geometry.aperture_start >> PAGE_SHIFT),
-			(domain->geometry.aperture_end >> PAGE_SHIFT) - 1);
+	drm_mm_init(&aspace->mm, (start >> PAGE_SHIFT),
+		(end >> PAGE_SHIFT) - 1);
 
 	kref_init(&aspace->kref);
 
 	return aspace;
 }
+
+struct msm_gem_address_space *
+msm_gem_address_space_create(struct device *dev, struct iommu_domain *domain,
+		const char *name)
+{
+	struct msm_mmu *mmu = msm_iommu_new(dev, domain);
+
+	if (IS_ERR(mmu))
+		return (struct msm_gem_address_space *) mmu;
+
+	return msm_gem_address_space_new(mmu, name,
+		domain->geometry.aperture_start,
+		domain->geometry.aperture_end);
+}
+
+struct msm_gem_address_space *
+msm_gem_address_space_create_instance(struct msm_mmu *parent, const char *name,
+		uint64_t start, uint64_t end)
+{
+	struct msm_mmu *child = msm_iommu_new_dynamic(parent);
+
+	if (IS_ERR(child))
+		return (struct msm_gem_address_space *) child;
+
+	return msm_gem_address_space_new(child, name, start, end);
+}
-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe linux-arm-msm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [Linux for Sparc]     [IETF Annouce]     [Security]     [Bugtraq]     [Linux MIPS]     [ECOS]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux