Re: [PATCH v3 05/10] drm/msm/a6xx: Add skeleton A7xx support

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 23/08/2023 15:55, Konrad Dybcio wrote:
A7xx GPUs are - from kernel's POV anyway - basically another generation
of A6xx. They build upon the A650/A660_family advancements, skipping some
writes (presumably more values are preset correctly on reset), adding
some new ones and changing others.

One notable difference is the introduction of a second shadow, called BV.
To handle this with the current code, allocate it right after the current
RPTR shadow.

BV handling and .submit are mostly based on Jonathan Marek's work.

All A7xx GPUs are assumed to have a GMU.
A702 is not an A7xx-class GPU, it's a weird forked A610.

Tested-by: Neil Armstrong <neil.armstrong@xxxxxxxxxx> # on SM8550-QRD
Tested-by: Dmitry Baryshkov <dmitry.baryshkov@xxxxxxxxxx> # sm8450
Signed-off-by: Konrad Dybcio <konrad.dybcio@xxxxxxxxxx>
---
  drivers/gpu/drm/msm/adreno/a6xx_gmu.c   |  95 +++++--
  drivers/gpu/drm/msm/adreno/a6xx_gpu.c   | 451 ++++++++++++++++++++++++++++----
  drivers/gpu/drm/msm/adreno/adreno_gpu.c |   1 +
  drivers/gpu/drm/msm/adreno/adreno_gpu.h |  10 +-
  drivers/gpu/drm/msm/msm_ringbuffer.h    |   2 +
  5 files changed, 478 insertions(+), 81 deletions(-)

diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
index 03fa89bf3e4b..75984260898e 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
@@ -200,9 +200,10 @@ int a6xx_gmu_wait_for_idle(struct a6xx_gmu *gmu)
static int a6xx_gmu_start(struct a6xx_gmu *gmu)
  {
+	struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu);
+	struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
+	u32 mask, reset_val, val;
  	int ret;
-	u32 val;
-	u32 mask, reset_val;
val = gmu_read(gmu, REG_A6XX_GMU_CM3_DTCM_START + 0xff8);
  	if (val <= 0x20010004) {
@@ -218,7 +219,11 @@ static int a6xx_gmu_start(struct a6xx_gmu *gmu)
  	/* Set the log wptr index
  	 * note: downstream saves the value in poweroff and restores it here
  	 */
-	gmu_write(gmu, REG_A6XX_GPU_GMU_CX_GMU_PWR_COL_CP_RESP, 0);
+	if (adreno_is_a7xx(adreno_gpu))
+		gmu_write(gmu, REG_A6XX_GMU_GENERAL_9, 0);
+	else
+		gmu_write(gmu, REG_A6XX_GPU_GMU_CX_GMU_PWR_COL_CP_RESP, 0);
+
gmu_write(gmu, REG_A6XX_GMU_CM3_SYSRESET, 0);
@@ -518,7 +523,9 @@ static void a6xx_gmu_rpmh_init(struct a6xx_gmu *gmu)
  	if (IS_ERR(pdcptr))
  		goto err;
- if (adreno_is_a650(adreno_gpu) || adreno_is_a660_family(adreno_gpu))
+	if (adreno_is_a650(adreno_gpu) ||
+	    adreno_is_a660_family(adreno_gpu) ||
+	    adreno_is_a7xx(adreno_gpu))
  		pdc_in_aop = true;
  	else if (adreno_is_a618(adreno_gpu) || adreno_is_a640_family(adreno_gpu))
  		pdc_address_offset = 0x30090;
@@ -550,7 +557,8 @@ static void a6xx_gmu_rpmh_init(struct a6xx_gmu *gmu)
  	gmu_write_rscc(gmu, REG_A6XX_RSCC_PDC_MATCH_VALUE_HI, 0x4514);
/* Load RSC sequencer uCode for sleep and wakeup */
-	if (adreno_is_a650_family(adreno_gpu)) {
+	if (adreno_is_a650_family(adreno_gpu) ||
+	    adreno_is_a7xx(adreno_gpu)) {
  		gmu_write_rscc(gmu, REG_A6XX_RSCC_SEQ_MEM_0_DRV0, 0xeaaae5a0);
  		gmu_write_rscc(gmu, REG_A6XX_RSCC_SEQ_MEM_0_DRV0 + 1, 0xe1a1ebab);
  		gmu_write_rscc(gmu, REG_A6XX_RSCC_SEQ_MEM_0_DRV0 + 2, 0xa2e0a581);
@@ -635,11 +643,18 @@ static void a6xx_gmu_rpmh_init(struct a6xx_gmu *gmu)
  /* Set up the idle state for the GMU */
  static void a6xx_gmu_power_config(struct a6xx_gmu *gmu)
  {
+	struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu);
+	struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
+
  	/* Disable GMU WB/RB buffer */
  	gmu_write(gmu, REG_A6XX_GMU_SYS_BUS_CONFIG, 0x1);
  	gmu_write(gmu, REG_A6XX_GMU_ICACHE_CONFIG, 0x1);
  	gmu_write(gmu, REG_A6XX_GMU_DCACHE_CONFIG, 0x1);
+ /* A7xx knows better by default! */
+	if (adreno_is_a7xx(adreno_gpu))
+		return;
+
  	gmu_write(gmu, REG_A6XX_GMU_PWR_COL_INTER_FRAME_CTRL, 0x9c40400);
switch (gmu->idle_level) {
@@ -702,7 +717,7 @@ static int a6xx_gmu_fw_load(struct a6xx_gmu *gmu)
  	u32 itcm_base = 0x00000000;
  	u32 dtcm_base = 0x00040000;
- if (adreno_is_a650_family(adreno_gpu))
+	if (adreno_is_a650_family(adreno_gpu) || adreno_is_a7xx(adreno_gpu))
  		dtcm_base = 0x10004000;
if (gmu->legacy) {
@@ -751,14 +766,22 @@ static int a6xx_gmu_fw_start(struct a6xx_gmu *gmu, unsigned int state)
  {
  	struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu);
  	struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
+	u32 fence_range_lower, fence_range_upper;
  	int ret;
  	u32 chipid;
- if (adreno_is_a650_family(adreno_gpu)) {
+	/* Vote veto for FAL10 */
+	if (adreno_is_a650_family(adreno_gpu) || adreno_is_a7xx(adreno_gpu)) {
  		gmu_write(gmu, REG_A6XX_GPU_GMU_CX_GMU_CX_FALNEXT_INTF, 1);
  		gmu_write(gmu, REG_A6XX_GPU_GMU_CX_GMU_CX_FAL_INTF, 1);
  	}
+ /* Turn on TCM (Tightly Coupled Memory) retention */
+	if (adreno_is_a7xx(adreno_gpu))
+		a6xx_llc_write(a6xx_gpu, REG_A7XX_CX_MISC_TCM_RET_CNTL, 1);
+	else
+		gmu_write(gmu, REG_A6XX_GMU_GENERAL_7, 1);
+
  	if (state == GMU_WARM_BOOT) {
  		ret = a6xx_rpmh_start(gmu);
  		if (ret)
@@ -768,9 +791,6 @@ static int a6xx_gmu_fw_start(struct a6xx_gmu *gmu, unsigned int state)
  			"GMU firmware is not loaded\n"))
  			return -ENOENT;
- /* Turn on register retention */
-		gmu_write(gmu, REG_A6XX_GMU_GENERAL_7, 1);
-
  		ret = a6xx_rpmh_start(gmu);
  		if (ret)
  			return ret;
@@ -780,6 +800,7 @@ static int a6xx_gmu_fw_start(struct a6xx_gmu *gmu, unsigned int state)
  			return ret;
  	}
+ /* Clear init result to make sure we are getting a fresh value */
  	gmu_write(gmu, REG_A6XX_GMU_CM3_FW_INIT_RESULT, 0);
  	gmu_write(gmu, REG_A6XX_GMU_CM3_BOOT_CONFIG, 0x02);
@@ -787,8 +808,18 @@ static int a6xx_gmu_fw_start(struct a6xx_gmu *gmu, unsigned int state)
  	gmu_write(gmu, REG_A6XX_GMU_HFI_QTBL_ADDR, gmu->hfi.iova);
  	gmu_write(gmu, REG_A6XX_GMU_HFI_QTBL_INFO, 1);
+ if (adreno_is_a7xx(adreno_gpu)) {
+		fence_range_upper = 0x32;
+		fence_range_lower = 0x8a0;
+	} else {
+		fence_range_upper = 0xa;
+		fence_range_lower = 0xa0;
+	}
+
  	gmu_write(gmu, REG_A6XX_GMU_AHB_FENCE_RANGE_0,
-		(1 << 31) | (0xa << 18) | (0xa0));
+		  BIT(31) |
+		  FIELD_PREP(GENMASK(30, 18), fence_range_upper) |
+		  FIELD_PREP(GENMASK(17, 0), fence_range_lower));

This fails to build on arm32 because of the missing #include <linux/bitfield.h>

/*
  	 * Snapshots toggle the NMI bit which will result in a jump to the NMI

--
With best wishes
Dmitry




[Index of Archives]     [Linux DRI Users]     [Linux Intel Graphics]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]     [XFree86]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux