Re: [PATCH 1/2] Support for adreno 430.

Rob Clark <robdclark@xxxxxxxxx> · Tue, 2 Feb 2016 17:31:50 -0500

overall, looks pretty good, few small comments inline

On Tue, Feb 2, 2016 at 4:06 PM, C Stout <cstout@xxxxxxxxxxxx> wrote:
> Change-Id: I6c891515d93a6f1a597e762090c3530a6810c6c6
> ---
>  drivers/gpu/drm/msm/adreno/a4xx_gpu.c      | 50 ++++++++++++++++++++++++++----
>  drivers/gpu/drm/msm/adreno/adreno_device.c |  8 +++++
>  drivers/gpu/drm/msm/adreno/adreno_gpu.c    | 41 ++++++++++++++++--------
>  drivers/gpu/drm/msm/adreno/adreno_gpu.h    |  5 +++
>  4 files changed, 85 insertions(+), 19 deletions(-)
>
> diff --git a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c
> index ef20cb5..9ed3abe 100644
> --- a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c
> +++ b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c
> @@ -111,11 +111,17 @@ static void a4xx_enable_hwcg(struct msm_gpu *gpu)
>         gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00000222);
>         gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_HLSQ , 0x00000000);
>         gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000);
> -       gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ, 0x00020000);
> -       gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL, 0xAAAAAAAA);
> +       gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ, 0x00220000);
> +       /* Early A430's have a timing issue with SP/TP power collapse;
> +          disabling HW clock gating prevents it. */
> +       if (adreno_is_a430(adreno_gpu) && adreno_gpu->rev.patchid < 2)
> +               gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL, 0);
> +       else
> +               gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL, 0xAAAAAAAA);
>         gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2, 0);
>  }
>
> +
>  static void a4xx_me_init(struct msm_gpu *gpu)
>  {
>         struct msm_ringbuffer *ring = gpu->rb;
> @@ -150,7 +156,7 @@ static int a4xx_hw_init(struct msm_gpu *gpu)
>         uint32_t *ptr, len;
>         int i, ret;
>
> -       if (adreno_is_a4xx(adreno_gpu)) {
> +       if (adreno_is_a420(adreno_gpu)) {
>                 gpu_write(gpu, REG_A4XX_VBIF_ABIT_SORT, 0x0001001F);
>                 gpu_write(gpu, REG_A4XX_VBIF_ABIT_SORT_CONF, 0x000000A4);
>                 gpu_write(gpu, REG_A4XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000001);
> @@ -159,6 +165,13 @@ static int a4xx_hw_init(struct msm_gpu *gpu)
>                 gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
>                 gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF1, 0x00000018);
>                 gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
> +       } else if (adreno_is_a430(adreno_gpu)) {
> +               gpu_write(gpu, REG_A4XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000001);
> +               gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
> +               gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF1, 0x00000018);
> +               gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
> +               gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF1, 0x00000018);
> +               gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
>         } else {
>                 BUG();
>         }
> @@ -170,6 +183,10 @@ static int a4xx_hw_init(struct msm_gpu *gpu)
>         gpu_write(gpu, REG_A4XX_RBBM_SP_HYST_CNT, 0x10);
>         gpu_write(gpu, REG_A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10);
>
> +       if (adreno_is_a430(adreno_gpu)) {
> +               gpu_write(gpu, REG_A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL2, 0x30);
> +       }
> +
>          /* Enable the RBBM error reporting bits */
>         gpu_write(gpu, REG_A4XX_RBBM_AHB_CTL0, 0x00000001);
>
> @@ -192,6 +209,9 @@ static int a4xx_hw_init(struct msm_gpu *gpu)
>         /* Turn on performance counters: */
>         gpu_write(gpu, REG_A4XX_RBBM_PERFCTR_CTL, 0x01);
>
> +       if (adreno_is_a430(adreno_gpu))
> +               gpu_write(gpu, REG_A4XX_UCHE_CACHE_WAYS_VFD, 0x07);
> +
>         /* Disable L2 bypass to avoid UCHE out of bounds errors */
>         gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_LO, 0xffff0000);
>         gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_HI, 0xffff0000);
> @@ -199,6 +219,15 @@ static int a4xx_hw_init(struct msm_gpu *gpu)
>         gpu_write(gpu, REG_A4XX_CP_DEBUG, (1 << 25) |
>                         (adreno_is_a420(adreno_gpu) ? (1 << 29) : 0));
>
> +       /* On A430 enable SP regfile sleep for power savings */
> +       /* TODO downstream does this for !420, so maybe applies for 405 too? */
> +       if (!adreno_is_a420(adreno_gpu)) {
> +               gpu_write(gpu, REG_A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_0,
> +                       0x00000441);
> +               gpu_write(gpu, REG_A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_1,
> +                       0x00000441);
> +       }
> +
>         a4xx_enable_hwcg(gpu);
>
>         /*
> @@ -213,9 +242,11 @@ static int a4xx_hw_init(struct msm_gpu *gpu)
>                 gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ, val);
>         }
>
> -       ret = adreno_hw_init(gpu);
> -       if (ret)
> -               return ret;

hmm, it seems a bit wrong to never call adreno_hw_init() for a430
(esp. when below you have some changes in adreno_hw_init())..  am I
not seeing something, or did you miss a hunk?

> +       if (!adreno_is_a430(adreno_gpu)) {
> +               ret = adreno_hw_init(gpu);
> +               if (ret)
> +                       return ret;
> +       }
>
>         /* setup access protection: */
>         gpu_write(gpu, REG_A4XX_CP_PROTECT_CTRL, 0x00000007);
> @@ -326,6 +357,13 @@ static irqreturn_t a4xx_irq(struct msm_gpu *gpu)
>         status = gpu_read(gpu, REG_A4XX_RBBM_INT_0_STATUS);
>         DBG("%s: Int status %08x", gpu->name, status);
>
> +       if (status & (1 << A4XX_INT_CP_REG_PROTECT_FAULT)) {
> +               uint32_t reg = gpu_read(gpu, REG_A4XX_CP_PROTECT_STATUS);
> +               printk("CP | Protected mode error| %s | addr=%x\n",
> +                       reg & (1 << 24) ? "WRITE" : "READ",
> +                       (reg & 0xFFFFF) >> 2);
> +       }
> +

this looks pretty useful, but also unrelated to a430.  Mind splitting
it out into it's own patch?

>         gpu_write(gpu, REG_A4XX_RBBM_INT_CLEAR_CMD, status);
>
>         msm_gpu_retire(gpu);
> diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c
> index 1ea2df5..43d5aaa 100644
> --- a/drivers/gpu/drm/msm/adreno/adreno_device.c
> +++ b/drivers/gpu/drm/msm/adreno/adreno_device.c
> @@ -73,6 +73,14 @@ static const struct adreno_info gpulist[] = {
>                 .pfpfw = "a420_pfp.fw",
>                 .gmem  = (SZ_1M + SZ_512K),
>                 .init  = a4xx_gpu_init,
> +       }, {
> +               .rev   = ADRENO_REV(4, 3, 0, ANY_ID),
> +               .revn  = 430,
> +               .name  = "A430",
> +               .pm4fw = "a420_pm4.fw",
> +               .pfpfw = "a420_pfp.fw",
> +               .gmem  = (SZ_1M + SZ_512K),
> +               .init  = a4xx_gpu_init,
>         },
>  };
>
> diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
> index a3b54cc..33c96d0d 100644
> --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
> +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
> @@ -68,18 +68,15 @@ int adreno_hw_init(struct msm_gpu *gpu)
>         adreno_gpu_write(adreno_gpu, REG_ADRENO_CP_RB_CNTL,
>                         /* size is log2(quad-words): */
>                         AXXX_CP_RB_CNTL_BUFSZ(ilog2(gpu->rb->size / 8)) |
> -                       AXXX_CP_RB_CNTL_BLKSZ(ilog2(RB_BLKSIZE / 8)));
> +                       AXXX_CP_RB_CNTL_BLKSZ(ilog2(RB_BLKSIZE / 8)) |
> +                       (adreno_is_a430(adreno_gpu) ? (1 << 27) : 0));

So.. from the a3xx (apq8016) docs which I do have, bit 27 here is
'RB_NO_UPDATE - Disable writes of Read Pointer to memory'.  And
actually looks like we already have AXXX_CP_RB_CNTL_NO_UPDATE in the
.xml.h so no need to open code it.

Downstream commit which makes this change mentions "Sometimes the RPTR
shadow memory is unreliable causing timeouts in adreno_idle().  Read
it directly from the register instead."  This might be a bit easier to
understand in the git history if we split the logical equivalent of
downstream cdcc02a68649f9929e3f3a5a84bfcac5e480894e into one patch,
and then the remaining a430 changes as preceding patch.  I think the
RB_NO_UPDATE change we could just make unconditional (although if you
want to take the safe route, make it conditional on a430 for now and
once I have a chance to test it on others I can flip it to
unconditional).

>         /* Setup ringbuffer address: */
>         adreno_gpu_write(adreno_gpu, REG_ADRENO_CP_RB_BASE, gpu->rb_iova);
> -       adreno_gpu_write(adreno_gpu, REG_ADRENO_CP_RB_RPTR_ADDR,
> -                       rbmemptr(adreno_gpu, rptr));
>
> -       /* Setup scratch/timestamp: */
> -       adreno_gpu_write(adreno_gpu, REG_ADRENO_SCRATCH_ADDR,
> -                       rbmemptr(adreno_gpu, fence));
> -
> -       adreno_gpu_write(adreno_gpu, REG_ADRENO_SCRATCH_UMSK, 0x1);
> +       if (!adreno_is_a430(adreno_gpu))
> +               adreno_gpu_write(adreno_gpu, REG_ADRENO_CP_RB_RPTR_ADDR,
> +                                               rbmemptr(adreno_gpu, rptr));
>
>         return 0;
>  }
> @@ -89,6 +86,16 @@ static uint32_t get_wptr(struct msm_ringbuffer *ring)
>         return ring->cur - ring->start;
>  }
>
> +/* Use this helper to read rptr, since a430 doesn't update rptr in memory */
> +static uint32_t get_rptr(struct adreno_gpu *adreno_gpu)
> +{
> +       if (adreno_is_a430(adreno_gpu))
> +               return adreno_gpu->memptrs->rptr = adreno_gpu_read(
> +                       adreno_gpu, REG_ADRENO_CP_RB_RPTR);
> +       else
> +               return adreno_gpu->memptrs->rptr;
> +}
> +
>  uint32_t adreno_last_fence(struct msm_gpu *gpu)
>  {
>         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
> @@ -137,7 +144,8 @@ int adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
>                         if (priv->lastctx == ctx)
>                                 break;
>                 case MSM_SUBMIT_CMD_BUF:
> -                       OUT_PKT3(ring, CP_INDIRECT_BUFFER_PFD, 2);
> +                       OUT_PKT3(ring, adreno_is_a430(adreno_gpu) ?
> +                               CP_INDIRECT_BUFFER_PFE : CP_INDIRECT_BUFFER_PFD, 2);

so far, other than a420 which I still need to test, I think we can
switch to 'prefetch enabled' version of the IB packet across the
board.  But I think it's fine to leave it, I can either drop the hunk
if I've had a chance to test a420 or remove it later.

>                         OUT_RING(ring, submit->cmd[i].iova);
>                         OUT_RING(ring, submit->cmd[i].size);
>                         ibs++;
> @@ -216,9 +224,16 @@ void adreno_idle(struct msm_gpu *gpu)
>  {
>         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
>         uint32_t wptr = get_wptr(gpu->rb);
> +       int ret;
>
>         /* wait for CP to drain ringbuffer: */
> -       if (spin_until(adreno_gpu->memptrs->rptr == wptr))
> +       if (adreno_is_a430(adreno_gpu))
> +               ret = spin_until((adreno_gpu->memptrs->rptr = adreno_gpu_read(
> +                       adreno_gpu, REG_ADRENO_CP_RB_RPTR)) == wptr);
> +       else
> +               ret = spin_until(adreno_gpu->memptrs->rptr == wptr);

I think just use get_rptr() here?

BR,
-R

> +
> +       if (ret)
>                 DRM_ERROR("%s: timeout waiting to drain ringbuffer!\n", gpu->name);
>
>         /* TODO maybe we need to reset GPU here to recover from hang? */
> @@ -237,7 +252,7 @@ void adreno_show(struct msm_gpu *gpu, struct seq_file *m)
>
>         seq_printf(m, "fence:    %d/%d\n", adreno_gpu->memptrs->fence,
>                         gpu->submitted_fence);
> -       seq_printf(m, "rptr:     %d\n", adreno_gpu->memptrs->rptr);
> +       seq_printf(m, "rptr:     %d\n", get_rptr(adreno_gpu));
>         seq_printf(m, "wptr:     %d\n", adreno_gpu->memptrs->wptr);
>         seq_printf(m, "rb wptr:  %d\n", get_wptr(gpu->rb));
>
> @@ -278,7 +293,7 @@ void adreno_dump_info(struct msm_gpu *gpu)
>
>         printk("fence:    %d/%d\n", adreno_gpu->memptrs->fence,
>                         gpu->submitted_fence);
> -       printk("rptr:     %d\n", adreno_gpu->memptrs->rptr);
> +       printk("rptr:     %d\n", get_rptr(adreno_gpu));
>         printk("wptr:     %d\n", adreno_gpu->memptrs->wptr);
>         printk("rb wptr:  %d\n", get_wptr(gpu->rb));
>
> @@ -313,7 +328,7 @@ static uint32_t ring_freewords(struct msm_gpu *gpu)
>         struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
>         uint32_t size = gpu->rb->size / 4;
>         uint32_t wptr = get_wptr(gpu->rb);
> -       uint32_t rptr = adreno_gpu->memptrs->rptr;
> +       uint32_t rptr = get_rptr(adreno_gpu);
>         return (rptr + (size - 1) - wptr) % size;
>  }
>
> diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
> index 0a312e9..c26aea1 100644
> --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h
> +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
> @@ -228,6 +228,11 @@ static inline int adreno_is_a420(struct adreno_gpu *gpu)
>         return gpu->revn == 420;
>  }
>
> +static inline int adreno_is_a430(struct adreno_gpu *gpu)
> +{
> +       return gpu->revn == 430;
> +}
> +
>  int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value);
>  int adreno_hw_init(struct msm_gpu *gpu);
>  uint32_t adreno_last_fence(struct msm_gpu *gpu);
> --
> 2.1.4
>
> _______________________________________________
> dri-devel mailing list
> dri-devel@xxxxxxxxxxxxxxxxxxxxx
> http://lists.freedesktop.org/mailman/listinfo/dri-devel
_______________________________________________
dri-devel mailing list
dri-devel@xxxxxxxxxxxxxxxxxxxxx
http://lists.freedesktop.org/mailman/listinfo/dri-devel