overall, looks pretty good, few small comments inline On Tue, Feb 2, 2016 at 4:06 PM, C Stout <cstout@xxxxxxxxxxxx> wrote: > Change-Id: I6c891515d93a6f1a597e762090c3530a6810c6c6 > --- > drivers/gpu/drm/msm/adreno/a4xx_gpu.c | 50 ++++++++++++++++++++++++++---- > drivers/gpu/drm/msm/adreno/adreno_device.c | 8 +++++ > drivers/gpu/drm/msm/adreno/adreno_gpu.c | 41 ++++++++++++++++-------- > drivers/gpu/drm/msm/adreno/adreno_gpu.h | 5 +++ > 4 files changed, 85 insertions(+), 19 deletions(-) > > diff --git a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c > index ef20cb5..9ed3abe 100644 > --- a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c > +++ b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c > @@ -111,11 +111,17 @@ static void a4xx_enable_hwcg(struct msm_gpu *gpu) > gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00000222); > gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_HLSQ , 0x00000000); > gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000); > - gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ, 0x00020000); > - gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL, 0xAAAAAAAA); > + gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ, 0x00220000); > + /* Early A430's have a timing issue with SP/TP power collapse; > + disabling HW clock gating prevents it. */ > + if (adreno_is_a430(adreno_gpu) && adreno_gpu->rev.patchid < 2) > + gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL, 0); > + else > + gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL, 0xAAAAAAAA); > gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2, 0); > } > > + > static void a4xx_me_init(struct msm_gpu *gpu) > { > struct msm_ringbuffer *ring = gpu->rb; > @@ -150,7 +156,7 @@ static int a4xx_hw_init(struct msm_gpu *gpu) > uint32_t *ptr, len; > int i, ret; > > - if (adreno_is_a4xx(adreno_gpu)) { > + if (adreno_is_a420(adreno_gpu)) { > gpu_write(gpu, REG_A4XX_VBIF_ABIT_SORT, 0x0001001F); > gpu_write(gpu, REG_A4XX_VBIF_ABIT_SORT_CONF, 0x000000A4); > gpu_write(gpu, REG_A4XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000001); > @@ -159,6 +165,13 @@ static int a4xx_hw_init(struct msm_gpu *gpu) > gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF0, 0x18181818); > gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF1, 0x00000018); > gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003); > + } else if (adreno_is_a430(adreno_gpu)) { > + gpu_write(gpu, REG_A4XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000001); > + gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF0, 0x18181818); > + gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF1, 0x00000018); > + gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF0, 0x18181818); > + gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF1, 0x00000018); > + gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003); > } else { > BUG(); > } > @@ -170,6 +183,10 @@ static int a4xx_hw_init(struct msm_gpu *gpu) > gpu_write(gpu, REG_A4XX_RBBM_SP_HYST_CNT, 0x10); > gpu_write(gpu, REG_A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10); > > + if (adreno_is_a430(adreno_gpu)) { > + gpu_write(gpu, REG_A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL2, 0x30); > + } > + > /* Enable the RBBM error reporting bits */ > gpu_write(gpu, REG_A4XX_RBBM_AHB_CTL0, 0x00000001); > > @@ -192,6 +209,9 @@ static int a4xx_hw_init(struct msm_gpu *gpu) > /* Turn on performance counters: */ > gpu_write(gpu, REG_A4XX_RBBM_PERFCTR_CTL, 0x01); > > + if (adreno_is_a430(adreno_gpu)) > + gpu_write(gpu, REG_A4XX_UCHE_CACHE_WAYS_VFD, 0x07); > + > /* Disable L2 bypass to avoid UCHE out of bounds errors */ > gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_LO, 0xffff0000); > gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_HI, 0xffff0000); > @@ -199,6 +219,15 @@ static int a4xx_hw_init(struct msm_gpu *gpu) > gpu_write(gpu, REG_A4XX_CP_DEBUG, (1 << 25) | > (adreno_is_a420(adreno_gpu) ? (1 << 29) : 0)); > > + /* On A430 enable SP regfile sleep for power savings */ > + /* TODO downstream does this for !420, so maybe applies for 405 too? */ > + if (!adreno_is_a420(adreno_gpu)) { > + gpu_write(gpu, REG_A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_0, > + 0x00000441); > + gpu_write(gpu, REG_A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_1, > + 0x00000441); > + } > + > a4xx_enable_hwcg(gpu); > > /* > @@ -213,9 +242,11 @@ static int a4xx_hw_init(struct msm_gpu *gpu) > gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ, val); > } > > - ret = adreno_hw_init(gpu); > - if (ret) > - return ret; hmm, it seems a bit wrong to never call adreno_hw_init() for a430 (esp. when below you have some changes in adreno_hw_init()).. am I not seeing something, or did you miss a hunk? > + if (!adreno_is_a430(adreno_gpu)) { > + ret = adreno_hw_init(gpu); > + if (ret) > + return ret; > + } > > /* setup access protection: */ > gpu_write(gpu, REG_A4XX_CP_PROTECT_CTRL, 0x00000007); > @@ -326,6 +357,13 @@ static irqreturn_t a4xx_irq(struct msm_gpu *gpu) > status = gpu_read(gpu, REG_A4XX_RBBM_INT_0_STATUS); > DBG("%s: Int status %08x", gpu->name, status); > > + if (status & (1 << A4XX_INT_CP_REG_PROTECT_FAULT)) { > + uint32_t reg = gpu_read(gpu, REG_A4XX_CP_PROTECT_STATUS); > + printk("CP | Protected mode error| %s | addr=%x\n", > + reg & (1 << 24) ? "WRITE" : "READ", > + (reg & 0xFFFFF) >> 2); > + } > + this looks pretty useful, but also unrelated to a430. Mind splitting it out into it's own patch? > gpu_write(gpu, REG_A4XX_RBBM_INT_CLEAR_CMD, status); > > msm_gpu_retire(gpu); > diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c > index 1ea2df5..43d5aaa 100644 > --- a/drivers/gpu/drm/msm/adreno/adreno_device.c > +++ b/drivers/gpu/drm/msm/adreno/adreno_device.c > @@ -73,6 +73,14 @@ static const struct adreno_info gpulist[] = { > .pfpfw = "a420_pfp.fw", > .gmem = (SZ_1M + SZ_512K), > .init = a4xx_gpu_init, > + }, { > + .rev = ADRENO_REV(4, 3, 0, ANY_ID), > + .revn = 430, > + .name = "A430", > + .pm4fw = "a420_pm4.fw", > + .pfpfw = "a420_pfp.fw", > + .gmem = (SZ_1M + SZ_512K), > + .init = a4xx_gpu_init, > }, > }; > > diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c > index a3b54cc..33c96d0d 100644 > --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c > +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c > @@ -68,18 +68,15 @@ int adreno_hw_init(struct msm_gpu *gpu) > adreno_gpu_write(adreno_gpu, REG_ADRENO_CP_RB_CNTL, > /* size is log2(quad-words): */ > AXXX_CP_RB_CNTL_BUFSZ(ilog2(gpu->rb->size / 8)) | > - AXXX_CP_RB_CNTL_BLKSZ(ilog2(RB_BLKSIZE / 8))); > + AXXX_CP_RB_CNTL_BLKSZ(ilog2(RB_BLKSIZE / 8)) | > + (adreno_is_a430(adreno_gpu) ? (1 << 27) : 0)); So.. from the a3xx (apq8016) docs which I do have, bit 27 here is 'RB_NO_UPDATE - Disable writes of Read Pointer to memory'. And actually looks like we already have AXXX_CP_RB_CNTL_NO_UPDATE in the .xml.h so no need to open code it. Downstream commit which makes this change mentions "Sometimes the RPTR shadow memory is unreliable causing timeouts in adreno_idle(). Read it directly from the register instead." This might be a bit easier to understand in the git history if we split the logical equivalent of downstream cdcc02a68649f9929e3f3a5a84bfcac5e480894e into one patch, and then the remaining a430 changes as preceding patch. I think the RB_NO_UPDATE change we could just make unconditional (although if you want to take the safe route, make it conditional on a430 for now and once I have a chance to test it on others I can flip it to unconditional). > /* Setup ringbuffer address: */ > adreno_gpu_write(adreno_gpu, REG_ADRENO_CP_RB_BASE, gpu->rb_iova); > - adreno_gpu_write(adreno_gpu, REG_ADRENO_CP_RB_RPTR_ADDR, > - rbmemptr(adreno_gpu, rptr)); > > - /* Setup scratch/timestamp: */ > - adreno_gpu_write(adreno_gpu, REG_ADRENO_SCRATCH_ADDR, > - rbmemptr(adreno_gpu, fence)); > - > - adreno_gpu_write(adreno_gpu, REG_ADRENO_SCRATCH_UMSK, 0x1); > + if (!adreno_is_a430(adreno_gpu)) > + adreno_gpu_write(adreno_gpu, REG_ADRENO_CP_RB_RPTR_ADDR, > + rbmemptr(adreno_gpu, rptr)); > > return 0; > } > @@ -89,6 +86,16 @@ static uint32_t get_wptr(struct msm_ringbuffer *ring) > return ring->cur - ring->start; > } > > +/* Use this helper to read rptr, since a430 doesn't update rptr in memory */ > +static uint32_t get_rptr(struct adreno_gpu *adreno_gpu) > +{ > + if (adreno_is_a430(adreno_gpu)) > + return adreno_gpu->memptrs->rptr = adreno_gpu_read( > + adreno_gpu, REG_ADRENO_CP_RB_RPTR); > + else > + return adreno_gpu->memptrs->rptr; > +} > + > uint32_t adreno_last_fence(struct msm_gpu *gpu) > { > struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); > @@ -137,7 +144,8 @@ int adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, > if (priv->lastctx == ctx) > break; > case MSM_SUBMIT_CMD_BUF: > - OUT_PKT3(ring, CP_INDIRECT_BUFFER_PFD, 2); > + OUT_PKT3(ring, adreno_is_a430(adreno_gpu) ? > + CP_INDIRECT_BUFFER_PFE : CP_INDIRECT_BUFFER_PFD, 2); so far, other than a420 which I still need to test, I think we can switch to 'prefetch enabled' version of the IB packet across the board. But I think it's fine to leave it, I can either drop the hunk if I've had a chance to test a420 or remove it later. > OUT_RING(ring, submit->cmd[i].iova); > OUT_RING(ring, submit->cmd[i].size); > ibs++; > @@ -216,9 +224,16 @@ void adreno_idle(struct msm_gpu *gpu) > { > struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); > uint32_t wptr = get_wptr(gpu->rb); > + int ret; > > /* wait for CP to drain ringbuffer: */ > - if (spin_until(adreno_gpu->memptrs->rptr == wptr)) > + if (adreno_is_a430(adreno_gpu)) > + ret = spin_until((adreno_gpu->memptrs->rptr = adreno_gpu_read( > + adreno_gpu, REG_ADRENO_CP_RB_RPTR)) == wptr); > + else > + ret = spin_until(adreno_gpu->memptrs->rptr == wptr); I think just use get_rptr() here? BR, -R > + > + if (ret) > DRM_ERROR("%s: timeout waiting to drain ringbuffer!\n", gpu->name); > > /* TODO maybe we need to reset GPU here to recover from hang? */ > @@ -237,7 +252,7 @@ void adreno_show(struct msm_gpu *gpu, struct seq_file *m) > > seq_printf(m, "fence: %d/%d\n", adreno_gpu->memptrs->fence, > gpu->submitted_fence); > - seq_printf(m, "rptr: %d\n", adreno_gpu->memptrs->rptr); > + seq_printf(m, "rptr: %d\n", get_rptr(adreno_gpu)); > seq_printf(m, "wptr: %d\n", adreno_gpu->memptrs->wptr); > seq_printf(m, "rb wptr: %d\n", get_wptr(gpu->rb)); > > @@ -278,7 +293,7 @@ void adreno_dump_info(struct msm_gpu *gpu) > > printk("fence: %d/%d\n", adreno_gpu->memptrs->fence, > gpu->submitted_fence); > - printk("rptr: %d\n", adreno_gpu->memptrs->rptr); > + printk("rptr: %d\n", get_rptr(adreno_gpu)); > printk("wptr: %d\n", adreno_gpu->memptrs->wptr); > printk("rb wptr: %d\n", get_wptr(gpu->rb)); > > @@ -313,7 +328,7 @@ static uint32_t ring_freewords(struct msm_gpu *gpu) > struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); > uint32_t size = gpu->rb->size / 4; > uint32_t wptr = get_wptr(gpu->rb); > - uint32_t rptr = adreno_gpu->memptrs->rptr; > + uint32_t rptr = get_rptr(adreno_gpu); > return (rptr + (size - 1) - wptr) % size; > } > > diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h > index 0a312e9..c26aea1 100644 > --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h > +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h > @@ -228,6 +228,11 @@ static inline int adreno_is_a420(struct adreno_gpu *gpu) > return gpu->revn == 420; > } > > +static inline int adreno_is_a430(struct adreno_gpu *gpu) > +{ > + return gpu->revn == 430; > +} > + > int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value); > int adreno_hw_init(struct msm_gpu *gpu); > uint32_t adreno_last_fence(struct msm_gpu *gpu); > -- > 2.1.4 > > _______________________________________________ > dri-devel mailing list > dri-devel@xxxxxxxxxxxxxxxxxxxxx > http://lists.freedesktop.org/mailman/listinfo/dri-devel _______________________________________________ dri-devel mailing list dri-devel@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/dri-devel