Add the infrastructure to support the idea of multiple ringbuffers. Assign each ringbuffer an id and use that as an index for the various ring specific operations. The biggest delta is to support legacy fences. Each fence gets its own sequence number but the legacy functions expect to use a unique integer. To handle this we return a unique identifier for each submission but map it to a specific ring/sequence under the covers. Newer users use a dma_fence pointer anyway so they don't care about the actual sequence ID or ring. The actual mechanics for multiple ringbuffers are very target specific so this code just allows for the possibility but still only defines one ringbuffer for each target family. Signed-off-by: Jordan Crouse <jcrouse@xxxxxxxxxxxxxx> --- drivers/gpu/drm/msm/adreno/a3xx_gpu.c | 9 +- drivers/gpu/drm/msm/adreno/a4xx_gpu.c | 9 +- drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 45 +++++----- drivers/gpu/drm/msm/adreno/a5xx_gpu.h | 2 +- drivers/gpu/drm/msm/adreno/a5xx_power.c | 6 +- drivers/gpu/drm/msm/adreno/adreno_gpu.c | 133 +++++++++++++++++------------ drivers/gpu/drm/msm/adreno/adreno_gpu.h | 20 +++-- drivers/gpu/drm/msm/msm_drv.h | 2 + drivers/gpu/drm/msm/msm_gem.h | 3 +- drivers/gpu/drm/msm/msm_gem_submit.c | 5 +- drivers/gpu/drm/msm/msm_gpu.c | 146 +++++++++++++++++++++----------- drivers/gpu/drm/msm/msm_gpu.h | 39 ++++----- drivers/gpu/drm/msm/msm_ringbuffer.c | 27 +++--- drivers/gpu/drm/msm/msm_ringbuffer.h | 19 ++++- 14 files changed, 287 insertions(+), 178 deletions(-) diff --git a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c index 789f7fb..4baef27 100644 --- a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c @@ -44,7 +44,7 @@ static bool a3xx_me_init(struct msm_gpu *gpu) { - struct msm_ringbuffer *ring = gpu->rb; + struct msm_ringbuffer *ring = gpu->rb[0]; OUT_PKT3(ring, CP_ME_INIT, 17); OUT_RING(ring, 0x000003f7); @@ -65,7 +65,7 @@ static bool a3xx_me_init(struct msm_gpu *gpu) OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); - gpu->funcs->flush(gpu); + gpu->funcs->flush(gpu, ring); return a3xx_idle(gpu); } @@ -339,7 +339,7 @@ static void a3xx_destroy(struct msm_gpu *gpu) static bool a3xx_idle(struct msm_gpu *gpu) { /* wait for ringbuffer to drain: */ - if (!adreno_idle(gpu)) + if (!adreno_idle(gpu, gpu->rb[0])) return false; /* then wait for GPU to finish: */ @@ -446,6 +446,7 @@ static void a3xx_dump(struct msm_gpu *gpu) .recover = a3xx_recover, .submit = adreno_submit, .flush = adreno_flush, + .active_ring = adreno_active_ring, .irq = a3xx_irq, .destroy = a3xx_destroy, #ifdef CONFIG_DEBUG_FS @@ -491,7 +492,7 @@ struct msm_gpu *a3xx_gpu_init(struct drm_device *dev) adreno_gpu->registers = a3xx_registers; adreno_gpu->reg_offsets = a3xx_register_offsets; - ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs); + ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1); if (ret) goto fail; diff --git a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c index f87c4312..8199a4b 100644 --- a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c @@ -116,7 +116,7 @@ static void a4xx_enable_hwcg(struct msm_gpu *gpu) static bool a4xx_me_init(struct msm_gpu *gpu) { - struct msm_ringbuffer *ring = gpu->rb; + struct msm_ringbuffer *ring = gpu->rb[0]; OUT_PKT3(ring, CP_ME_INIT, 17); OUT_RING(ring, 0x000003f7); @@ -137,7 +137,7 @@ static bool a4xx_me_init(struct msm_gpu *gpu) OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); - gpu->funcs->flush(gpu); + gpu->funcs->flush(gpu, ring); return a4xx_idle(gpu); } @@ -337,7 +337,7 @@ static void a4xx_destroy(struct msm_gpu *gpu) static bool a4xx_idle(struct msm_gpu *gpu) { /* wait for ringbuffer to drain: */ - if (!adreno_idle(gpu)) + if (!adreno_idle(gpu, gpu->rb[0])) return false; /* then wait for GPU to finish: */ @@ -534,6 +534,7 @@ static int a4xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value) .recover = a4xx_recover, .submit = adreno_submit, .flush = adreno_flush, + .active_ring = adreno_active_ring, .irq = a4xx_irq, .destroy = a4xx_destroy, #ifdef CONFIG_DEBUG_FS @@ -573,7 +574,7 @@ struct msm_gpu *a4xx_gpu_init(struct drm_device *dev) adreno_gpu->registers = a4xx_registers; adreno_gpu->reg_offsets = a4xx_register_offsets; - ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs); + ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1); if (ret) goto fail; diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c index c986a76..a195d5d 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c @@ -84,7 +84,7 @@ static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, struct msm_file_private *ctx) { struct msm_drm_private *priv = gpu->dev->dev_private; - struct msm_ringbuffer *ring = gpu->rb; + struct msm_ringbuffer *ring = submit->ring; unsigned int i, ibs = 0; for (i = 0; i < submit->nr_cmds; i++) { @@ -109,11 +109,11 @@ static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, OUT_PKT7(ring, CP_EVENT_WRITE, 4); OUT_RING(ring, CACHE_FLUSH_TS | (1 << 31)); - OUT_RING(ring, lower_32_bits(rbmemptr(gpu, fence))); - OUT_RING(ring, upper_32_bits(rbmemptr(gpu, fence))); + OUT_RING(ring, lower_32_bits(rbmemptr(ring, fence))); + OUT_RING(ring, upper_32_bits(rbmemptr(ring, fence))); OUT_RING(ring, submit->seqno); - gpu->funcs->flush(gpu); + gpu->funcs->flush(gpu, ring); } static const struct { @@ -229,7 +229,7 @@ void a5xx_set_hwcg(struct msm_gpu *gpu, bool state) static int a5xx_me_init(struct msm_gpu *gpu) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); - struct msm_ringbuffer *ring = gpu->rb; + struct msm_ringbuffer *ring = gpu->rb[0]; OUT_PKT7(ring, CP_ME_INIT, 8); @@ -260,9 +260,8 @@ static int a5xx_me_init(struct msm_gpu *gpu) OUT_RING(ring, 0x00000000); OUT_RING(ring, 0x00000000); - gpu->funcs->flush(gpu); - - return a5xx_idle(gpu) ? 0 : -EINVAL; + gpu->funcs->flush(gpu, ring); + return a5xx_idle(gpu, ring) ? 0 : -EINVAL; } static struct drm_gem_object *a5xx_ucode_load_bo(struct msm_gpu *gpu, @@ -593,11 +592,11 @@ static int a5xx_hw_init(struct msm_gpu *gpu) * ticking correctly */ if (adreno_is_a530(adreno_gpu)) { - OUT_PKT7(gpu->rb, CP_EVENT_WRITE, 1); - OUT_RING(gpu->rb, 0x0F); + OUT_PKT7(gpu->rb[0], CP_EVENT_WRITE, 1); + OUT_RING(gpu->rb[0], 0x0F); - gpu->funcs->flush(gpu); - if (!a5xx_idle(gpu)) + gpu->funcs->flush(gpu, gpu->rb[0]); + if (!a5xx_idle(gpu, gpu->rb[0])) return -EINVAL; } @@ -610,11 +609,11 @@ static int a5xx_hw_init(struct msm_gpu *gpu) */ ret = a5xx_zap_shader_init(gpu); if (!ret) { - OUT_PKT7(gpu->rb, CP_SET_SECURE_MODE, 1); - OUT_RING(gpu->rb, 0x00000000); + OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1); + OUT_RING(gpu->rb[0], 0x00000000); - gpu->funcs->flush(gpu); - if (!a5xx_idle(gpu)) + gpu->funcs->flush(gpu, gpu->rb[0]); + if (!a5xx_idle(gpu, gpu->rb[0])) return -EINVAL; } else { /* Print a warning so if we die, we know why */ @@ -691,18 +690,19 @@ static inline bool _a5xx_check_idle(struct msm_gpu *gpu) A5XX_RBBM_INT_0_MASK_MISC_HANG_DETECT); } -bool a5xx_idle(struct msm_gpu *gpu) +bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring) { /* wait for CP to drain ringbuffer: */ - if (!adreno_idle(gpu)) + if (!adreno_idle(gpu, ring)) return false; if (spin_until(_a5xx_check_idle(gpu))) { - DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X\n", + DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n", gpu->name, __builtin_return_address(0), gpu_read(gpu, REG_A5XX_RBBM_STATUS), - gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS)); - + gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS), + gpu_read(gpu, REG_A5XX_CP_RB_RPTR), + gpu_read(gpu, REG_A5XX_CP_RB_WPTR)); return false; } @@ -1026,6 +1026,7 @@ static void a5xx_show(struct msm_gpu *gpu, struct seq_file *m) .recover = a5xx_recover, .submit = a5xx_submit, .flush = adreno_flush, + .active_ring = adreno_active_ring, .irq = a5xx_irq, .destroy = a5xx_destroy, #ifdef CONFIG_DEBUG_FS @@ -1061,7 +1062,7 @@ struct msm_gpu *a5xx_gpu_init(struct drm_device *dev) a5xx_gpu->lm_leakage = 0x4E001A; - ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs); + ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1); if (ret) { a5xx_destroy(&(a5xx_gpu->base.base)); return ERR_PTR(ret); diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.h b/drivers/gpu/drm/msm/adreno/a5xx_gpu.h index 3d62f00..8f4f093 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.h +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.h @@ -57,7 +57,7 @@ static inline int spin_usecs(struct msm_gpu *gpu, uint32_t usecs, return -ETIMEDOUT; } -bool a5xx_idle(struct msm_gpu *gpu); +bool a5xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring); void a5xx_set_hwcg(struct msm_gpu *gpu, bool state); #endif /* __A5XX_GPU_H__ */ diff --git a/drivers/gpu/drm/msm/adreno/a5xx_power.c b/drivers/gpu/drm/msm/adreno/a5xx_power.c index 04aab1d..d09a716 100644 --- a/drivers/gpu/drm/msm/adreno/a5xx_power.c +++ b/drivers/gpu/drm/msm/adreno/a5xx_power.c @@ -173,7 +173,7 @@ static int a5xx_gpmu_init(struct msm_gpu *gpu) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu); - struct msm_ringbuffer *ring = gpu->rb; + struct msm_ringbuffer *ring = gpu->rb[0]; if (!a5xx_gpu->gpmu_dwords) return 0; @@ -192,9 +192,9 @@ static int a5xx_gpmu_init(struct msm_gpu *gpu) OUT_PKT7(ring, CP_SET_PROTECTED_MODE, 1); OUT_RING(ring, 1); - gpu->funcs->flush(gpu); + gpu->funcs->flush(gpu, ring); - if (!a5xx_idle(gpu)) { + if (!a5xx_idle(gpu, ring)) { DRM_ERROR("%s: Unable to load GPMU firmware. GPMU will not be active\n", gpu->name); return -EINVAL; diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index 552e692..5f62244 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -21,7 +21,6 @@ #include "msm_gem.h" #include "msm_mmu.h" -#define RB_SIZE SZ_32K #define RB_BLKSIZE 32 int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value) @@ -60,38 +59,47 @@ int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value) int adreno_hw_init(struct msm_gpu *gpu) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); - int ret; + int i; DBG("%s", gpu->name); - ret = msm_gem_get_iova(gpu->rb->bo, gpu->aspace, &gpu->rb_iova); - if (ret) { - gpu->rb_iova = 0; - dev_err(gpu->dev->dev, "could not map ringbuffer: %d\n", ret); - return ret; - } + for (i = 0; i < gpu->nr_rings; i++) { + struct msm_ringbuffer *ring = gpu->rb[i]; + int ret; - /* reset ringbuffer: */ - gpu->rb->cur = gpu->rb->start; + if (!ring) + continue; - /* reset completed fence seqno: */ - gpu->memptrs->fence = gpu->seqno; - gpu->memptrs->rptr = 0; + ret = msm_gem_get_iova(ring->bo, gpu->aspace, &ring->iova); + if (ret) { + ring->iova = 0; + dev_err(gpu->dev->dev, + "could not map ringbuffer %d: %d\n", i, ret); + return ret; + } + + ring->cur = ring->start; + + /* reset completed fence seqno: */ + ring->memptrs->fence = ring->seqno; + ring->memptrs->rptr = 0; + } /* Setup REG_CP_RB_CNTL: */ adreno_gpu_write(adreno_gpu, REG_ADRENO_CP_RB_CNTL, - /* size is log2(quad-words): */ - AXXX_CP_RB_CNTL_BUFSZ(ilog2(gpu->rb->size / 8)) | - AXXX_CP_RB_CNTL_BLKSZ(ilog2(RB_BLKSIZE / 8)) | - (adreno_is_a430(adreno_gpu) ? AXXX_CP_RB_CNTL_NO_UPDATE : 0)); + /* size is log2(quad-words): */ + AXXX_CP_RB_CNTL_BUFSZ(ilog2(MSM_GPU_RINGBUFFER_SZ / 8)) | + AXXX_CP_RB_CNTL_BLKSZ(ilog2(RB_BLKSIZE / 8)) | + (adreno_is_a430(adreno_gpu) ? AXXX_CP_RB_CNTL_NO_UPDATE : 0)); - /* Setup ringbuffer address: */ + /* Setup ringbuffer address - use ringbuffer[0] for GPU init */ adreno_gpu_write64(adreno_gpu, REG_ADRENO_CP_RB_BASE, - REG_ADRENO_CP_RB_BASE_HI, gpu->rb_iova); + REG_ADRENO_CP_RB_BASE_HI, gpu->rb[0]->iova); if (!adreno_is_a430(adreno_gpu)) { adreno_gpu_write64(adreno_gpu, REG_ADRENO_CP_RB_RPTR_ADDR, - REG_ADRENO_CP_RB_RPTR_ADDR_HI, rbmemptr(gpu, rptr)); + REG_ADRENO_CP_RB_RPTR_ADDR_HI, + rbmemptr(gpu->rb[0], rptr)); } return 0; @@ -103,15 +111,19 @@ static uint32_t get_wptr(struct msm_ringbuffer *ring) } /* Use this helper to read rptr, since a430 doesn't update rptr in memory */ -static uint32_t get_rptr(struct adreno_gpu *adreno_gpu) +static uint32_t get_rptr(struct adreno_gpu *adreno_gpu, + struct msm_ringbuffer *ring) { - struct msm_gpu *gpu = &adreno_gpu->base; - if (adreno_is_a430(adreno_gpu)) - return gpu->memptrs->rptr = adreno_gpu_read( + return ring->memptrs->rptr = adreno_gpu_read( adreno_gpu, REG_ADRENO_CP_RB_RPTR); else - return gpu->memptrs->rptr; + return ring->memptrs->rptr; +} + +struct msm_ringbuffer *adreno_active_ring(struct msm_gpu *gpu) +{ + return gpu->rb[0]; } void adreno_recover(struct msm_gpu *gpu) @@ -137,7 +149,7 @@ void adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); struct msm_drm_private *priv = gpu->dev->dev_private; - struct msm_ringbuffer *ring = gpu->rb; + struct msm_ringbuffer *ring = submit->ring; unsigned i; for (i = 0; i < submit->nr_cmds; i++) { @@ -176,7 +188,7 @@ void adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, OUT_PKT3(ring, CP_EVENT_WRITE, 3); OUT_RING(ring, CACHE_FLUSH_TS); - OUT_RING(ring, rbmemptr(gpu, fence)); + OUT_RING(ring, rbmemptr(ring, fence)); OUT_RING(ring, submit->seqno); /* we could maybe be clever and only CP_COND_EXEC the interrupt: */ @@ -203,10 +215,10 @@ void adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, } #endif - gpu->funcs->flush(gpu); + gpu->funcs->flush(gpu, ring); } -void adreno_flush(struct msm_gpu *gpu) +void adreno_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); uint32_t wptr; @@ -216,7 +228,7 @@ void adreno_flush(struct msm_gpu *gpu) * to account for the possibility that the last command fit exactly into * the ringbuffer and rb->next hasn't wrapped to zero yet */ - wptr = get_wptr(gpu->rb) & ((gpu->rb->size / 4) - 1); + wptr = get_wptr(ring) % (MSM_GPU_RINGBUFFER_SZ >> 2); /* ensure writes to ringbuffer have hit system memory: */ mb(); @@ -224,17 +236,18 @@ void adreno_flush(struct msm_gpu *gpu) adreno_gpu_write(adreno_gpu, REG_ADRENO_CP_RB_WPTR, wptr); } -bool adreno_idle(struct msm_gpu *gpu) +bool adreno_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); - uint32_t wptr = get_wptr(gpu->rb); + uint32_t wptr = get_wptr(ring); /* wait for CP to drain ringbuffer: */ - if (!spin_until(get_rptr(adreno_gpu) == wptr)) + if (!spin_until(get_rptr(adreno_gpu, ring) == wptr)) return true; /* TODO maybe we need to reset GPU here to recover from hang? */ - DRM_ERROR("%s: timeout waiting to drain ringbuffer!\n", gpu->name); + DRM_ERROR("%s: timeout waiting to drain ringbuffer %d!\n", gpu->name, + ring->id); return false; } @@ -249,10 +262,16 @@ void adreno_show(struct msm_gpu *gpu, struct seq_file *m) adreno_gpu->rev.major, adreno_gpu->rev.minor, adreno_gpu->rev.patchid); - seq_printf(m, "fence: %d/%d\n", gpu->memptrs->fence, - gpu->seqno); - seq_printf(m, "rptr: %d\n", get_rptr(adreno_gpu)); - seq_printf(m, "rb wptr: %d\n", get_wptr(gpu->rb)); + for (i = 0; i < gpu->nr_rings; i++) { + struct msm_ringbuffer *ring = gpu->rb[i]; + + seq_printf(m, "rb %d: fence: %d/%d\n", i, + ring->memptrs->fence, ring->seqno); + + seq_printf(m, " rptr: %d\n", + get_rptr(adreno_gpu, ring)); + seq_printf(m, "rb wptr: %d\n", get_wptr(ring)); + } /* dump these out in a form that can be parsed by demsm: */ seq_printf(m, "IO:region %s 00000000 00020000\n", gpu->name); @@ -278,16 +297,23 @@ void adreno_show(struct msm_gpu *gpu, struct seq_file *m) void adreno_dump_info(struct msm_gpu *gpu) { struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); + int i; printk("revision: %d (%d.%d.%d.%d)\n", adreno_gpu->info->revn, adreno_gpu->rev.core, adreno_gpu->rev.major, adreno_gpu->rev.minor, adreno_gpu->rev.patchid); - printk("fence: %d/%d\n", gpu->memptrs->fence, - gpu->seqno); - printk("rptr: %d\n", get_rptr(adreno_gpu)); - printk("rb wptr: %d\n", get_wptr(gpu->rb)); + for (i = 0; i < gpu->nr_rings; i++) { + struct msm_ringbuffer *ring = gpu->rb[i]; + + printk("rb %d: fence: %d/%d\n", i, + ring->memptrs->fence, + ring->seqno); + + printk("rptr: %d\n", get_rptr(adreno_gpu, ring)); + printk("rb wptr: %d\n", get_wptr(ring)); + } } /* would be nice to not have to duplicate the _show() stuff with printk(): */ @@ -310,23 +336,26 @@ void adreno_dump(struct msm_gpu *gpu) } } -static uint32_t ring_freewords(struct msm_gpu *gpu) +static uint32_t ring_freewords(struct msm_ringbuffer *ring) { - struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu); - uint32_t size = gpu->rb->size / 4; - uint32_t wptr = get_wptr(gpu->rb); - uint32_t rptr = get_rptr(adreno_gpu); + struct adreno_gpu *adreno_gpu = to_adreno_gpu(ring->gpu); + uint32_t size = MSM_GPU_RINGBUFFER_SZ >> 2; + uint32_t wptr = get_wptr(ring); + uint32_t rptr = get_rptr(adreno_gpu, ring); return (rptr + (size - 1) - wptr) % size; } -void adreno_wait_ring(struct msm_gpu *gpu, uint32_t ndwords) +void adreno_wait_ring(struct msm_ringbuffer *ring, uint32_t ndwords) { - if (spin_until(ring_freewords(gpu) >= ndwords)) - DRM_ERROR("%s: timeout waiting for ringbuffer space\n", gpu->name); + if (spin_until(ring_freewords(ring) >= ndwords)) + DRM_DEV_ERROR(ring->gpu->dev->dev, + "timeout waiting for space in ringubffer %d\n", + ring->id); } int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev, - struct adreno_gpu *adreno_gpu, const struct adreno_gpu_funcs *funcs) + struct adreno_gpu *adreno_gpu, + const struct adreno_gpu_funcs *funcs, int nr_rings) { struct adreno_platform_config *config = pdev->dev.platform_data; struct msm_gpu_config adreno_gpu_config = { 0 }; @@ -354,7 +383,7 @@ int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev, adreno_gpu_config.va_start = SZ_16M; adreno_gpu_config.va_end = 0xffffffff; - adreno_gpu_config.ringsz = RB_SIZE; + adreno_gpu_config.nr_rings = nr_rings; pm_runtime_set_autosuspend_delay(&pdev->dev, DRM_MSM_INACTIVE_PERIOD); pm_runtime_use_autosuspend(&pdev->dev); diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h index 5ee1d6a..2a51ac4 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h @@ -185,17 +185,19 @@ static inline int adreno_is_a530(struct adreno_gpu *gpu) void adreno_recover(struct msm_gpu *gpu); void adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, struct msm_file_private *ctx); -void adreno_flush(struct msm_gpu *gpu); -bool adreno_idle(struct msm_gpu *gpu); +void adreno_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring); +bool adreno_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring); #ifdef CONFIG_DEBUG_FS void adreno_show(struct msm_gpu *gpu, struct seq_file *m); #endif void adreno_dump_info(struct msm_gpu *gpu); void adreno_dump(struct msm_gpu *gpu); -void adreno_wait_ring(struct msm_gpu *gpu, uint32_t ndwords); +void adreno_wait_ring(struct msm_ringbuffer *ring, uint32_t ndwords); +struct msm_ringbuffer *adreno_active_ring(struct msm_gpu *gpu); int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev, - struct adreno_gpu *gpu, const struct adreno_gpu_funcs *funcs); + struct adreno_gpu *gpu, const struct adreno_gpu_funcs *funcs, + int nr_rings); void adreno_gpu_cleanup(struct adreno_gpu *gpu); @@ -204,7 +206,7 @@ int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev, static inline void OUT_PKT0(struct msm_ringbuffer *ring, uint16_t regindx, uint16_t cnt) { - adreno_wait_ring(ring->gpu, cnt+1); + adreno_wait_ring(ring, cnt+1); OUT_RING(ring, CP_TYPE0_PKT | ((cnt-1) << 16) | (regindx & 0x7FFF)); } @@ -212,14 +214,14 @@ int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev, static inline void OUT_PKT2(struct msm_ringbuffer *ring) { - adreno_wait_ring(ring->gpu, 1); + adreno_wait_ring(ring, 1); OUT_RING(ring, CP_TYPE2_PKT); } static inline void OUT_PKT3(struct msm_ringbuffer *ring, uint8_t opcode, uint16_t cnt) { - adreno_wait_ring(ring->gpu, cnt+1); + adreno_wait_ring(ring, cnt+1); OUT_RING(ring, CP_TYPE3_PKT | ((cnt-1) << 16) | ((opcode & 0xFF) << 8)); } @@ -241,14 +243,14 @@ static inline u32 PM4_PARITY(u32 val) static inline void OUT_PKT4(struct msm_ringbuffer *ring, uint16_t regindx, uint16_t cnt) { - adreno_wait_ring(ring->gpu, cnt + 1); + adreno_wait_ring(ring, cnt + 1); OUT_RING(ring, PKT4(regindx, cnt)); } static inline void OUT_PKT7(struct msm_ringbuffer *ring, uint8_t opcode, uint16_t cnt) { - adreno_wait_ring(ring->gpu, cnt + 1); + adreno_wait_ring(ring, cnt + 1); OUT_RING(ring, CP_TYPE7_PKT | (cnt << 0) | (PM4_PARITY(cnt) << 15) | ((opcode & 0x7F) << 16) | (PM4_PARITY(opcode) << 23)); } diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h index 696413d..079a5b0 100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ b/drivers/gpu/drm/msm/msm_drv.h @@ -76,6 +76,8 @@ struct msm_vblank_ctrl { spinlock_t lock; }; +#define MSM_GPU_MAX_RINGS 1 + struct msm_drm_private { struct drm_device *dev; diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h index c98d3a7..9320e18 100644 --- a/drivers/gpu/drm/msm/msm_gem.h +++ b/drivers/gpu/drm/msm/msm_gem.h @@ -138,7 +138,7 @@ enum msm_gem_lock { struct msm_gem_submit { struct drm_device *dev; struct msm_gpu *gpu; - struct list_head node; /* node in gpu submit_list */ + struct list_head node; /* node in ring submit list */ struct list_head bo_list; struct ww_acquire_ctx ticket; uint32_t seqno; /* Sequence number of the submit on the ring */ @@ -146,6 +146,7 @@ struct msm_gem_submit { struct msm_gpu_submitqueue *queue; struct pid *pid; /* submitting process */ bool valid; /* true if no cmdstream patching needed */ + struct msm_ringbuffer *ring; unsigned int nr_cmds; unsigned int nr_bos; struct { diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c index ac95816..5404b18 100644 --- a/drivers/gpu/drm/msm/msm_gem_submit.c +++ b/drivers/gpu/drm/msm/msm_gem_submit.c @@ -399,7 +399,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, struct msm_gpu_submitqueue *queue; int out_fence_fd = -1; unsigned i; - int ret; + int ret, ring; if (!gpu) return -ENXIO; @@ -532,6 +532,9 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data, submit->nr_cmds = i; + ring = clamp_t(uint32_t, queue->prio, 0, gpu->nr_rings - 1); + submit->ring = gpu->rb[ring]; + submit->fence = msm_fence_alloc(queue->fctx); if (IS_ERR(submit->fence)) { ret = PTR_ERR(submit->fence); diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index 9767d38..880d69d 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -221,11 +221,12 @@ int msm_gpu_hw_init(struct msm_gpu *gpu) * Hangcheck detection for locked gpu: */ -static void update_fences(struct msm_gpu *gpu, uint32_t fence) +static void update_fences(struct msm_gpu *gpu, struct msm_ringbuffer *ring, + uint32_t fence) { struct msm_gem_submit *submit; - list_for_each_entry(submit, &gpu->submit_list, node) { + list_for_each_entry(submit, &ring->submits, node) { if (submit->seqno > fence) break; @@ -241,15 +242,34 @@ static void recover_worker(struct work_struct *work) struct msm_gpu *gpu = container_of(work, struct msm_gpu, recover_work); struct drm_device *dev = gpu->dev; struct msm_gem_submit *submit; - uint32_t fence = gpu->memptrs->fence; + struct msm_ringbuffer *cur_ring = gpu->funcs->active_ring(gpu); + uint64_t fence; + int i; + + /* Update all the rings with the latest and greatest fence */ + for (i = 0; i < ARRAY_SIZE(gpu->rb); i++) { + struct msm_ringbuffer *ring = gpu->rb[i]; - update_fences(gpu, fence + 1); + fence = ring->memptrs->fence; + + /* + * For the current (faulting?) ring/submit advance the fence by + * one more to clear the faulting submit + */ + if (ring == cur_ring) + fence = fence + 1; + + update_fences(gpu, ring, fence); + } mutex_lock(&dev->struct_mutex); + dev_err(dev->dev, "%s: hangcheck recover!\n", gpu->name); - list_for_each_entry(submit, &gpu->submit_list, node) { - if (submit->seqno == (fence + 1)) { + fence = cur_ring->memptrs->fence + 1; + + list_for_each_entry(submit, &cur_ring->submits, node) { + if (submit->seqno == fence) { struct task_struct *task; rcu_read_lock(); @@ -271,9 +291,16 @@ static void recover_worker(struct work_struct *work) gpu->funcs->recover(gpu); pm_runtime_put_sync(&gpu->pdev->dev); - /* replay the remaining submits after the one that hung: */ - list_for_each_entry(submit, &gpu->submit_list, node) { - gpu->funcs->submit(gpu, submit, NULL); + /* + * Replay all remaining submits starting with highest priority + * ring + */ + + for (i = gpu->nr_rings - 1; i >= 0; i--) { + struct msm_ringbuffer *ring = gpu->rb[i]; + + list_for_each_entry(submit, &ring->submits, node) + gpu->funcs->submit(gpu, submit, NULL); } } @@ -294,25 +321,27 @@ static void hangcheck_handler(unsigned long data) struct msm_gpu *gpu = (struct msm_gpu *)data; struct drm_device *dev = gpu->dev; struct msm_drm_private *priv = dev->dev_private; - uint32_t fence = gpu->memptrs->fence; + struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu); + uint32_t fence = ring->memptrs->fence; - if (fence != gpu->hangcheck_fence) { + if (fence != ring->hangcheck_fence) { /* some progress has been made.. ya! */ - gpu->hangcheck_fence = fence; - } else if (fence < gpu->seqno) { + ring->hangcheck_fence = fence; + } else if (fence < ring->seqno) { /* no progress and not done.. hung! */ - gpu->hangcheck_fence = fence; - dev_err(dev->dev, "%s: hangcheck detected gpu lockup!\n", - gpu->name); + ring->hangcheck_fence = fence; + dev_err(dev->dev, "%s: hangcheck detected gpu lockup rb %d!\n", + gpu->name, ring->id); dev_err(dev->dev, "%s: completed fence: %u\n", gpu->name, fence); dev_err(dev->dev, "%s: submitted fence: %u\n", - gpu->name, gpu->seqno); + gpu->name, ring->seqno); + queue_work(priv->wq, &gpu->recover_work); } /* if still more pending work, reset the hangcheck timer: */ - if (gpu->seqno > gpu->hangcheck_fence) + if (ring->seqno > ring->hangcheck_fence) hangcheck_timer_reset(gpu); /* workaround for missing irq: */ @@ -441,19 +470,18 @@ static void retire_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) static void retire_submits(struct msm_gpu *gpu) { struct drm_device *dev = gpu->dev; + struct msm_gem_submit *submit, *tmp; + int i; WARN_ON(!mutex_is_locked(&dev->struct_mutex)); - while (!list_empty(&gpu->submit_list)) { - struct msm_gem_submit *submit; - - submit = list_first_entry(&gpu->submit_list, - struct msm_gem_submit, node); + /* Retire the commits starting with highest priority */ + for (i = gpu->nr_rings - 1; i >= 0; i--) { + struct msm_ringbuffer *ring = gpu->rb[i]; - if (dma_fence_is_signaled(submit->fence)) { - retire_submit(gpu, submit); - } else { - break; + list_for_each_entry_safe(submit, tmp, &ring->submits, node) { + if (dma_fence_is_signaled(submit->fence)) + retire_submit(gpu, submit); } } } @@ -462,9 +490,10 @@ static void retire_worker(struct work_struct *work) { struct msm_gpu *gpu = container_of(work, struct msm_gpu, retire_work); struct drm_device *dev = gpu->dev; - uint32_t fence = gpu->memptrs->fence; + int i; - update_fences(gpu, fence); + for (i = 0; i < gpu->nr_rings; i++) + update_fences(gpu, gpu->rb[i], gpu->rb[i]->memptrs->fence); mutex_lock(&dev->struct_mutex); retire_submits(gpu); @@ -485,6 +514,7 @@ void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, { struct drm_device *dev = gpu->dev; struct msm_drm_private *priv = dev->dev_private; + struct msm_ringbuffer *ring = submit->ring; int i; WARN_ON(!mutex_is_locked(&dev->struct_mutex)); @@ -495,7 +525,7 @@ void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit, submit->seqno = ++gpu->seqno; - list_add_tail(&submit->node, &gpu->submit_list); + list_add_tail(&submit->node, &ring->submits); msm_rd_dump_submit(submit); @@ -620,7 +650,9 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev, struct msm_gpu *gpu, const struct msm_gpu_funcs *funcs, const char *name, struct msm_gpu_config *config) { - int ret; + int i, ret, nr_rings = config->nr_rings; + void *memptrs; + uint64_t memptrs_iova; if (WARN_ON(gpu->num_perfcntrs > ARRAY_SIZE(gpu->last_cntrs))) gpu->num_perfcntrs = ARRAY_SIZE(gpu->last_cntrs); @@ -633,7 +665,6 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev, INIT_WORK(&gpu->retire_work, retire_worker); INIT_WORK(&gpu->recover_work, recover_worker); - INIT_LIST_HEAD(&gpu->submit_list); setup_timer(&gpu->hangcheck_timer, hangcheck_handler, (unsigned long)gpu); @@ -698,29 +729,47 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev, goto fail; } - gpu->memptrs = msm_gem_kernel_new(drm, sizeof(*gpu->memptrs_bo), + memptrs = msm_gem_kernel_new(drm, sizeof(*gpu->memptrs_bo), MSM_BO_UNCACHED, gpu->aspace, &gpu->memptrs_bo, - &gpu->memptrs_iova); + &memptrs_iova); - if (IS_ERR(gpu->memptrs)) { - ret = PTR_ERR(gpu->memptrs); - gpu->memptrs = NULL; + if (IS_ERR(memptrs)) { + ret = PTR_ERR(memptrs); dev_err(drm->dev, "could not allocate memptrs: %d\n", ret); goto fail; } - /* Create ringbuffer: */ - gpu->rb = msm_ringbuffer_new(gpu, config->ringsz); - if (IS_ERR(gpu->rb)) { - ret = PTR_ERR(gpu->rb); - gpu->rb = NULL; - dev_err(drm->dev, "could not create ringbuffer: %d\n", ret); - goto fail; + if (nr_rings > ARRAY_SIZE(gpu->rb)) { + DRM_DEV_INFO_ONCE(drm->dev, "Only creating %lu ringbuffers\n", + ARRAY_SIZE(gpu->rb)); + nr_rings = ARRAY_SIZE(gpu->rb); + } + + /* Create ringbuffer(s): */ + for (i = 0; i < nr_rings; i++) { + gpu->rb[i] = msm_ringbuffer_new(gpu, i, memptrs, memptrs_iova); + + if (IS_ERR(gpu->rb[i])) { + ret = PTR_ERR(gpu->rb[i]); + dev_err(drm->dev, + "could not create ringbuffer %d: %d\n", i, ret); + goto fail; + } + + memptrs += sizeof(struct msm_rbmemptrs); + memptrs_iova += sizeof(struct msm_rbmemptrs); } + gpu->nr_rings = nr_rings; + return 0; fail: + for (i = 0; i < ARRAY_SIZE(gpu->rb); i++) { + msm_ringbuffer_destroy(gpu->rb[i]); + gpu->rb[i] = NULL; + } + if (gpu->memptrs_bo) { msm_gem_put_vaddr(gpu->memptrs_bo); msm_gem_put_iova(gpu->memptrs_bo, gpu->aspace); @@ -739,16 +788,17 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev, void msm_gpu_cleanup(struct msm_gpu *gpu) { + int i; + DBG("%s", gpu->name); WARN_ON(!list_empty(&gpu->active_list)); bs_fini(gpu); - if (gpu->rb) { - if (gpu->rb_iova) - msm_gem_put_iova(gpu->rb->bo, gpu->aspace); - msm_ringbuffer_destroy(gpu->rb); + for (i = 0; i < ARRAY_SIZE(gpu->rb); i++) { + msm_ringbuffer_destroy(gpu->rb[i]); + gpu->rb[i] = NULL; } if (gpu->memptrs_bo) { diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h index fcd735c..dbc4835 100644 --- a/drivers/gpu/drm/msm/msm_gpu.h +++ b/drivers/gpu/drm/msm/msm_gpu.h @@ -33,7 +33,7 @@ struct msm_gpu_config { const char *irqname; uint64_t va_start; uint64_t va_end; - unsigned int ringsz; + unsigned int nr_rings; }; /* So far, with hardware that I've seen to date, we can have: @@ -57,8 +57,9 @@ struct msm_gpu_funcs { int (*pm_resume)(struct msm_gpu *gpu); void (*submit)(struct msm_gpu *gpu, struct msm_gem_submit *submit, struct msm_file_private *ctx); - void (*flush)(struct msm_gpu *gpu); + void (*flush)(struct msm_gpu *gpu, struct msm_ringbuffer *ring); irqreturn_t (*irq)(struct msm_gpu *irq); + struct msm_ringbuffer *(*active_ring)(struct msm_gpu *gpu); void (*recover)(struct msm_gpu *gpu); void (*destroy)(struct msm_gpu *gpu); #ifdef CONFIG_DEBUG_FS @@ -67,14 +68,6 @@ struct msm_gpu_funcs { #endif }; -#define rbmemptr(gpu, member) \ - ((gpu)->memptrs_iova + offsetof(struct msm_rbmemptrs, member)) - -struct msm_rbmemptrs { - volatile uint32_t rptr; - volatile uint32_t fence; -}; - struct msm_gpu { const char *name; struct drm_device *dev; @@ -93,9 +86,8 @@ struct msm_gpu { const struct msm_gpu_perfcntr *perfcntrs; uint32_t num_perfcntrs; - /* ringbuffer: */ - struct msm_ringbuffer *rb; - uint64_t rb_iova; + struct msm_ringbuffer *rb[MSM_GPU_MAX_RINGS]; + int nr_rings; /* list of GEM active objects: */ struct list_head active_list; @@ -133,21 +125,26 @@ struct msm_gpu { #define DRM_MSM_HANGCHECK_PERIOD 500 /* in ms */ #define DRM_MSM_HANGCHECK_JIFFIES msecs_to_jiffies(DRM_MSM_HANGCHECK_PERIOD) struct timer_list hangcheck_timer; - uint32_t hangcheck_fence; struct work_struct recover_work; - struct list_head submit_list; - - struct msm_rbmemptrs *memptrs; struct drm_gem_object *memptrs_bo; - uint64_t memptrs_iova; - - }; +/* It turns out that all targets use the same ringbuffer size */ +#define MSM_GPU_RINGBUFFER_SZ SZ_32K + static inline bool msm_gpu_active(struct msm_gpu *gpu) { - return gpu->seqno > gpu->memptrs->fence; + int i; + + for (i = 0; i < gpu->nr_rings; i++) { + struct msm_ringbuffer *ring = gpu->rb[i]; + + if (ring->seqno > ring->memptrs->fence) + return true; + } + + return false; } /* Perf-Counters: diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c index bf065a5..a74d66a 100644 --- a/drivers/gpu/drm/msm/msm_ringbuffer.c +++ b/drivers/gpu/drm/msm/msm_ringbuffer.c @@ -18,13 +18,14 @@ #include "msm_ringbuffer.h" #include "msm_gpu.h" -struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int size) +struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int id, + void *memptrs, uint64_t memptrs_iova) { struct msm_ringbuffer *ring; int ret; - if (WARN_ON(!is_power_of_2(size))) - return ERR_PTR(-EINVAL); + /* We assume everwhere that MSM_GPU_RINGBUFFER_SZ is a power of 2 */ + BUILD_BUG_ON(!is_power_of_2(MSM_GPU_RINGBUFFER_SZ)); ring = kzalloc(sizeof(*ring), GFP_KERNEL); if (!ring) { @@ -33,32 +34,38 @@ struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int size) } ring->gpu = gpu; - + ring->id = id; /* Pass NULL for the iova pointer - we will map it later */ - ring->start = msm_gem_kernel_new(gpu->dev, size, MSM_BO_WC, - gpu->aspace, &ring->bo, NULL); + ring->start = msm_gem_kernel_new(gpu->dev, MSM_GPU_RINGBUFFER_SZ, + MSM_BO_WC, gpu->aspace, &ring->bo, NULL); if (IS_ERR(ring->start)) { ret = PTR_ERR(ring->start); ring->start = 0; goto fail; } - ring->end = ring->start + (size / 4); + ring->end = ring->start + (MSM_GPU_RINGBUFFER_SZ >> 2); ring->cur = ring->start; - ring->size = size; + ring->memptrs = memptrs; + ring->memptrs_iova = memptrs_iova; + + INIT_LIST_HEAD(&ring->submits); return ring; fail: - if (ring) - msm_ringbuffer_destroy(ring); + msm_ringbuffer_destroy(ring); return ERR_PTR(ret); } void msm_ringbuffer_destroy(struct msm_ringbuffer *ring) { + if (IS_ERR_OR_NULL(ring)) + return; + if (ring->bo) { + msm_gem_put_iova(ring->bo, ring->gpu->aspace); msm_gem_put_vaddr(ring->bo); drm_gem_object_unreference_unlocked(ring->bo); } diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.h b/drivers/gpu/drm/msm/msm_ringbuffer.h index 6e0e104..5907063f 100644 --- a/drivers/gpu/drm/msm/msm_ringbuffer.h +++ b/drivers/gpu/drm/msm/msm_ringbuffer.h @@ -20,14 +20,29 @@ #include "msm_drv.h" +#define rbmemptr(ring, member) \ + ((ring)->memptrs_iova + offsetof(struct msm_rbmemptrs, member)) + +struct msm_rbmemptrs { + volatile uint32_t rptr; + volatile uint32_t fence; +}; + struct msm_ringbuffer { struct msm_gpu *gpu; - int size; + int id; struct drm_gem_object *bo; uint32_t *start, *end, *cur; + struct list_head submits; + uint64_t iova; + uint32_t seqno; + uint32_t hangcheck_fence; + struct msm_rbmemptrs *memptrs; + uint64_t memptrs_iova; }; -struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int size); +struct msm_ringbuffer *msm_ringbuffer_new(struct msm_gpu *gpu, int id, + void *memptrs, uint64_t memptrs_iova); void msm_ringbuffer_destroy(struct msm_ringbuffer *ring); /* ringbuffer helpers (the parts that are same for a3xx/a2xx/z180..) */ -- 1.9.1 _______________________________________________ dri-devel mailing list dri-devel@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/dri-devel