use CONTEXT_CONTROL package to dynamically skip preamble IB and other load_xxx command in sequence. Change-Id: I4b87ca84ea8c11ba4f7fb4c0e8a5be537ccde851 Signed-off-by: Monk Liu <Monk.Liu at amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 9 +++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 12 ++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 16 +++++++++------- drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 32 ++++++++++++++++++++++++++++++++ 4 files changed, 62 insertions(+), 7 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 9132719..a9dfeb5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -339,6 +339,7 @@ struct amdgpu_ring_funcs { void (*emit_wreg) (struct amdgpu_ring *ring, uint32_t offset, uint32_t val); void (*emit_rreg) (struct amdgpu_ring *ring, uint32_t offset); void (*emit_switch_buffer) (struct amdgpu_ring *ring); + void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags); }; /* @@ -1050,6 +1051,7 @@ struct amdgpu_ctx { spinlock_t ring_lock; struct fence **fences; struct amdgpu_ctx_ring rings[AMDGPU_MAX_RINGS]; + bool preamble_presented; }; struct amdgpu_ctx_mgr { @@ -1320,8 +1322,13 @@ struct amdgpu_cs_parser { /* user fence */ struct amdgpu_bo_list_entry uf_entry; + bool preamble_present; /* True means this command submit involves a preamble IB */ }; +#define PREAMBLE_IB_PRESENT (1 << 0) /* bit set means command submit involves a preamble IB */ +#define PREAMBLE_IB_PRESENT_FIRST (1 << 1) /* bit set means preamble IB is first presented in belonging context */ +#define HAVE_CTX_SWITCH (1 << 2) /* bit set means context switch occured */ + struct amdgpu_job { struct amd_sched_job base; struct amdgpu_device *adev; @@ -1330,6 +1337,7 @@ struct amdgpu_job { struct amdgpu_sync sync; struct amdgpu_ib *ibs; struct fence *fence; /* the hw fence */ + uint32_t preamble_status; uint32_t num_ibs; void *owner; uint64_t fence_ctx; /* the fence_context this job uses */ @@ -2374,6 +2382,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring) #define amdgpu_ring_emit_wreg(r, i, v) (r)->funcs->emit_wreg((r), (i), (v)) #define amdgpu_ring_emit_rreg(r, i) (r)->funcs->emit_rreg((r), (i)) #define amdgpu_ring_emit_switch_buffer(r) (r)->funcs->emit_switch_buffer((r)) +#define amdgpu_ring_emit_cntxcntl(r, d) (r)->funcs->emit_cntxcntl((r), (d)) #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib))) #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r)) #define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o)) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 78d3831..f2d739a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -711,6 +711,9 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, if (r) return r; + if (ib->flags & AMDGPU_IB_FLAG_PREAMBLE) + parser->preamble_present = true; + if (parser->job->ring && parser->job->ring != ring) return -EINVAL; @@ -849,6 +852,12 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, return r; } + if (p->preamble_present) { + job->preamble_status |= PREAMBLE_IB_PRESENT; + if (!p->ctx->preamble_presented) + job->preamble_status |= PREAMBLE_IB_PRESENT_FIRST; + } + job->owner = p->filp; job->fence_ctx = entity->fence_context; p->fence = fence_get(&job->base.s_fence->finished); @@ -859,6 +868,9 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, trace_amdgpu_cs_ioctl(job); amd_sched_entity_push_job(&job->base); + if (p->preamble_present) + p->ctx->preamble_presented = true; + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 9dffe36..94281c3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -121,10 +121,11 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, { struct amdgpu_device *adev = ring->adev; struct amdgpu_ib *ib = &ibs[0]; - bool skip_preamble, need_ctx_switch; + bool need_ctx_switch; unsigned patch_offset = ~0; struct amdgpu_vm *vm; uint64_t fence_ctx; + uint32_t status = 0; unsigned i; int r = 0; @@ -174,15 +175,16 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, /* always set cond_exec_polling to CONTINUE */ *ring->cond_exe_cpu_addr = 1; - skip_preamble = ring->current_ctx == fence_ctx; need_ctx_switch = ring->current_ctx != fence_ctx; + if (job && ring->funcs->emit_cntxcntl) { + if (need_ctx_switch) + status |= HAVE_CTX_SWITCH; + status |= job->preamble_status; + amdgpu_ring_emit_cntxcntl(ring, status); + } + for (i = 0; i < num_ibs; ++i) { ib = &ibs[i]; - - /* drop preamble IBs if we don't have a context switch */ - if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && skip_preamble) - continue; - amdgpu_ring_emit_ib(ring, ib, job ? job->vm_id : 0, need_ctx_switch); need_ctx_switch = false; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index af354a8..c91c8a5 100755 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -6161,6 +6161,36 @@ static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring) amdgpu_ring_write(ring, 0); } +static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) +{ + uint32_t dw2 = 0; + + dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */ + if (flags & HAVE_CTX_SWITCH) { + /* set load_global_config & load_global_uconfig */ + dw2 |= 0x8001; + /* set load_cs_sh_regs */ + dw2 |= 0x01000000; + /* set load_per_context_state & load_gfx_sh_regs for GFX */ + if (ring->type != AMDGPU_RING_TYPE_COMPUTE) + dw2 |= 0x10002; + + /* set load_ce_ram if preamble presented */ + if (PREAMBLE_IB_PRESENT & flags) + dw2 |= 0x10000000; + } else { + /* still load_ce_ram if this is the first time preamble presented + * although there is no context switch happens. + */ + if (PREAMBLE_IB_PRESENT_FIRST & flags) + dw2 |= 0x10000000; + } + + amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); + amdgpu_ring_write(ring, dw2); + amdgpu_ring_write(ring, 0); +} + static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, enum amdgpu_interrupt_state state) { @@ -6469,6 +6499,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = { .insert_nop = amdgpu_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .emit_switch_buffer = gfx_v8_ring_emit_sb, + .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl, }; static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = { @@ -6487,6 +6518,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = { .test_ib = gfx_v8_0_ring_test_ib, .insert_nop = amdgpu_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, + .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl, }; static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = { -- 1.9.1