Looks good to me in general, a few nit picks and sugegstions below. Am 31.08.2016 um 05:49 schrieb Monk Liu: > v1: > for gfx8, use CONTEXT_CONTROL package to dynamically > skip preamble CEIB and other load_xxx command in sequence. > > v2: > support GFX7 as well, and bump up version. > remove cntxcntl in compute ring funcs because CPC doesn't > support this packet. > > v3: fix reduntant judgement in cntxcntl. > > Change-Id: I4b87ca84ea8c11ba4f7fb4c0e8a5be537ccde851 > Signed-off-by: Monk Liu <Monk.Liu at amd.com> > > Change-Id: I5d24c1bb5c14190ce4adeb6a331ee3d92b3d5c83 > Signed-off-by: Monk Liu <Monk.Liu at amd.com> Only one signed of by line is enough and remove the change-ids. > --- > drivers/gpu/drm/amd/amdgpu/amdgpu.h | 9 +++++++++ > drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 12 ++++++++++++ > drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++- > drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 16 +++++++++------- > drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c | 20 ++++++++++++++++++++ > drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 30 ++++++++++++++++++++++++++++++ > 6 files changed, 82 insertions(+), 8 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > index 1254410..0de5f08 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > @@ -321,6 +321,7 @@ struct amdgpu_ring_funcs { > void (*begin_use)(struct amdgpu_ring *ring); > void (*end_use)(struct amdgpu_ring *ring); > void (*emit_switch_buffer) (struct amdgpu_ring *ring); > + void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags); > }; > > /* > @@ -965,6 +966,7 @@ struct amdgpu_ctx { > spinlock_t ring_lock; > struct fence **fences; > struct amdgpu_ctx_ring rings[AMDGPU_MAX_RINGS]; > + bool preamble_presented; > }; > > struct amdgpu_ctx_mgr { > @@ -1227,8 +1229,13 @@ struct amdgpu_cs_parser { > > /* user fence */ > struct amdgpu_bo_list_entry uf_entry; > + bool preamble_present; /* True means this command submit involves a preamble IB */ We only need this in amdgpu_cs_ib_fill() don't we? See below as well. > }; > > +#define PREAMBLE_IB_PRESENT (1 << 0) /* bit set means command submit involves a preamble IB */ > +#define PREAMBLE_IB_PRESENT_FIRST (1 << 1) /* bit set means preamble IB is first presented in belonging context */ Why does that makes a difference if it is seen for the first time? > +#define HAVE_CTX_SWITCH (1 << 2) /* bit set means context switch occured */ > + > struct amdgpu_job { > struct amd_sched_job base; > struct amdgpu_device *adev; > @@ -1237,6 +1244,7 @@ struct amdgpu_job { > struct amdgpu_sync sync; > struct amdgpu_ib *ibs; > struct fence *fence; /* the hw fence */ > + uint32_t preamble_status; > uint32_t num_ibs; > void *owner; > uint64_t fence_ctx; /* the fence_context this job uses */ > @@ -2264,6 +2272,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring) > #define amdgpu_ring_emit_hdp_flush(r) (r)->funcs->emit_hdp_flush((r)) > #define amdgpu_ring_emit_hdp_invalidate(r) (r)->funcs->emit_hdp_invalidate((r)) > #define amdgpu_ring_emit_switch_buffer(r) (r)->funcs->emit_switch_buffer((r)) > +#define amdgpu_ring_emit_cntxcntl(r, d) (r)->funcs->emit_cntxcntl((r), (d)) > #define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib))) > #define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r)) > #define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o)) > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > index 2d4e005..6d8c050 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > @@ -792,6 +792,9 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, > if (r) > return r; > > + if (ib->flags & AMDGPU_IB_FLAG_PREAMBLE) > + parser->preamble_present = true; > + > if (parser->job->ring && parser->job->ring != ring) > return -EINVAL; > > @@ -930,6 +933,12 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, > return r; > } > > + if (p->preamble_present) { > + job->preamble_status |= PREAMBLE_IB_PRESENT; > + if (!p->ctx->preamble_presented) > + job->preamble_status |= PREAMBLE_IB_PRESENT_FIRST; > + } > + Better move this to the end of amdgpu_cs_ib_fill() where we allocate the IBs as well. > job->owner = p->filp; > job->fence_ctx = entity->fence_context; > p->fence = fence_get(&job->base.s_fence->finished); > @@ -940,6 +949,9 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, > trace_amdgpu_cs_ioctl(job); > amd_sched_entity_push_job(&job->base); > > + if (p->preamble_present) > + p->ctx->preamble_presented = true; > + > return 0; > } > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > index 56c85e6..44db0ab 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c > @@ -55,9 +55,10 @@ > * - 3.3.0 - Add VM support for UVD on supported hardware. > * - 3.4.0 - Add AMDGPU_INFO_NUM_EVICTIONS. > * - 3.5.0 - Add support for new UVD_NO_OP register. > + * - 3.6.0 - UMD doesn't/shouldn't need to use CONTEXT_CONTROL in IB, KMD should do it > */ > #define KMS_DRIVER_MAJOR 3 > -#define KMS_DRIVER_MINOR 5 > +#define KMS_DRIVER_MINOR 6 > #define KMS_DRIVER_PATCHLEVEL 0 > > int amdgpu_vram_limit = 0; > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c > index 04263f0..b12b5ba 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c > @@ -121,10 +121,11 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, > { > struct amdgpu_device *adev = ring->adev; > struct amdgpu_ib *ib = &ibs[0]; > - bool skip_preamble, need_ctx_switch; > + bool need_ctx_switch; > unsigned patch_offset = ~0; > struct amdgpu_vm *vm; > uint64_t fence_ctx; > + uint32_t status = 0; > > unsigned i; > int r = 0; > @@ -174,15 +175,16 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, > /* always set cond_exec_polling to CONTINUE */ > *ring->cond_exe_cpu_addr = 1; > > - skip_preamble = ring->current_ctx == fence_ctx; > need_ctx_switch = ring->current_ctx != fence_ctx; > + if (job && ring->funcs->emit_cntxcntl) { > + if (need_ctx_switch) > + status |= HAVE_CTX_SWITCH; > + status |= job->preamble_status; > + amdgpu_ring_emit_cntxcntl(ring, status); > + } > + > for (i = 0; i < num_ibs; ++i) { > ib = &ibs[i]; > - > - /* drop preamble IBs if we don't have a context switch */ > - if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) && skip_preamble) > - continue; > - Would be nice to keep this functionality for cases where we don't support emit_cntxcntl (e.g. SI?). > amdgpu_ring_emit_ib(ring, ib, job ? job->vm_id : 0, > need_ctx_switch); > need_ctx_switch = false; > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c > index f055d49..0d5addb 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c > @@ -2096,6 +2096,25 @@ static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring, > amdgpu_ring_write(ring, control); > } > > +static void gfx_v7_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) > +{ > + uint32_t dw2 = 0; > + > + dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */ > + if (flags & HAVE_CTX_SWITCH) { > + /* set load_global_config & load_global_uconfig */ > + dw2 |= 0x8001; > + /* set load_cs_sh_regs */ > + dw2 |= 0x01000000; > + /* set load_per_context_state & load_gfx_sh_regs */ > + dw2 |= 0x10002; Better define some constants for those. Regards, Christian. > + } > + > + amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); > + amdgpu_ring_write(ring, dw2); > + amdgpu_ring_write(ring, 0); > +} > + > /** > * gfx_v7_0_ring_test_ib - basic ring IB test > * > @@ -4929,6 +4948,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = { > .test_ib = gfx_v7_0_ring_test_ib, > .insert_nop = amdgpu_ring_insert_nop, > .pad_ib = amdgpu_ring_generic_pad_ib, > + .emit_cntxcntl = gfx_v7_ring_emit_cntxcntl, > }; > > static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = { > diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c > index 8ba8e42..73f6ffa 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c > @@ -6085,6 +6085,35 @@ static void gfx_v8_ring_emit_sb(struct amdgpu_ring *ring) > amdgpu_ring_write(ring, 0); > } > > +static void gfx_v8_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags) > +{ > + uint32_t dw2 = 0; > + > + dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */ > + if (flags & HAVE_CTX_SWITCH) { > + /* set load_global_config & load_global_uconfig */ > + dw2 |= 0x8001; > + /* set load_cs_sh_regs */ > + dw2 |= 0x01000000; > + /* set load_per_context_state & load_gfx_sh_regs for GFX */ > + dw2 |= 0x10002; > + > + /* set load_ce_ram if preamble presented */ > + if (PREAMBLE_IB_PRESENT & flags) > + dw2 |= 0x10000000; > + } else { > + /* still load_ce_ram if this is the first time preamble presented > + * although there is no context switch happens. > + */ > + if (PREAMBLE_IB_PRESENT_FIRST & flags) > + dw2 |= 0x10000000; > + } > + > + amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1)); > + amdgpu_ring_write(ring, dw2); > + amdgpu_ring_write(ring, 0); > +} > + > static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, > enum amdgpu_interrupt_state state) > { > @@ -6267,6 +6296,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = { > .insert_nop = amdgpu_ring_insert_nop, > .pad_ib = amdgpu_ring_generic_pad_ib, > .emit_switch_buffer = gfx_v8_ring_emit_sb, > + .emit_cntxcntl = gfx_v8_ring_emit_cntxcntl, > }; > > static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {