On Thu, Mar 30, 2023 at 2:12 AM Christian König <ckoenig.leichtzumerken@xxxxxxxxx> wrote: > > Am 29.03.23 um 17:25 schrieb Alex Deucher: > > From: Christian König <christian.koenig@xxxxxxx> > > > > Add support for submitting the shadow update packet > > when submitting an IB. Needed for MCBP on GFX11. > > > > v2: update API for CSA (Alex) > > v3: fix ordering; SET_Q_PREEMPTION_MODE must come before COND_EXEC > > Add missing check for AMDGPU_CHUNK_ID_CP_GFX_SHADOW in > > amdgpu_cs_pass1() > > Only initialize shadow on first use > > (Alex) > > v4: Pass parameters rather than job to new ring callback (Alex) > > v5: squash in change to call SET_Q_PREEMPTION_MODE/COND_EXEC > > before RELEASE_MEM to complete the UMDs use of the shadow (Alex) > > > > Signed-off-by: Christian König <christian.koenig@xxxxxxx> > > Signed-off-by: Alex Deucher <alexander.deucher@xxxxxxx> > > --- > > drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 26 +++++++++++++++++++++ > > drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 29 +++++++++++++++++++++++- > > drivers/gpu/drm/amd/amdgpu/amdgpu_job.h | 6 +++++ > > drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 3 +++ > > 4 files changed, 63 insertions(+), 1 deletion(-) > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > > index 47763ac0d14a..41bd3a1a1989 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > > @@ -281,6 +281,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p, > > case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES: > > case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT: > > case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL: > > + case AMDGPU_CHUNK_ID_CP_GFX_SHADOW: > > break; > > > > default: > > @@ -583,6 +584,26 @@ static int amdgpu_cs_p2_syncobj_timeline_signal(struct amdgpu_cs_parser *p, > > return 0; > > } > > > > +static int amdgpu_cs_p2_shadow(struct amdgpu_cs_parser *p, > > + struct amdgpu_cs_chunk *chunk) > > +{ > > + struct 
drm_amdgpu_cs_chunk_cp_gfx_shadow *shadow = chunk->kdata; > > + int i; > > + > > + if (shadow->flags & ~AMDGPU_CS_CHUNK_CP_GFX_SHADOW_FLAGS_INIT_SHADOW) > > + return -EINVAL; > > + > > + for (i = 0; i < p->gang_size; ++i) { > > + p->jobs[i]->shadow_va = shadow->shadow_va; > > + p->jobs[i]->csa_va = shadow->csa_va; > > + p->jobs[i]->gds_va = shadow->gds_va; > > + p->jobs[i]->init_shadow = > > + shadow->flags & AMDGPU_CS_CHUNK_CP_GFX_SHADOW_FLAGS_INIT_SHADOW; > > + } > > + > > + return 0; > > +} > > + > > static int amdgpu_cs_pass2(struct amdgpu_cs_parser *p) > > { > > unsigned int ce_preempt = 0, de_preempt = 0; > > @@ -625,6 +646,11 @@ static int amdgpu_cs_pass2(struct amdgpu_cs_parser *p) > > if (r) > > return r; > > break; > > + case AMDGPU_CHUNK_ID_CP_GFX_SHADOW: > > + r = amdgpu_cs_p2_shadow(p, chunk); > > + if (r) > > + return r; > > + break; > > } > > } > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c > > index bcccc348dbe2..9bee630eb0c9 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c > > @@ -136,7 +136,9 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, > > uint64_t fence_ctx; > > uint32_t status = 0, alloc_size; > > unsigned fence_flags = 0; > > - bool secure; > > + bool secure, init_shadow; > > + u64 shadow_va, csa_va, gds_va; > > + int vmid = AMDGPU_JOB_GET_VMID(job); > > > > unsigned i; > > int r = 0; > > @@ -150,9 +152,17 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, > > vm = job->vm; > > fence_ctx = job->base.s_fence ? 
> > job->base.s_fence->scheduled.context : 0; > > + shadow_va = job->shadow_va; > > + csa_va = job->csa_va; > > + gds_va = job->gds_va; > > + init_shadow = job->init_shadow; > > } else { > > vm = NULL; > > fence_ctx = 0; > > + shadow_va = 0; > > + csa_va = 0; > > + gds_va = 0; > > + init_shadow = false; > > } > > > > if (!ring->sched.ready && !ring->is_mes_queue) { > > @@ -212,6 +222,11 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, > > } > > > > amdgpu_ring_ib_begin(ring); > > + > > + if (job && ring->funcs->emit_gfx_shadow) > > + amdgpu_ring_emit_gfx_shadow(ring, shadow_va, csa_va, gds_va, > > + init_shadow, vmid); > > + > > if (job && ring->funcs->init_cond_exec) > > patch_offset = amdgpu_ring_init_cond_exec(ring); > > > > @@ -263,6 +278,18 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, > > fence_flags | AMDGPU_FENCE_FLAG_64BIT); > > } > > > > + if (ring->funcs->emit_gfx_shadow) { > > + amdgpu_ring_emit_gfx_shadow(ring, 0, 0, 0, false, 0); > > + > > + if (ring->funcs->init_cond_exec) { > > + unsigned ce_offset = ~0; > > + > > + ce_offset = amdgpu_ring_init_cond_exec(ring); > > + if (ce_offset != ~0 && ring->funcs->patch_cond_exec) > > + amdgpu_ring_patch_cond_exec(ring, ce_offset); > > + } > > + } > > + > > r = amdgpu_fence_emit(ring, f, job, fence_flags); > > if (r) { > > dev_err(adev->dev, "failed to emit fence (%d)\n", r); > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h > > index 52f2e313ea17..3f9804f956c9 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h > > @@ -67,6 +67,12 @@ struct amdgpu_job { > > uint64_t uf_addr; > > uint64_t uf_sequence; > > > > + /* virtual addresses for shadow/GDS/CSA */ > > + uint64_t shadow_va; > > + uint64_t csa_va; > > + uint64_t gds_va; > > + bool init_shadow; > > Doesn't the job have a flags field for stuff like that? Or was that the IB? 
This comes from the flag in the new CS chunk. Job doesn't have any flags at the moment. Alex > > Apart from that looks good to me. > > > + > > /* job_run_counter >= 1 means a resubmit job */ > > uint32_t job_run_counter; > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h > > index 3989e755a5b4..7942cb62e52c 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h > > @@ -212,6 +212,8 @@ struct amdgpu_ring_funcs { > > void (*end_use)(struct amdgpu_ring *ring); > > void (*emit_switch_buffer) (struct amdgpu_ring *ring); > > void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags); > > + void (*emit_gfx_shadow)(struct amdgpu_ring *ring, u64 shadow_va, u64 csa_va, > > + u64 gds_va, bool init_shadow, int vmid); > > void (*emit_rreg)(struct amdgpu_ring *ring, uint32_t reg, > > uint32_t reg_val_offs); > > void (*emit_wreg)(struct amdgpu_ring *ring, uint32_t reg, uint32_t val); > > @@ -307,6 +309,7 @@ struct amdgpu_ring { > > #define amdgpu_ring_emit_hdp_flush(r) (r)->funcs->emit_hdp_flush((r)) > > #define amdgpu_ring_emit_switch_buffer(r) (r)->funcs->emit_switch_buffer((r)) > > #define amdgpu_ring_emit_cntxcntl(r, d) (r)->funcs->emit_cntxcntl((r), (d)) > > +#define amdgpu_ring_emit_gfx_shadow(r, s, c, g, i, v) (r)->funcs->emit_gfx_shadow((r), (s), (c), (g), (i), (v)) > > #define amdgpu_ring_emit_rreg(r, d, o) (r)->funcs->emit_rreg((r), (d), (o)) > > #define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v)) > > #define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m)) >