Am 17.03.23 um 18:17 schrieb Alex Deucher:
From: Christian König <christian.koenig@xxxxxxx>
Add support for submitting the shadow update packet
when submitting an IB. Needed for MCBP on GFX11.
v2: update API for CSA (Alex)
v3: fix ordering; SET_Q_PREEMPTION_MODE most come before COND_EXEC
Add missing check for AMDGPU_CHUNK_ID_CP_GFX_SHADOW in
amdgpu_cs_pass1()
Only initialize shadow on first use
(Alex)
Signed-off-by: Christian König <christian.koenig@xxxxxxx>
Signed-off-by: Alex Deucher <alexander.deucher@xxxxxxx>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 24 ++++++++++++++++++++++++
drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h | 1 +
drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c | 4 ++++
drivers/gpu/drm/amd/amdgpu/amdgpu_job.h | 6 ++++++
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 2 ++
5 files changed, 37 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index f6144b378617..9bdda246b09c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -280,6 +280,7 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_WAIT:
case AMDGPU_CHUNK_ID_SYNCOBJ_TIMELINE_SIGNAL:
+ case AMDGPU_CHUNK_ID_CP_GFX_SHADOW:
break;
default:
@@ -587,6 +588,26 @@ static int amdgpu_cs_p2_syncobj_timeline_signal(struct amdgpu_cs_parser *p,
return 0;
}
+static void amdgpu_cs_p2_shadow(struct amdgpu_cs_parser *p,
+ struct amdgpu_cs_chunk *chunk)
+{
+ struct drm_amdgpu_cs_chunk_cp_gfx_shadow *shadow = chunk->kdata;
+ bool shadow_initialized = false;
+ int i;
+
+ for (i = 0; i < p->gang_size; ++i) {
+ p->jobs[i]->shadow_va = shadow->shadow_va;
+ p->jobs[i]->csa_va = shadow->csa_va;
+ p->jobs[i]->gds_va = shadow->gds_va;
Do we really need all three VAs separately?
+ if (!p->ctx->shadow_initialized) {
+ p->jobs[i]->init_shadow = true;
+ shadow_initialized = true;
+ }
+ }
+ if (shadow_initialized)
+ p->ctx->shadow_initialized = true;
This is a really bad idea since the IOCTL can be interrupted later on.
Why do we need that?
Regards,
Christian.
+}
+
static int amdgpu_cs_pass2(struct amdgpu_cs_parser *p)
{
unsigned int ce_preempt = 0, de_preempt = 0;
@@ -629,6 +650,9 @@ static int amdgpu_cs_pass2(struct amdgpu_cs_parser *p)
if (r)
return r;
break;
+ case AMDGPU_CHUNK_ID_CP_GFX_SHADOW:
+ amdgpu_cs_p2_shadow(p, chunk);
+ break;
}
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
index 0fa0e56daf67..909d188c41f2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h
@@ -57,6 +57,7 @@ struct amdgpu_ctx {
unsigned long ras_counter_ce;
unsigned long ras_counter_ue;
uint32_t stable_pstate;
+ bool shadow_initialized;
};
struct amdgpu_ctx_mgr {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index bcccc348dbe2..d88964b9407f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -212,6 +212,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
}
amdgpu_ring_ib_begin(ring);
+
+ if (job && ring->funcs->emit_gfx_shadow)
+ amdgpu_ring_emit_gfx_shadow(ring, job);
+
if (job && ring->funcs->init_cond_exec)
patch_offset = amdgpu_ring_init_cond_exec(ring);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
index 9790def34815..b470808fa40e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
@@ -68,6 +68,12 @@ struct amdgpu_job {
uint64_t uf_addr;
uint64_t uf_sequence;
+ /* virtual addresses for shadow/GDS/CSA */
+ uint64_t shadow_va;
+ uint64_t csa_va;
+ uint64_t gds_va;
+ bool init_shadow;
+
/* job_run_counter >= 1 means a resubmit job */
uint32_t job_run_counter;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 3989e755a5b4..8643d4a92c27 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -212,6 +212,7 @@ struct amdgpu_ring_funcs {
void (*end_use)(struct amdgpu_ring *ring);
void (*emit_switch_buffer) (struct amdgpu_ring *ring);
void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags);
+ void (*emit_gfx_shadow)(struct amdgpu_ring *ring, struct amdgpu_job *job);
void (*emit_rreg)(struct amdgpu_ring *ring, uint32_t reg,
uint32_t reg_val_offs);
void (*emit_wreg)(struct amdgpu_ring *ring, uint32_t reg, uint32_t val);
@@ -307,6 +308,7 @@ struct amdgpu_ring {
#define amdgpu_ring_emit_hdp_flush(r) (r)->funcs->emit_hdp_flush((r))
#define amdgpu_ring_emit_switch_buffer(r) (r)->funcs->emit_switch_buffer((r))
#define amdgpu_ring_emit_cntxcntl(r, d) (r)->funcs->emit_cntxcntl((r), (d))
+#define amdgpu_ring_emit_gfx_shadow(r, j) (r)->funcs->emit_gfx_shadow((r), (j))
#define amdgpu_ring_emit_rreg(r, d, o) (r)->funcs->emit_rreg((r), (d), (o))
#define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v))
#define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m))