Signed-off-by: Samuel Pitoiset <samuel.pitoiset@xxxxxxxxx>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 31 ++++++++++++++++++++++++
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 3 ++-
drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h | 4 +++
drivers/gpu/drm/amd/amdgpu/amdgpu_job.h | 4 +++
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 4 +++
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 20 +++++++++++++--
drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 20 +++++++++++++++
drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 19 +++++++++++++++
include/uapi/drm/amdgpu_drm.h | 8 ++++++
9 files changed, 110 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index a512ccbc4dea..6ca5c4912e3a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -104,6 +104,19 @@ static int amdgpu_cs_bo_handles_chunk(struct amdgpu_cs_parser *p,
return r;
}
+static int amdgpu_cs_user_trap_chunk(struct amdgpu_cs_parser *p,
+ struct drm_amdgpu_cs_chunk_trap *data,
+ uint64_t *tba_addr, uint64_t *tma_addr)
+{
+ if (!data->tba_addr || !data->tma_addr)
+ return -EINVAL;
+
+ *tba_addr = data->tba_addr;
+ *tma_addr = data->tma_addr;
+
+ return 0;
+}
+
static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs *cs)
{
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
@@ -112,6 +125,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs
uint64_t *chunk_array;
unsigned size, num_ibs = 0;
uint32_t uf_offset = 0;
+ uint64_t tba_addr = 0, tma_addr = 0;
int i;
int ret;
@@ -214,6 +228,19 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs
break;
+ case AMDGPU_CHUNK_ID_TRAP:
+ size = sizeof(struct drm_amdgpu_cs_chunk_trap);
+ if (p->chunks[i].length_dw * sizeof(uint32_t) < size) {
+ ret = -EINVAL;
+ goto free_partial_kdata;
+ }
+
+ ret = amdgpu_cs_user_trap_chunk(p, p->chunks[i].kdata,
+ &tba_addr, &tma_addr);
+ if (ret)
+ goto free_partial_kdata;
+ break;
+
case AMDGPU_CHUNK_ID_DEPENDENCIES:
case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
@@ -239,6 +266,10 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs
if (p->uf_entry.tv.bo)
p->job->uf_addr = uf_offset;
+
+ p->job->tba_addr = tba_addr;
+ p->job->tma_addr = tma_addr;
+
kfree(chunk_array);
/* Use this opportunity to fill in task info for the vm */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 26127c7d2f32..1e703119e4c2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -88,9 +88,10 @@
 * - 3.37.0 - L2 is invalidated before SDMA IBs, needed for correctness
* - 3.38.0 - Add AMDGPU_IB_FLAG_EMIT_MEM_SYNC
* - 3.39.0 - DMABUF implicit sync does a full pipeline sync
+ * - 3.40.0 - Add AMDGPU_CHUNK_ID_TRAP
*/
#define KMS_DRIVER_MAJOR 3
-#define KMS_DRIVER_MINOR 39
+#define KMS_DRIVER_MINOR 40
#define KMS_DRIVER_PATCHLEVEL 0
int amdgpu_vram_limit = 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
index 8e58325bbca2..fd0d56724b4d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
@@ -58,6 +58,10 @@ struct amdgpu_vmid {
uint32_t oa_base;
uint32_t oa_size;
+ /* user trap */
+ uint64_t tba_addr;
+ uint64_t tma_addr;
+
unsigned pasid;
struct dma_fence *pasid_mapping;
};
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
index 81caac9b958a..b8ed5b13ea44 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
@@ -62,6 +62,10 @@ struct amdgpu_job {
/* user fence handling */
uint64_t uf_addr;
uint64_t uf_sequence;
+
+ /* user trap handling */
+ uint64_t tba_addr;
+ uint64_t tma_addr;
};
int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index da871d84b742..1f165a6295d9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -197,6 +197,9 @@ struct amdgpu_ring_funcs {
void (*soft_recovery)(struct amdgpu_ring *ring, unsigned vmid);
int (*preempt_ib)(struct amdgpu_ring *ring);
void (*emit_mem_sync)(struct amdgpu_ring *ring);
+ void (*emit_trap_handler)(struct amdgpu_ring *ring,
+ uint32_t vmid,
+ uint64_t tba_addr, uint64_t tma_addr);
};
struct amdgpu_ring {
@@ -265,6 +268,7 @@ struct amdgpu_ring {
#define amdgpu_ring_emit_vm_flush(r, vmid, addr) (r)->funcs->emit_vm_flush((r), (vmid), (addr))
#define amdgpu_ring_emit_fence(r, addr, seq, flags) (r)->funcs->emit_fence((r), (addr), (seq), (flags))
#define amdgpu_ring_emit_gds_switch(r, v, db, ds, wb, ws, ab, as) (r)->funcs->emit_gds_switch((r), (v), (db), (ds), (wb), (ws), (ab), (as))
+#define amdgpu_ring_emit_trap_handler(r, v, tba, tma) (r)->funcs->emit_trap_handler((r), (v), (tba), (tma))
#define amdgpu_ring_emit_hdp_flush(r) (r)->funcs->emit_hdp_flush((r))
#define amdgpu_ring_emit_switch_buffer(r) (r)->funcs->emit_switch_buffer((r))
#define amdgpu_ring_emit_cntxcntl(r, d) (r)->funcs->emit_cntxcntl((r), (d))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 71e005cf2952..6b619bb03777 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1079,15 +1079,22 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
bool vm_flush_needed = job->vm_needs_flush;
struct dma_fence *fence = NULL;
bool pasid_mapping_needed = false;
+ bool trap_handler_needed = false;
unsigned patch_offset = 0;
bool update_spm_vmid_needed = (job->vm && (job->vm->reserved_vmid[vmhub] != NULL));
int r;
- if (update_spm_vmid_needed && adev->gfx.rlc.funcs->update_spm_vmid)
+ if (update_spm_vmid_needed && adev->gfx.rlc.funcs->update_spm_vmid) {
adev->gfx.rlc.funcs->update_spm_vmid(adev, job->vmid);
+ trap_handler_needed = ring->funcs->emit_trap_handler && (
+ id->tba_addr != job->tba_addr ||
+ id->tma_addr != job->tma_addr);
+ }
+
if (amdgpu_vmid_had_gpu_reset(adev, id)) {
gds_switch_needed = true;
+ trap_handler_needed = true;
vm_flush_needed = true;
pasid_mapping_needed = true;
}
@@ -1099,12 +1106,14 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
mutex_unlock(&id_mgr->lock);
gds_switch_needed &= !!ring->funcs->emit_gds_switch;
+ trap_handler_needed &= !!ring->funcs->emit_trap_handler;
vm_flush_needed &= !!ring->funcs->emit_vm_flush &&
job->vm_pd_addr != AMDGPU_BO_INVALID_OFFSET;
pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping &&
ring->funcs->emit_wreg;
- if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync)
+ if (!vm_flush_needed && !gds_switch_needed &&
+ !trap_handler_needed && !need_pipe_sync)
return 0;
if (ring->funcs->init_cond_exec)
@@ -1158,6 +1167,13 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
job->oa_size);
}
+ if (ring->funcs->emit_trap_handler && trap_handler_needed) {
+ id->tba_addr = job->tba_addr;
+ id->tma_addr = job->tma_addr;
+ amdgpu_ring_emit_trap_handler(ring, job->vmid, job->tba_addr,
+ job->tma_addr);
+ }
+
if (ring->funcs->patch_cond_exec)
amdgpu_ring_patch_cond_exec(ring, patch_offset);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 65997ffaed45..f864b217589f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -7141,6 +7141,24 @@ static void gfx_v10_0_ring_emit_gds_switch(struct amdgpu_ring *ring,
(1 << (oa_size + oa_base)) - (1 << oa_base));
}
+static void gfx_v10_0_ring_emit_trap_handler(struct amdgpu_ring *ring,
+ uint32_t vmid,
+ uint64_t tba_addr,
+ uint64_t tma_addr)
+{
+ struct amdgpu_device *adev = ring->adev;
+
+ mutex_lock(&adev->srbm_mutex);
+ nv_grbm_select(adev, 0, 0, 0, vmid);
+ WREG32_SOC15_RLC(GC, 0, mmSQ_SHADER_TBA_LO, lower_32_bits(tba_addr >> 8));
+ WREG32_SOC15_RLC(GC, 0, mmSQ_SHADER_TBA_HI, upper_32_bits(tba_addr >> 8) |
+ 1 << SQ_SHADER_TBA_HI__TRAP_EN__SHIFT);
+ WREG32_SOC15_RLC(GC, 0, mmSQ_SHADER_TMA_LO, lower_32_bits(tma_addr >> 8));
+ WREG32_SOC15_RLC(GC, 0, mmSQ_SHADER_TMA_HI, upper_32_bits(tma_addr >> 8));
+ nv_grbm_select(adev, 0, 0, 0, 0);
+ mutex_unlock(&adev->srbm_mutex);