Am 14.02.22 um 10:16 schrieb Somalapuram Amaranath:
Dump the list of register values to trace event on GPU reset.
Signed-off-by: Somalapuram Amaranath <Amaranath.Somalapuram@xxxxxxx>
One nitpick below; with that fixed, Reviewed-by: Christian König
<christian.koenig@xxxxxxx>.
---
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 17 ++++++++++++++++-
drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h | 16 ++++++++++++++++
2 files changed, 32 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 1e651b959141..4e11a93134cf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -4534,6 +4534,19 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
return r;
}
+static int amdgpu_reset_reg_dumps(struct amdgpu_device *adev)
+{
+ int i;
+ uint32_t reg_value;
We usually try to declare variables like "i" or "r"/"ret" last and
put the longer declaration lines first.
Regards,
Christian.
+
+ for (i = 0; i < adev->n_regs; i++) {
+ reg_value = RREG32(adev->reset_dump_reg_list[i]);
+ trace_amdgpu_reset_reg_dumps(adev->reset_dump_reg_list[i], reg_value);
+ }
+
+ return 0;
+}
+
int amdgpu_do_asic_reset(struct list_head *device_list_handle,
struct amdgpu_reset_context *reset_context)
{
@@ -4567,8 +4580,10 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle,
tmp_adev->gmc.xgmi.pending_reset = false;
if (!queue_work(system_unbound_wq, &tmp_adev->xgmi_reset_work))
r = -EALREADY;
- } else
+ } else {
+ amdgpu_reset_reg_dumps(tmp_adev);
r = amdgpu_asic_reset(tmp_adev);
+ }
if (r) {
dev_err(tmp_adev->dev, "ASIC reset failed with error, %d for drm dev, %s",
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index d855cb53c7e0..b9637925e85c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -537,6 +537,22 @@ TRACE_EVENT(amdgpu_ib_pipe_sync,
__entry->seqno)
);
+TRACE_EVENT(amdgpu_reset_reg_dumps,
+ TP_PROTO(uint32_t address, uint32_t value),
+ TP_ARGS(address, value),
+ TP_STRUCT__entry(
+ __field(uint32_t, address)
+ __field(uint32_t, value)
+ ),
+ TP_fast_assign(
+ __entry->address = address;
+ __entry->value = value;
+ ),
+ TP_printk("amdgpu register dump 0x%x: 0x%x",
+ __entry->address,
+ __entry->value)
+);
+
#undef AMDGPU_JOB_GET_TIMELINE_NAME
#endif