[AMD Official Use Only - AMD Internal Distribution Only] -----Original Message----- From: amd-gfx <amd-gfx-bounces@xxxxxxxxxxxxxxxxxxxxx> On Behalf Of Jiang Liu Sent: Monday, January 13, 2025 09:42 To: Deucher, Alexander <Alexander.Deucher@xxxxxxx>; Koenig, Christian <Christian.Koenig@xxxxxxx>; Pan, Xinhui <Xinhui.Pan@xxxxxxx>; airlied@xxxxxxxxx; simona@xxxxxxxx; Khatri, Sunil <Sunil.Khatri@xxxxxxx>; Lazar, Lijo <Lijo.Lazar@xxxxxxx>; Zhang, Hawking <Hawking.Zhang@xxxxxxx>; Limonciello, Mario <Mario.Limonciello@xxxxxxx>; Chen, Xiaogang <Xiaogang.Chen@xxxxxxx>; Russell, Kent <Kent.Russell@xxxxxxx>; shuox.liu@xxxxxxxxxxxxxxxxx; amd-gfx@xxxxxxxxxxxxxxxxxxxxx Cc: Jiang Liu <gerry@xxxxxxxxxxxxxxxxx> Subject: [RFC v2 01/15] drm/amdgpu: add helper functions to track status for ras manager Add helper functions to track status for ras manager and ip blocks. Signed-off-by: Jiang Liu <gerry@xxxxxxxxxxxxxxxxx> --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 38 +++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 37 ++++++++++++++++++++++++ drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 10 +++++++ 3 files changed, 85 insertions(+) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 5e55a44f9eef..f0f773659faf 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -377,12 +377,28 @@ int amdgpu_ip_block_resume(struct amdgpu_ip_block *ip_block); #define AMDGPU_MAX_IP_NUM 16 +enum amdgpu_marker { + // Markers for IRQs, used for both ip blocks and ras blocks. 
+ AMDGPU_MARKER_IRQ0 = 32, + AMDGPU_MARKER_IRQ1, + AMDGPU_MARKER_IRQ2, + AMDGPU_MARKER_IRQ3, + AMDGPU_MARKER_IRQ4, + AMDGPU_MARKER_IRQ5, + AMDGPU_MARKER_IRQ6, + AMDGPU_MARKER_IRQ7, + AMDGPU_MARKER_IRQ_MAX = 63, +}; + +#define AMDGPU_MARKER_IRQ(idx) (AMDGPU_MARKER_IRQ0 + (idx)) + struct amdgpu_ip_block_status { bool valid; bool sw; bool hw; bool late_initialized; bool hang; + uint64_t markers; }; struct amdgpu_ip_block_version { @@ -410,6 +426,28 @@ amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev, int amdgpu_device_ip_block_add(struct amdgpu_device *adev, const struct amdgpu_ip_block_version *ip_block_version); +static inline void amdgpu_ip_block_set_marker(struct amdgpu_ip_block *ip_block, + enum amdgpu_marker marker) +{ + WARN_ON(marker > 63); + WARN_ON(ip_block->status.markers & (0x1ull << marker)); + ip_block->status.markers |= 0x1ull << (int)marker; } + +static inline bool amdgpu_ip_block_test_and_clear_marker(struct amdgpu_ip_block *ip_block, + enum amdgpu_marker marker) +{ + bool set = false; + uint64_t value = 0x1ull << (int)marker; + + if ((ip_block->status.markers & value) != 0) { + ip_block->status.markers &= ~value; + set = true; + } + + return set; +} + /* * BIOS. 
*/ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c index f0924aa3f4e4..5e19d820ab34 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c @@ -5207,3 +5207,40 @@ bool amdgpu_ras_is_rma(struct amdgpu_device *adev) return con->is_rma; } + +bool amdgpu_ras_test_marker(struct amdgpu_device *adev, + struct ras_common_if *head, int marker) { + struct ras_manager *obj = amdgpu_ras_find_obj(adev, head); + + if (obj && obj->markers & (0x1ull << marker)) + return true; + + return false; +} + +void amdgpu_ras_set_marker(struct amdgpu_device *adev, + struct ras_common_if *head, int marker) { + struct ras_manager *obj = amdgpu_ras_find_obj(adev, head); + + WARN_ON(marker > 63); + WARN_ON(obj->markers & (0x1ull << marker)); [kevin]: It's best to check for null pointers before accessing member variables Best Regards, Kevin + if (obj) + obj->markers |= 0x1ull << marker; +} + +bool amdgpu_ras_test_and_clear_marker(struct amdgpu_device *adev, + struct ras_common_if *head, int marker) { + bool set = false; + uint64_t value = 0x1ull << marker; + struct ras_manager *obj = amdgpu_ras_find_obj(adev, head); + + if (obj && (obj->markers & value) != 0) { + obj->markers &= ~value; + set = true; + } + + return set; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h index 82db986c36a0..35881087b17b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h @@ -634,6 +634,8 @@ struct ras_manager { struct ras_common_if head; /* reference count */ int use; + /* Flags for status tracking */ + uint64_t markers; /* ras block link */ struct list_head node; /* the device */ @@ -977,4 +979,12 @@ void amdgpu_ras_event_log_print(struct amdgpu_device *adev, u64 event_id, const char *fmt, ...); bool amdgpu_ras_is_rma(struct amdgpu_device *adev); + +bool amdgpu_ras_test_marker(struct amdgpu_device *adev, + struct ras_common_if 
*head, int marker); +void amdgpu_ras_set_marker(struct amdgpu_device *adev, + struct ras_common_if *head, int marker); +bool amdgpu_ras_test_and_clear_marker(struct amdgpu_device *adev, + struct ras_common_if *head, + int marker); #endif -- 2.43.5