RE: [RFC v2 01/15] drm/amdgpu: add helper functions to track status for ras manager

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



[AMD Official Use Only - AMD Internal Distribution Only]

-----Original Message-----
From: amd-gfx <amd-gfx-bounces@xxxxxxxxxxxxxxxxxxxxx> On Behalf Of Jiang Liu
Sent: Monday, January 13, 2025 09:42
To: Deucher, Alexander <Alexander.Deucher@xxxxxxx>; Koenig, Christian <Christian.Koenig@xxxxxxx>; Pan, Xinhui <Xinhui.Pan@xxxxxxx>; airlied@xxxxxxxxx; simona@xxxxxxxx; Khatri, Sunil <Sunil.Khatri@xxxxxxx>; Lazar, Lijo <Lijo.Lazar@xxxxxxx>; Zhang, Hawking <Hawking.Zhang@xxxxxxx>; Limonciello, Mario <Mario.Limonciello@xxxxxxx>; Chen, Xiaogang <Xiaogang.Chen@xxxxxxx>; Russell, Kent <Kent.Russell@xxxxxxx>; shuox.liu@xxxxxxxxxxxxxxxxx; amd-gfx@xxxxxxxxxxxxxxxxxxxxx
Cc: Jiang Liu <gerry@xxxxxxxxxxxxxxxxx>
Subject: [RFC v2 01/15] drm/amdgpu: add helper functions to track status for ras manager

Add helper functions to track status for ras manager and ip blocks.

Signed-off-by: Jiang Liu <gerry@xxxxxxxxxxxxxxxxx>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h     | 38 +++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 37 ++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 10 +++++++
 3 files changed, 85 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 5e55a44f9eef..f0f773659faf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -377,12 +377,28 @@ int amdgpu_ip_block_resume(struct amdgpu_ip_block *ip_block);

 #define AMDGPU_MAX_IP_NUM 16

+enum amdgpu_marker {
+       // Markers for IRQs, used for both ip blocks and ras blocks.
+       AMDGPU_MARKER_IRQ0 = 32,
+       AMDGPU_MARKER_IRQ1,
+       AMDGPU_MARKER_IRQ2,
+       AMDGPU_MARKER_IRQ3,
+       AMDGPU_MARKER_IRQ4,
+       AMDGPU_MARKER_IRQ5,
+       AMDGPU_MARKER_IRQ6,
+       AMDGPU_MARKER_IRQ7,
+       AMDGPU_MARKER_IRQ_MAX = 63,
+};
+
+#define AMDGPU_MARKER_IRQ(idx)         (AMDGPU_MARKER_IRQ0 + (idx))
+
 struct amdgpu_ip_block_status {
        bool valid;
        bool sw;
        bool hw;
        bool late_initialized;
        bool hang;
+       uint64_t markers;
 };

 struct amdgpu_ip_block_version {
@@ -410,6 +426,28 @@ amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
 int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
                               const struct amdgpu_ip_block_version *ip_block_version);

+static inline void amdgpu_ip_block_set_marker(struct amdgpu_ip_block *ip_block,
+                                             enum amdgpu_marker marker)
+{
+       WARN_ON(marker > 63);
+       WARN_ON(ip_block->status.markers & (0x1ull << marker));
+       ip_block->status.markers |= 0x1ull << (int)marker;
+}
+
+static inline bool amdgpu_ip_block_test_and_clear_marker(struct amdgpu_ip_block *ip_block,
+                                                        enum amdgpu_marker marker)
+{
+       bool set = false;
+       uint64_t value = 0x1ull << (int)marker;
+
+       if ((ip_block->status.markers & value) != 0) {
+               ip_block->status.markers &= ~value;
+               set = true;
+       }
+
+       return set;
+}
+
 /*
  * BIOS.
  */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index f0924aa3f4e4..5e19d820ab34 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -5207,3 +5207,40 @@ bool amdgpu_ras_is_rma(struct amdgpu_device *adev)

        return con->is_rma;
 }
+
+bool amdgpu_ras_test_marker(struct amdgpu_device *adev,
+                           struct ras_common_if *head, int marker)
+{
+       struct ras_manager *obj = amdgpu_ras_find_obj(adev, head);
+
+       if (obj && obj->markers & (0x1ull << marker))
+               return true;
+
+       return false;
+}
+
+void amdgpu_ras_set_marker(struct amdgpu_device *adev,
+                          struct ras_common_if *head, int marker)
+{
+       struct ras_manager *obj = amdgpu_ras_find_obj(adev, head);
+
+       WARN_ON(marker > 63);
+       WARN_ON(obj->markers & (0x1ull << marker));
[kevin]:

It's best to check for a NULL pointer before accessing member variables: the WARN_ON() above dereferences obj before the "if (obj)" check below.

Best Regards,
Kevin
+       if (obj)
+               obj->markers |= 0x1ull << marker;
+}
+
+bool amdgpu_ras_test_and_clear_marker(struct amdgpu_device *adev,
+                                     struct ras_common_if *head, int marker)
+{
+       bool set = false;
+       uint64_t value = 0x1ull << marker;
+       struct ras_manager *obj = amdgpu_ras_find_obj(adev, head);
+
+       if (obj && (obj->markers & value) != 0) {
+               obj->markers &= ~value;
+               set = true;
+       }
+
+       return set;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index 82db986c36a0..35881087b17b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -634,6 +634,8 @@ struct ras_manager {
        struct ras_common_if head;
        /* reference count */
        int use;
+       /* Flags for status tracking */
+       uint64_t markers;
        /* ras block link */
        struct list_head node;
        /* the device */
@@ -977,4 +979,12 @@ void amdgpu_ras_event_log_print(struct amdgpu_device *adev, u64 event_id,
                                const char *fmt, ...);

 bool amdgpu_ras_is_rma(struct amdgpu_device *adev);
+
+bool amdgpu_ras_test_marker(struct amdgpu_device *adev,
+                           struct ras_common_if *head, int marker);
+void amdgpu_ras_set_marker(struct amdgpu_device *adev,
+                          struct ras_common_if *head, int marker);
+bool amdgpu_ras_test_and_clear_marker(struct amdgpu_device *adev,
+                                     struct ras_common_if *head,
+                                     int marker);
 #endif
--
2.43.5





[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux