Re: [PATCH 1/3] drm/amdgpu: implement RAS interrupt handler for poison creation

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 





On 4/20/2022 9:23 AM, Tao Zhou wrote:
Prepare for the implementation of poison consumption handler.

Signed-off-by: Tao Zhou <tao.zhou1@xxxxxxx>
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 62 ++++++++++++++-----------
  1 file changed, 35 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 4bbed76b79c8..2d066cff70ea 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -1515,12 +1515,44 @@ static int amdgpu_ras_fs_fini(struct amdgpu_device *adev)
  /* ras fs end */
/* ih begin */
+static void ras_interrupt_poison_creation_handler(struct ras_manager *obj,
+				struct amdgpu_iv_entry *entry)
+{
+	struct ras_ih_data *data = &obj->ih_data;
+	struct ras_err_data err_data = {0, 0, 0, NULL};
+	int ret;
+
+	if (!data->cb)
+		return;
+
+	if (amdgpu_ras_is_poison_mode_supported(obj->adev) &&
+	    obj->head.block == AMDGPU_RAS_BLOCK__UMC)
+		dev_info(obj->adev->dev,
+			"Poison is created, no user action is needed.\n");
+	else {

It is confusing to handle RAS interrupts under poison_creation_handler in non-poison mode.

Thanks,
Lijo

+		/* Let IP handle its data, maybe we need get the output
+		 * from the callback to update the error type/count, etc
+		 */
+		ret = data->cb(obj->adev, &err_data, entry);
+		/* ue will trigger an interrupt, and in that case
+		 * we need do a reset to recovery the whole system.
+		 * But leave IP do that recovery, here we just dispatch
+		 * the error.
+		 */
+		if (ret == AMDGPU_RAS_SUCCESS) {
+			/* these counts could be left as 0 if
+			 * some blocks do not count error number
+			 */
+			obj->err_data.ue_count += err_data.ue_count;
+			obj->err_data.ce_count += err_data.ce_count;
+		}
+	}
+}
+
  static void amdgpu_ras_interrupt_handler(struct ras_manager *obj)
  {
  	struct ras_ih_data *data = &obj->ih_data;
  	struct amdgpu_iv_entry entry;
-	int ret;
-	struct ras_err_data err_data = {0, 0, 0, NULL};
while (data->rptr != data->wptr) {
  		rmb();
@@ -1531,31 +1563,7 @@ static void amdgpu_ras_interrupt_handler(struct ras_manager *obj)
  		data->rptr = (data->aligned_element_size +
  				data->rptr) % data->ring_size;
- if (data->cb) {
-			if (amdgpu_ras_is_poison_mode_supported(obj->adev) &&
-			    obj->head.block == AMDGPU_RAS_BLOCK__UMC)
-				dev_info(obj->adev->dev,
-						"Poison is created, no user action is needed.\n");
-			else {
-				/* Let IP handle its data, maybe we need get the output
-				 * from the callback to udpate the error type/count, etc
-				 */
-				memset(&err_data, 0, sizeof(err_data));
-				ret = data->cb(obj->adev, &err_data, &entry);
-				/* ue will trigger an interrupt, and in that case
-				 * we need do a reset to recovery the whole system.
-				 * But leave IP do that recovery, here we just dispatch
-				 * the error.
-				 */
-				if (ret == AMDGPU_RAS_SUCCESS) {
-					/* these counts could be left as 0 if
-					 * some blocks do not count error number
-					 */
-					obj->err_data.ue_count += err_data.ue_count;
-					obj->err_data.ce_count += err_data.ce_count;
-				}
-			}
-		}
+		ras_interrupt_poison_creation_handler(obj, &entry);
  	}
  }



[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux