[PATCH] drm/amdgpu: add SMU debug option support

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



To maintain system error state when SMU errors occurred,
which will aid in debugging SMU firmware issues,
add SMU debug option support.

It can be enabled or disabled via amdgpu_smu_debug
debugfs file. When enabled, it makes SMU errors fatal.
It is disabled by default.

== Command Guide ==

1, enable SMU debug option

 # echo 1 > /sys/kernel/debug/dri/0/amdgpu_smu_debug

2, disable SMU debug option

 # echo 0 > /sys/kernel/debug/dri/0/amdgpu_smu_debug

v2:
 - Resend command when timeout.(Lijo)
 - Use debugfs file instead of module parameter.

Signed-off-by: Lang Yu <lang.yu@xxxxxxx>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 32 +++++++++++++++++
 drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c      | 39 +++++++++++++++++++--
 2 files changed, 69 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index 164d6a9e9fbb..f9412de86599 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -39,6 +39,8 @@
 
 #if defined(CONFIG_DEBUG_FS)
 
+extern int amdgpu_smu_debug;
+
 /**
  * amdgpu_debugfs_process_reg_op - Handle MMIO register reads/writes
  *
@@ -1152,6 +1154,8 @@ static ssize_t amdgpu_debugfs_gfxoff_read(struct file *f, char __user *buf,
 	return result;
 }
 
+
+
 static const struct file_operations amdgpu_debugfs_regs2_fops = {
 	.owner = THIS_MODULE,
 	.unlocked_ioctl = amdgpu_debugfs_regs2_ioctl,
@@ -1609,6 +1613,26 @@ DEFINE_DEBUGFS_ATTRIBUTE(fops_ib_preempt, NULL,
 DEFINE_DEBUGFS_ATTRIBUTE(fops_sclk_set, NULL,
 			amdgpu_debugfs_sclk_set, "%llu\n");
 
+static int amdgpu_debugfs_smu_debug_get(void *data, u64 *val)
+{
+	*val = amdgpu_smu_debug;
+	return 0;
+}
+
+static int amdgpu_debugfs_smu_debug_set(void *data, u64 val)
+{
+	if (val != 0 && val != 1)
+		return -EINVAL;
+
+	amdgpu_smu_debug = val;
+	return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(fops_smu_debug,
+			 amdgpu_debugfs_smu_debug_get,
+			 amdgpu_debugfs_smu_debug_set,
+			 "%llu\n");
+
 int amdgpu_debugfs_init(struct amdgpu_device *adev)
 {
 	struct dentry *root = adev_to_drm(adev)->primary->debugfs_root;
@@ -1632,6 +1656,14 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev)
 		return PTR_ERR(ent);
 	}
 
+	ent = debugfs_create_file("amdgpu_smu_debug", 0600, root, adev,
+				  &fops_smu_debug);
+	if (IS_ERR(ent)) {
+		DRM_ERROR("unable to create amdgpu_smu_debug debugsfs file\n");
+		return PTR_ERR(ent);
+	}
+
+
 	/* Register debugfs entries for amdgpu_ttm */
 	amdgpu_ttm_debugfs_init(adev);
 	amdgpu_debugfs_pm_init(adev);
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
index 048ca1673863..b3969d7933d3 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
@@ -55,6 +55,14 @@
 
 #undef __SMU_DUMMY_MAP
 #define __SMU_DUMMY_MAP(type)	#type
+
+/*
+ * Used to enable SMU debug option. When enabled, it makes SMU errors fatal.
+ * This will aid in debugging SMU firmware issues.
+ * (0 = disabled (default), 1 = enabled)
+ */
+int amdgpu_smu_debug;
+
 static const char * const __smu_message_names[] = {
 	SMU_MESSAGE_TYPES
 };
@@ -272,6 +280,11 @@ int smu_cmn_send_msg_without_waiting(struct smu_context *smu,
 	__smu_cmn_send_msg(smu, msg_index, param);
 	res = 0;
 Out:
+	if (unlikely(amdgpu_smu_debug == 1) && res) {
+		mutex_unlock(&smu->message_lock);
+		BUG();
+	}
+
 	return res;
 }
 
@@ -288,9 +301,17 @@ int smu_cmn_send_msg_without_waiting(struct smu_context *smu,
 int smu_cmn_wait_for_response(struct smu_context *smu)
 {
 	u32 reg;
+	int res;
 
 	reg = __smu_cmn_poll_stat(smu);
-	return __smu_cmn_reg2errno(smu, reg);
+	res = __smu_cmn_reg2errno(smu, reg);
+
+	if (unlikely(amdgpu_smu_debug == 1) && res) {
+		mutex_unlock(&smu->message_lock);
+		BUG();
+	}
+
+	return res;
 }
 
 /**
@@ -328,6 +349,7 @@ int smu_cmn_send_smc_msg_with_param(struct smu_context *smu,
 				    uint32_t param,
 				    uint32_t *read_arg)
 {
+	int retry_count = 0;
 	int res, index;
 	u32 reg;
 
@@ -349,15 +371,28 @@ int smu_cmn_send_smc_msg_with_param(struct smu_context *smu,
 		__smu_cmn_reg_print_error(smu, reg, index, param, msg);
 		goto Out;
 	}
+retry:
 	__smu_cmn_send_msg(smu, (uint16_t) index, param);
 	reg = __smu_cmn_poll_stat(smu);
 	res = __smu_cmn_reg2errno(smu, reg);
-	if (res != 0)
+	if (res != 0) {
 		__smu_cmn_reg_print_error(smu, reg, index, param, msg);
+		if ((res == -ETIME) && (retry_count++ < 1)) {
+			usleep_range(500, 1000);
+			dev_err(smu->adev->dev,
+				"SMU: resend command: index:%d param:0x%08X message:%s",
+				index, param, smu_get_message_name(smu, msg));
+			goto retry;
+		}
+		goto Out;
+	}
 	if (read_arg)
 		smu_cmn_read_arg(smu, read_arg);
 Out:
 	mutex_unlock(&smu->message_lock);
+
+	BUG_ON(unlikely(amdgpu_smu_debug == 1) && res);
+
 	return res;
 }
 
-- 
2.25.1




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux