Am 22.02.22 um 10:35 schrieb Somalapuram Amaranath:
List of registers populated for dump collection during GPU reset.
Signed-off-by: Somalapuram Amaranath <Amaranath.Somalapuram@xxxxxxx>
---
drivers/gpu/drm/amd/amdgpu/amdgpu.h | 4 +
drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 92 +++++++++++++++++++++
2 files changed, 96 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index b85b67a88a3d..6e35f2c4c869 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1097,6 +1097,10 @@ struct amdgpu_device {
struct amdgpu_reset_control *reset_cntl;
uint32_t ip_versions[HW_ID_MAX][HWIP_MAX_INSTANCE];
+
+ /* reset dump register */
+ uint32_t *reset_dump_reg_list;
+ int num_regs;
};
static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index 164d6a9e9fbb..733ee54efa34 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -1609,6 +1609,96 @@ DEFINE_DEBUGFS_ATTRIBUTE(fops_ib_preempt, NULL,
DEFINE_DEBUGFS_ATTRIBUTE(fops_sclk_set, NULL,
amdgpu_debugfs_sclk_set, "%llu\n");
+static ssize_t amdgpu_reset_dump_register_list_read(struct file *f,
+ char __user *buf, size_t size, loff_t *pos)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private;
+ char reg_offset[11];
+ int i, ret, len = 0;
+
+ if (*pos)
+ return 0;
+
+ ret = down_read_killable(&adev->reset_sem);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < adev->num_regs; i++) {
+ sprintf(reg_offset, "0x%x ", adev->reset_dump_reg_list[i]);
+ up_read(&adev->reset_sem);
+ ret = copy_to_user(buf + len, reg_offset, strlen(reg_offset));
+ if (ret)
+ goto error;
There is nothing to clean up here any more; just return -EFAULT.
+
+ len += strlen(reg_offset);
+ ret = down_read_killable(&adev->reset_sem);
+ if (ret)
+ return ret;
+ }
+
+ up_read(&adev->reset_sem);
+ ret = copy_to_user(buf + len, "\n", 1);
+ if (ret)
+ return -EFAULT;
+
+ len++;
+ *pos += len;
+
+ return len;
+error:
+ up_read(&adev->reset_sem);
+ return -EFAULT;
That's actually wrong. The code now drops the lock before calling
copy_to_user(), so this error path would call up_read() a second time
on a semaphore that is no longer held.
+}
+
+static ssize_t amdgpu_reset_dump_register_list_write(struct file *f,
+ const char __user *buf, size_t size, loff_t *pos)
+{
+ struct amdgpu_device *adev = (struct amdgpu_device *)file_inode(f)->i_private;
+ char *reg_offset, *reg, reg_temp[11];
+ uint32_t *tmp;
+ int ret, i = 0, len = 0;
+
+ do {
+ reg_offset = reg_temp;
+ memset(reg_offset, 0, 11);
+ ret = copy_from_user(reg_offset, buf + len, min(11, ((int)size-len)));
+ if (ret)
+ goto error_free;
+
+ reg = strsep(&reg_offset, " ");
As I said before, using strsep() is a rather bad idea here. The function
doesn't take a count argument and there is no guarantee that there is a
space or zero terminator inside the string.
What you should do instead is use memchr(); alternatively, sscanf()
should work as well and does both things in just one call, e.g.
something like this:
if (sscanf(reg, "%Lx %n", &tmp[i], &ret) != 1)
goto error_free;
len += ret;
i++;
And BTW: I would use a newline instead of a space as the separator. This
way the read and write interfaces match.
Regards,
Christian.
+ tmp = krealloc_array(tmp, i + 1, sizeof(uint32_t), GFP_KERNEL);
+ ret = kstrtouint(reg, 16, &tmp[i]);
+ if (ret)
+ goto error_free;
+
+ len += strlen(reg) + 1;
+ i++;
+
+ } while (len < size);
+
+ ret = down_write_killable(&adev->reset_sem);
+ if (ret)
+ goto error_free;
+
+ swap(adev->reset_dump_reg_list, tmp);
+ adev->num_regs = i;
+ up_write(&adev->reset_sem);
+ ret = size;
+
+error_free:
+ kfree(tmp);
+ return ret;
+}
+
+
+
+static const struct file_operations amdgpu_reset_dump_register_list = {
+ .owner = THIS_MODULE,
+ .read = amdgpu_reset_dump_register_list_read,
+ .write = amdgpu_reset_dump_register_list_write,
+ .llseek = default_llseek
+};
+
int amdgpu_debugfs_init(struct amdgpu_device *adev)
{
struct dentry *root = adev_to_drm(adev)->primary->debugfs_root;
@@ -1672,6 +1762,8 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev)
&amdgpu_debugfs_test_ib_fops);
debugfs_create_file("amdgpu_vm_info", 0444, root, adev,
&amdgpu_debugfs_vm_info_fops);
+ debugfs_create_file("amdgpu_reset_dump_register_list", 0644, root, adev,
+ &amdgpu_reset_dump_register_list);
adev->debugfs_vbios_blob.data = adev->bios;
adev->debugfs_vbios_blob.size = adev->bios_size;