Re: [PATCH] drm/amdgpu: restrict debugfs register access under SR-IOV

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Am 09.04.20 um 08:01 schrieb Yintian Tao:
On bare metal, nothing else has to be taken
care of when accessing GPU registers through MMIO.
Under virtualization, GPU register access is
implemented through KIQ at run-time because of
world switches.

Therefore, under SR-IOV a user can only use
debugfs to read/write GPU registers when all
three conditions below are met:
- amdgpu_gpu_recovery=0
- TDR happened
- in_gpu_reset=0

Signed-off-by: Yintian Tao <yttao@xxxxxxx>
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 83 ++++++++++++++++++++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_job.c     |  7 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c    | 23 ++++++
  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h    |  7 ++
  4 files changed, 114 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index c0f9a651dc06..4f9780aabf5a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -152,11 +152,17 @@ static int  amdgpu_debugfs_process_reg_op(bool read, struct file *f,
  	if (r < 0)
  		return r;
+ if (!amdgpu_virt_can_access_debugfs(adev))
+		return -EINVAL;
+	else
+		amdgpu_virt_enable_access_debugfs(adev);
+

It would be better to merge these two functions together.

E.g. amdgpu_virt_enable_access_debugfs() could return an error if we can't allow this.

And -EINVAL is maybe not the right thing here, since this is not caused by an invalid value.

Maybe use -EPERM instead.

Regards,
Christian.

  	if (use_bank) {
  		if ((sh_bank != 0xFFFFFFFF && sh_bank >= adev->gfx.config.max_sh_per_se) ||
  		    (se_bank != 0xFFFFFFFF && se_bank >= adev->gfx.config.max_shader_engines)) {
  			pm_runtime_mark_last_busy(adev->ddev->dev);
  			pm_runtime_put_autosuspend(adev->ddev->dev);
+			amdgpu_virt_disable_access_debugfs(adev);
  			return -EINVAL;
  		}
  		mutex_lock(&adev->grbm_idx_mutex);
@@ -207,6 +213,7 @@ static int  amdgpu_debugfs_process_reg_op(bool read, struct file *f,
  	pm_runtime_mark_last_busy(adev->ddev->dev);
  	pm_runtime_put_autosuspend(adev->ddev->dev);
+ amdgpu_virt_disable_access_debugfs(adev);
  	return result;
  }
@@ -255,6 +262,11 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
  	if (r < 0)
  		return r;
+ if (!amdgpu_virt_can_access_debugfs(adev))
+		return -EINVAL;
+	else
+		amdgpu_virt_enable_access_debugfs(adev);
+
  	while (size) {
  		uint32_t value;
@@ -263,6 +275,7 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
  		if (r) {
  			pm_runtime_mark_last_busy(adev->ddev->dev);
  			pm_runtime_put_autosuspend(adev->ddev->dev);
+			amdgpu_virt_disable_access_debugfs(adev);
  			return r;
  		}
@@ -275,6 +288,7 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
  	pm_runtime_mark_last_busy(adev->ddev->dev);
  	pm_runtime_put_autosuspend(adev->ddev->dev);
+ amdgpu_virt_disable_access_debugfs(adev);
  	return result;
  }
@@ -304,6 +318,11 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user
  	if (r < 0)
  		return r;
+ if (!amdgpu_virt_can_access_debugfs(adev))
+		return -EINVAL;
+	else
+		amdgpu_virt_enable_access_debugfs(adev);
+
  	while (size) {
  		uint32_t value;
@@ -311,6 +330,7 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user
  		if (r) {
  			pm_runtime_mark_last_busy(adev->ddev->dev);
  			pm_runtime_put_autosuspend(adev->ddev->dev);
+			amdgpu_virt_disable_access_debugfs(adev);
  			return r;
  		}
@@ -325,6 +345,7 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user
  	pm_runtime_mark_last_busy(adev->ddev->dev);
  	pm_runtime_put_autosuspend(adev->ddev->dev);
+ amdgpu_virt_disable_access_debugfs(adev);
  	return result;
  }
@@ -354,6 +375,11 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
  	if (r < 0)
  		return r;
+ if (!amdgpu_virt_can_access_debugfs(adev))
+		return -EINVAL;
+	else
+		amdgpu_virt_enable_access_debugfs(adev);
+
  	while (size) {
  		uint32_t value;
@@ -362,6 +388,7 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
  		if (r) {
  			pm_runtime_mark_last_busy(adev->ddev->dev);
  			pm_runtime_put_autosuspend(adev->ddev->dev);
+			amdgpu_virt_disable_access_debugfs(adev);
  			return r;
  		}
@@ -374,6 +401,7 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf,
  	pm_runtime_mark_last_busy(adev->ddev->dev);
  	pm_runtime_put_autosuspend(adev->ddev->dev);
+ amdgpu_virt_disable_access_debugfs(adev);
  	return result;
  }
@@ -403,6 +431,11 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user
  	if (r < 0)
  		return r;
+ if (!amdgpu_virt_can_access_debugfs(adev))
+		return -EINVAL;
+	else
+		amdgpu_virt_enable_access_debugfs(adev);
+
  	while (size) {
  		uint32_t value;
@@ -410,6 +443,7 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user
  		if (r) {
  			pm_runtime_mark_last_busy(adev->ddev->dev);
  			pm_runtime_put_autosuspend(adev->ddev->dev);
+			amdgpu_virt_disable_access_debugfs(adev);
  			return r;
  		}
@@ -424,6 +458,7 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user
  	pm_runtime_mark_last_busy(adev->ddev->dev);
  	pm_runtime_put_autosuspend(adev->ddev->dev);
+ amdgpu_virt_disable_access_debugfs(adev);
  	return result;
  }
@@ -453,6 +488,11 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
  	if (r < 0)
  		return r;
+ if (!amdgpu_virt_can_access_debugfs(adev))
+		return -EINVAL;
+	else
+		amdgpu_virt_enable_access_debugfs(adev);
+
  	while (size) {
  		uint32_t value;
@@ -461,6 +501,7 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
  		if (r) {
  			pm_runtime_mark_last_busy(adev->ddev->dev);
  			pm_runtime_put_autosuspend(adev->ddev->dev);
+			amdgpu_virt_disable_access_debugfs(adev);
  			return r;
  		}
@@ -473,6 +514,7 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf,
  	pm_runtime_mark_last_busy(adev->ddev->dev);
  	pm_runtime_put_autosuspend(adev->ddev->dev);
+ amdgpu_virt_disable_access_debugfs(adev);
  	return result;
  }
@@ -502,6 +544,11 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *
  	if (r < 0)
  		return r;
+ if (!amdgpu_virt_can_access_debugfs(adev))
+		return -EINVAL;
+	else
+		amdgpu_virt_enable_access_debugfs(adev);
+
  	while (size) {
  		uint32_t value;
@@ -509,6 +556,7 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *
  		if (r) {
  			pm_runtime_mark_last_busy(adev->ddev->dev);
  			pm_runtime_put_autosuspend(adev->ddev->dev);
+			amdgpu_virt_disable_access_debugfs(adev);
  			return r;
  		}
@@ -523,6 +571,7 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *
  	pm_runtime_mark_last_busy(adev->ddev->dev);
  	pm_runtime_put_autosuspend(adev->ddev->dev);
+ amdgpu_virt_disable_access_debugfs(adev);
  	return result;
  }
@@ -651,16 +700,25 @@ static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf,
  	if (r < 0)
  		return r;
+ if (!amdgpu_virt_can_access_debugfs(adev))
+		return -EINVAL;
+	else
+		amdgpu_virt_enable_access_debugfs(adev);
+
  	r = amdgpu_dpm_read_sensor(adev, idx, &values[0], &valuesize);
pm_runtime_mark_last_busy(adev->ddev->dev);
  	pm_runtime_put_autosuspend(adev->ddev->dev);
- if (r)
+	if (r) {
+		amdgpu_virt_disable_access_debugfs(adev);
  		return r;
+	}
- if (size > valuesize)
+	if (size > valuesize) {
+		amdgpu_virt_disable_access_debugfs(adev);
  		return -EINVAL;
+	}
outsize = 0;
  	x = 0;
@@ -673,6 +731,7 @@ static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf,
  		}
  	}
+ amdgpu_virt_disable_access_debugfs(adev);
  	return !r ? outsize : r;
  }
@@ -720,6 +779,11 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf,
  	if (r < 0)
  		return r;
+ if (!amdgpu_virt_can_access_debugfs(adev))
+		return -EINVAL;
+	else
+		amdgpu_virt_enable_access_debugfs(adev);
+
  	/* switch to the specific se/sh/cu */
  	mutex_lock(&adev->grbm_idx_mutex);
  	amdgpu_gfx_select_se_sh(adev, se, sh, cu);
@@ -734,16 +798,20 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf,
  	pm_runtime_mark_last_busy(adev->ddev->dev);
  	pm_runtime_put_autosuspend(adev->ddev->dev);
- if (!x)
+	if (!x) {
+		amdgpu_virt_disable_access_debugfs(adev);
  		return -EINVAL;
+	}
while (size && (offset < x * 4)) {
  		uint32_t value;
value = data[offset >> 2];
  		r = put_user(value, (uint32_t *)buf);
-		if (r)
+		if (r) {
+			amdgpu_virt_disable_access_debugfs(adev);
  			return r;
+		}
result += 4;
  		buf += 4;
@@ -751,6 +819,7 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf,
  		size -= 4;
  	}
+ amdgpu_virt_disable_access_debugfs(adev);
  	return result;
  }
@@ -805,6 +874,11 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf,
  	if (r < 0)
  		return r;
+ if (!amdgpu_virt_can_access_debugfs(adev))
+		return -EINVAL;
+	else
+		amdgpu_virt_enable_access_debugfs(adev);
+
  	/* switch to the specific se/sh/cu */
  	mutex_lock(&adev->grbm_idx_mutex);
  	amdgpu_gfx_select_se_sh(adev, se, sh, cu);
@@ -840,6 +914,7 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf,
err:
  	kfree(data);
+	amdgpu_virt_disable_access_debugfs(adev);
  	return result;
  }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 2b99f5952375..993b75dde5d2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -33,6 +33,7 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job)
  	struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
  	struct amdgpu_job *job = to_amdgpu_job(s_job);
  	struct amdgpu_task_info ti;
+	struct amdgpu_device *adev = ring->adev;
memset(&ti, 0, sizeof(struct amdgpu_task_info)); @@ -49,10 +50,12 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job)
  	DRM_ERROR("Process information: process %s pid %d thread %s pid %d\n",
  		  ti.process_name, ti.tgid, ti.task_name, ti.pid);
- if (amdgpu_device_should_recover_gpu(ring->adev))
+	if (amdgpu_device_should_recover_gpu(ring->adev)) {
  		amdgpu_device_gpu_recover(ring->adev, job);
-	else
+	} else {
  		drm_sched_suspend_timeout(&ring->sched);
+		adev->virt.tdr_debug = true;
+	}
  }
int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 4d06c79065bf..d0dfe99ebc75 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -334,3 +334,26 @@ void amdgpu_detect_virtualization(struct amdgpu_device *adev)
  			adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE;
  	}
  }
+
+bool amdgpu_virt_can_access_debugfs(struct amdgpu_device *adev)
+{
+	if (!amdgpu_sriov_vf(adev))
+		return true;
+
+	if (amdgpu_sriov_is_debug(adev))
+		return true;
+
+	return false;
+}
+
+void amdgpu_virt_enable_access_debugfs(struct amdgpu_device *adev)
+{
+	if (amdgpu_sriov_vf(adev))
+		adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME;
+}
+
+void amdgpu_virt_disable_access_debugfs(struct amdgpu_device *adev)
+{
+	if (amdgpu_sriov_vf(adev))
+		adev->virt.caps |= AMDGPU_SRIOV_CAPS_RUNTIME;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index f6ae3c656304..a01742b7bf12 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -265,6 +265,7 @@ struct amdgpu_virt {
  	uint32_t gim_feature;
  	uint32_t reg_access_mode;
  	int req_init_data_ver;
+	bool tdr_debug;
  };
#define amdgpu_sriov_enabled(adev) \
@@ -296,6 +297,8 @@ static inline bool is_virtual_machine(void)
#define amdgpu_sriov_is_pp_one_vf(adev) \
  	((adev)->virt.gim_feature & AMDGIM_FEATURE_PP_ONE_VF)
+#define amdgpu_sriov_is_debug(adev) \
+	((!adev->in_gpu_reset) && adev->virt.tdr_debug)
bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev);
  void amdgpu_virt_init_setting(struct amdgpu_device *adev);
@@ -314,4 +317,8 @@ int amdgpu_virt_fw_reserve_get_checksum(void *obj, unsigned long obj_size,
  					unsigned int chksum);
  void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev);
  void amdgpu_detect_virtualization(struct amdgpu_device *adev);
+
+bool amdgpu_virt_can_access_debugfs(struct amdgpu_device *adev);
+void amdgpu_virt_enable_access_debugfs(struct amdgpu_device *adev);
+void amdgpu_virt_disable_access_debugfs(struct amdgpu_device *adev);
  #endif

_______________________________________________
amd-gfx mailing list
amd-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/amd-gfx



[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux