Hi Christian, Many thanks for your review. I will submit a new patch according to your suggestions. Best Regards Yintian Tao -----Original Message----- From: Koenig, Christian <Christian.Koenig@xxxxxxx> Sent: 2020年4月9日 20:42 To: Tao, Yintian <Yintian.Tao@xxxxxxx>; Deucher, Alexander <Alexander.Deucher@xxxxxxx>; Deng, Emily <Emily.Deng@xxxxxxx> Cc: amd-gfx@xxxxxxxxxxxxxxxxxxxxx Subject: Re: [PATCH] drm/amdgpu: restrict debugfs register access under SR-IOV Am 09.04.20 um 08:01 schrieb Yintian Tao: > Under bare metal, there is no more else to take care of the GPU > register access through MMIO. > Under Virtualization, to access GPU register is implemented through > KIQ during run-time due to world-switch. > > Therefore, under SR-IOV user can only access debugfs to r/w GPU > registers when meets all three conditions below. > - amdgpu_gpu_recovery=0 > - TDR happened > - in_gpu_reset=0 > > Signed-off-by: Yintian Tao <yttao@xxxxxxx> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 83 ++++++++++++++++++++- > drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 7 +- > drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 23 ++++++ > drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 7 ++ > 4 files changed, 114 insertions(+), 6 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c > index c0f9a651dc06..4f9780aabf5a 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c > @@ -152,11 +152,17 @@ static int amdgpu_debugfs_process_reg_op(bool read, struct file *f, > if (r < 0) > return r; > > + if (!amdgpu_virt_can_access_debugfs(adev)) > + return -EINVAL; > + else > + amdgpu_virt_enable_access_debugfs(adev); > + It would be better to merge these two functions together. E.g. amdgpu_virt_enable_access_debugfs() could return an error if we can't allow this. And -EINVAL is maybe not the right thing here, since this is not caused by an invalid value. Maybe use -EPERM instead. 
Regards, Christian. > if (use_bank) { > if ((sh_bank != 0xFFFFFFFF && sh_bank >= adev->gfx.config.max_sh_per_se) || > (se_bank != 0xFFFFFFFF && se_bank >= adev->gfx.config.max_shader_engines)) { > pm_runtime_mark_last_busy(adev->ddev->dev); > pm_runtime_put_autosuspend(adev->ddev->dev); > + amdgpu_virt_disable_access_debugfs(adev); > return -EINVAL; > } > mutex_lock(&adev->grbm_idx_mutex); > @@ -207,6 +213,7 @@ static int amdgpu_debugfs_process_reg_op(bool read, struct file *f, > pm_runtime_mark_last_busy(adev->ddev->dev); > pm_runtime_put_autosuspend(adev->ddev->dev); > > + amdgpu_virt_disable_access_debugfs(adev); > return result; > } > > @@ -255,6 +262,11 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf, > if (r < 0) > return r; > > + if (!amdgpu_virt_can_access_debugfs(adev)) > + return -EINVAL; > + else > + amdgpu_virt_enable_access_debugfs(adev); > + > while (size) { > uint32_t value; > > @@ -263,6 +275,7 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf, > if (r) { > pm_runtime_mark_last_busy(adev->ddev->dev); > pm_runtime_put_autosuspend(adev->ddev->dev); > + amdgpu_virt_disable_access_debugfs(adev); > return r; > } > > @@ -275,6 +288,7 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf, > pm_runtime_mark_last_busy(adev->ddev->dev); > pm_runtime_put_autosuspend(adev->ddev->dev); > > + amdgpu_virt_disable_access_debugfs(adev); > return result; > } > > @@ -304,6 +318,11 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user > if (r < 0) > return r; > > + if (!amdgpu_virt_can_access_debugfs(adev)) > + return -EINVAL; > + else > + amdgpu_virt_enable_access_debugfs(adev); > + > while (size) { > uint32_t value; > > @@ -311,6 +330,7 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user > if (r) { > pm_runtime_mark_last_busy(adev->ddev->dev); > pm_runtime_put_autosuspend(adev->ddev->dev); > + 
amdgpu_virt_disable_access_debugfs(adev); > return r; > } > > @@ -325,6 +345,7 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user > pm_runtime_mark_last_busy(adev->ddev->dev); > pm_runtime_put_autosuspend(adev->ddev->dev); > > + amdgpu_virt_disable_access_debugfs(adev); > return result; > } > > @@ -354,6 +375,11 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf, > if (r < 0) > return r; > > + if (!amdgpu_virt_can_access_debugfs(adev)) > + return -EINVAL; > + else > + amdgpu_virt_enable_access_debugfs(adev); > + > while (size) { > uint32_t value; > > @@ -362,6 +388,7 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf, > if (r) { > pm_runtime_mark_last_busy(adev->ddev->dev); > pm_runtime_put_autosuspend(adev->ddev->dev); > + amdgpu_virt_disable_access_debugfs(adev); > return r; > } > > @@ -374,6 +401,7 @@ static ssize_t amdgpu_debugfs_regs_didt_read(struct file *f, char __user *buf, > pm_runtime_mark_last_busy(adev->ddev->dev); > pm_runtime_put_autosuspend(adev->ddev->dev); > > + amdgpu_virt_disable_access_debugfs(adev); > return result; > } > > @@ -403,6 +431,11 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user > if (r < 0) > return r; > > + if (!amdgpu_virt_can_access_debugfs(adev)) > + return -EINVAL; > + else > + amdgpu_virt_enable_access_debugfs(adev); > + > while (size) { > uint32_t value; > > @@ -410,6 +443,7 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user > if (r) { > pm_runtime_mark_last_busy(adev->ddev->dev); > pm_runtime_put_autosuspend(adev->ddev->dev); > + amdgpu_virt_disable_access_debugfs(adev); > return r; > } > > @@ -424,6 +458,7 @@ static ssize_t amdgpu_debugfs_regs_didt_write(struct file *f, const char __user > pm_runtime_mark_last_busy(adev->ddev->dev); > pm_runtime_put_autosuspend(adev->ddev->dev); > > + amdgpu_virt_disable_access_debugfs(adev); > return result; > } > > @@ -453,6 +488,11 
@@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf, > if (r < 0) > return r; > > + if (!amdgpu_virt_can_access_debugfs(adev)) > + return -EINVAL; > + else > + amdgpu_virt_enable_access_debugfs(adev); > + > while (size) { > uint32_t value; > > @@ -461,6 +501,7 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf, > if (r) { > pm_runtime_mark_last_busy(adev->ddev->dev); > pm_runtime_put_autosuspend(adev->ddev->dev); > + amdgpu_virt_disable_access_debugfs(adev); > return r; > } > > @@ -473,6 +514,7 @@ static ssize_t amdgpu_debugfs_regs_smc_read(struct file *f, char __user *buf, > pm_runtime_mark_last_busy(adev->ddev->dev); > pm_runtime_put_autosuspend(adev->ddev->dev); > > + amdgpu_virt_disable_access_debugfs(adev); > return result; > } > > @@ -502,6 +544,11 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user * > if (r < 0) > return r; > > + if (!amdgpu_virt_can_access_debugfs(adev)) > + return -EINVAL; > + else > + amdgpu_virt_enable_access_debugfs(adev); > + > while (size) { > uint32_t value; > > @@ -509,6 +556,7 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user * > if (r) { > pm_runtime_mark_last_busy(adev->ddev->dev); > pm_runtime_put_autosuspend(adev->ddev->dev); > + amdgpu_virt_disable_access_debugfs(adev); > return r; > } > > @@ -523,6 +571,7 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user * > pm_runtime_mark_last_busy(adev->ddev->dev); > pm_runtime_put_autosuspend(adev->ddev->dev); > > + amdgpu_virt_disable_access_debugfs(adev); > return result; > } > > @@ -651,16 +700,25 @@ static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf, > if (r < 0) > return r; > > + if (!amdgpu_virt_can_access_debugfs(adev)) > + return -EINVAL; > + else > + amdgpu_virt_enable_access_debugfs(adev); > + > r = amdgpu_dpm_read_sensor(adev, idx, &values[0], &valuesize); > > pm_runtime_mark_last_busy(adev->ddev->dev); > 
pm_runtime_put_autosuspend(adev->ddev->dev); > > - if (r) > + if (r) { > + amdgpu_virt_disable_access_debugfs(adev); > return r; > + } > > - if (size > valuesize) > + if (size > valuesize) { > + amdgpu_virt_disable_access_debugfs(adev); > return -EINVAL; > + } > > outsize = 0; > x = 0; > @@ -673,6 +731,7 @@ static ssize_t amdgpu_debugfs_sensor_read(struct file *f, char __user *buf, > } > } > > + amdgpu_virt_disable_access_debugfs(adev); > return !r ? outsize : r; > } > > @@ -720,6 +779,11 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf, > if (r < 0) > return r; > > + if (!amdgpu_virt_can_access_debugfs(adev)) > + return -EINVAL; > + else > + amdgpu_virt_enable_access_debugfs(adev); > + > /* switch to the specific se/sh/cu */ > mutex_lock(&adev->grbm_idx_mutex); > amdgpu_gfx_select_se_sh(adev, se, sh, cu); @@ -734,16 +798,20 @@ > static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf, > pm_runtime_mark_last_busy(adev->ddev->dev); > pm_runtime_put_autosuspend(adev->ddev->dev); > > - if (!x) > + if (!x) { > + amdgpu_virt_disable_access_debugfs(adev); > return -EINVAL; > + } > > while (size && (offset < x * 4)) { > uint32_t value; > > value = data[offset >> 2]; > r = put_user(value, (uint32_t *)buf); > - if (r) > + if (r) { > + amdgpu_virt_disable_access_debugfs(adev); > return r; > + } > > result += 4; > buf += 4; > @@ -751,6 +819,7 @@ static ssize_t amdgpu_debugfs_wave_read(struct file *f, char __user *buf, > size -= 4; > } > > + amdgpu_virt_disable_access_debugfs(adev); > return result; > } > > @@ -805,6 +874,11 @@ static ssize_t amdgpu_debugfs_gpr_read(struct file *f, char __user *buf, > if (r < 0) > return r; > > + if (!amdgpu_virt_can_access_debugfs(adev)) > + return -EINVAL; > + else > + amdgpu_virt_enable_access_debugfs(adev); > + > /* switch to the specific se/sh/cu */ > mutex_lock(&adev->grbm_idx_mutex); > amdgpu_gfx_select_se_sh(adev, se, sh, cu); @@ -840,6 +914,7 @@ > static ssize_t 
amdgpu_debugfs_gpr_read(struct file *f, char __user > *buf, > > err: > kfree(data); > + amdgpu_virt_disable_access_debugfs(adev); > return result; > } > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c > index 2b99f5952375..993b75dde5d2 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c > @@ -33,6 +33,7 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job) > struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched); > struct amdgpu_job *job = to_amdgpu_job(s_job); > struct amdgpu_task_info ti; > + struct amdgpu_device *adev = ring->adev; > > memset(&ti, 0, sizeof(struct amdgpu_task_info)); > > @@ -49,10 +50,12 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job) > DRM_ERROR("Process information: process %s pid %d thread %s pid %d\n", > ti.process_name, ti.tgid, ti.task_name, ti.pid); > > - if (amdgpu_device_should_recover_gpu(ring->adev)) > + if (amdgpu_device_should_recover_gpu(ring->adev)) { > amdgpu_device_gpu_recover(ring->adev, job); > - else > + } else { > drm_sched_suspend_timeout(&ring->sched); > + adev->virt.tdr_debug = true; > + } > } > > int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c > index 4d06c79065bf..d0dfe99ebc75 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c > @@ -334,3 +334,26 @@ void amdgpu_detect_virtualization(struct amdgpu_device *adev) > adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE; > } > } > + > +bool amdgpu_virt_can_access_debugfs(struct amdgpu_device *adev) { > + if (!amdgpu_sriov_vf(adev)) > + return true; > + > + if (amdgpu_sriov_is_debug(adev)) > + return true; > + > + return false; > +} > + > +void amdgpu_virt_enable_access_debugfs(struct amdgpu_device *adev) { > + if (amdgpu_sriov_vf(adev)) > + adev->virt.caps &= ~AMDGPU_SRIOV_CAPS_RUNTIME; } 
> + > +void amdgpu_virt_disable_access_debugfs(struct amdgpu_device *adev) { > + if (amdgpu_sriov_vf(adev)) > + adev->virt.caps |= AMDGPU_SRIOV_CAPS_RUNTIME; } > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h > index f6ae3c656304..a01742b7bf12 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h > @@ -265,6 +265,7 @@ struct amdgpu_virt { > uint32_t gim_feature; > uint32_t reg_access_mode; > int req_init_data_ver; > + bool tdr_debug; > }; > > #define amdgpu_sriov_enabled(adev) \ @@ -296,6 +297,8 @@ static > inline bool is_virtual_machine(void) > > #define amdgpu_sriov_is_pp_one_vf(adev) \ > ((adev)->virt.gim_feature & AMDGIM_FEATURE_PP_ONE_VF) > +#define amdgpu_sriov_is_debug(adev) \ > + ((!adev->in_gpu_reset) && adev->virt.tdr_debug) > > bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev); > void amdgpu_virt_init_setting(struct amdgpu_device *adev); @@ -314,4 > +317,8 @@ int amdgpu_virt_fw_reserve_get_checksum(void *obj, unsigned long obj_size, > unsigned int chksum); > void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev); > void amdgpu_detect_virtualization(struct amdgpu_device *adev); > + > +bool amdgpu_virt_can_access_debugfs(struct amdgpu_device *adev); void > +amdgpu_virt_enable_access_debugfs(struct amdgpu_device *adev); void > +amdgpu_virt_disable_access_debugfs(struct amdgpu_device *adev); > #endif _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx