[AMD Official Use Only - AMD Internal Distribution Only] If the old FW doesn't support the isolation feature, it won't check that bit; the setting there will be ignored, so it won't cause a problem. Regards, Shaoyun.liu -----Original Message----- From: Alex Deucher <alexdeucher@xxxxxxxxx> Sent: Thursday, October 24, 2024 9:21 AM To: Liu, Shaoyun <Shaoyun.Liu@xxxxxxx> Cc: amd-gfx@xxxxxxxxxxxxxxxxxxxxx Subject: Re: [PATCH] drm/amd/amdgpu: limit single process inside MES On Wed, Oct 23, 2024 at 8:48 PM Shaoyun Liu <shaoyun.liu@xxxxxxx> wrote: > > This is for MES to limit only one process for the user queues > > Signed-off-by: Shaoyun Liu <shaoyun.liu@xxxxxxx> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 2 ++ > drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 24 ++++++++++++++++++++++++ > drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 19 +++++++++++++++++++ > drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 15 +++++++++++++++ > drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 11 +++++++++++ > 5 files changed, 71 insertions(+) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > index e96984c53e72..72e38d621a29 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > @@ -1576,9 +1576,11 @@ static ssize_t amdgpu_gfx_set_enforce_isolation(struct device *dev, > if (adev->enforce_isolation[i] && !partition_values[i]) { > /* Going from enabled to disabled */ > amdgpu_vmid_free_reserved(adev, > AMDGPU_GFXHUB(i)); > + amdgpu_mes_set_enforce_isolation(adev, i, > + false); > } else if (!adev->enforce_isolation[i] && partition_values[i]) { > /* Going from disabled to enabled */ > amdgpu_vmid_alloc_reserved(adev, > AMDGPU_GFXHUB(i)); > + amdgpu_mes_set_enforce_isolation(adev, i, > + true); > } > adev->enforce_isolation[i] = partition_values[i]; > } > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c > index bf584e9bcce4..dfc7d320fcbc 100644 > ---
a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c > @@ -1674,6 +1674,30 @@ bool amdgpu_mes_suspend_resume_all_supported(struct amdgpu_device *adev) > return is_supported; > } > > +/* Fix me -- node_id is used to identify the correct MES instances in > +the future */ int amdgpu_mes_set_enforce_isolation(struct > +amdgpu_device *adev, uint32_t node_id, bool enable) { > + struct mes_misc_op_input op_input = {0}; > + int r; > + > + op_input.op = MES_MISC_OP_CHANGE_CONFIG; > + op_input.change_config.option.limit_single_process = enable ? > + 1 : 0; > + > + if (!adev->mes.funcs->misc_op) { > + dev_err(adev->dev,"mes change config is not supported!\n"); > + r = -EINVAL; > + goto error; > + } > + > + r = adev->mes.funcs->misc_op(&adev->mes, &op_input); > + if (r) > + dev_err(adev->dev, "failed to change_config.\n"); > + > +error: > + return r; > + > +} > + > #if defined(CONFIG_DEBUG_FS) > > static int amdgpu_debugfs_mes_event_log_show(struct seq_file *m, void > *unused) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h > index 79f13d7e5e16..91bff6443c05 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h > @@ -311,6 +311,7 @@ enum mes_misc_opcode { > MES_MISC_OP_WRM_REG_WAIT, > MES_MISC_OP_WRM_REG_WR_WAIT, > MES_MISC_OP_SET_SHADER_DEBUGGER, > + MES_MISC_OP_CHANGE_CONFIG, > }; > > struct mes_misc_op_input { > @@ -349,6 +350,21 @@ struct mes_misc_op_input { > uint32_t tcp_watch_cntl[4]; > uint32_t trap_en; > } set_shader_debugger; > + > + struct { > + union { > + struct { > + uint32_t limit_single_process : 1; > + uint32_t enable_hws_logging_buffer : 1; > + uint32_t reserved : 30; > + }; > + uint32_t all; > + } option; > + struct { > + uint32_t tdr_level; > + uint32_t tdr_delay; > + } tdr_config; > + } change_config; > }; > }; > > @@ -519,4 +535,7 @@ static inline void amdgpu_mes_unlock(struct > amdgpu_mes *mes) } > > bool 
amdgpu_mes_suspend_resume_all_supported(struct amdgpu_device > *adev); > + > +int amdgpu_mes_set_enforce_isolation(struct amdgpu_device *adev, > +uint32_t node_id, bool enable); > + > #endif /* __AMDGPU_MES_H__ */ > diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c > b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c > index 57db0c006c8f..c621ba805433 100644 > --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c > @@ -644,6 +644,18 @@ static int mes_v11_0_misc_op(struct amdgpu_mes *mes, > sizeof(misc_pkt.set_shader_debugger.tcp_watch_cntl)); > misc_pkt.set_shader_debugger.trap_en = input->set_shader_debugger.trap_en; > break; > + case MES_MISC_OP_CHANGE_CONFIG: > + if ((adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) < 0x63) { > + dev_err(adev->dev, "MES FW versoin must be larger than 0x63 to support limit single process feature.\n"); > + return -EINVAL; > + } > + misc_pkt.opcode = MESAPI_MISC__CHANGE_CONFIG; > + misc_pkt.change_config.opcode = > + MESAPI_MISC__CHANGE_CONFIG_OPTION_LIMIT_SINGLE_PROCESS; > + misc_pkt.change_config.option.bits.limit_single_process = > + input->change_config.option.limit_single_process; > + break; > + > default: > DRM_ERROR("unsupported misc op (%d) \n", input->op); > return -EINVAL; > @@ -719,6 +731,9 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes) > mes->event_log_gpu_addr; > } > > + if (enforce_isolation) > + mes_set_hw_res_pkt.limit_single_process =1; Assuming that setting this on old firmware will not cause a problem? If so we need a firmware check here as well. 
If not, the patch is: Reviewed-by: Alex Deucher <alexander.deucher@xxxxxxx> > + > return mes_v11_0_submit_pkt_and_poll_completion(mes, > &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt), > offsetof(union MESAPI_SET_HW_RESOURCES, > api_status)); diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c > b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c > index 9d0e342a2f81..26d1b82721ce 100644 > --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c > @@ -531,6 +531,14 @@ static int mes_v12_0_misc_op(struct amdgpu_mes *mes, > sizeof(misc_pkt.set_shader_debugger.tcp_watch_cntl)); > misc_pkt.set_shader_debugger.trap_en = input->set_shader_debugger.trap_en; > break; > + case MES_MISC_OP_CHANGE_CONFIG: > + misc_pkt.opcode = MESAPI_MISC__CHANGE_CONFIG; > + misc_pkt.change_config.opcode = > + MESAPI_MISC__CHANGE_CONFIG_OPTION_LIMIT_SINGLE_PROCESS; > + misc_pkt.change_config.option.bits.limit_single_process = > + input->change_config.option.limit_single_process; > + break; > + > default: > DRM_ERROR("unsupported misc op (%d) \n", input->op); > return -EINVAL; > @@ -633,6 +641,9 @@ static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes, int pipe) > mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr = mes->event_log_gpu_addr + pipe * AMDGPU_MES_LOG_BUFFER_SIZE; > } > > + if (enforce_isolation) > + mes_set_hw_res_pkt.limit_single_process =1; > + > return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe, > &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt), > offsetof(union MESAPI_SET_HW_RESOURCES, > api_status)); > -- > 2.34.1 >