On Wed, Oct 23, 2024 at 2:08 PM Shaoyun Liu <shaoyun.liu@xxxxxxx> wrote: > > This is for MES to limit only one process for the user queues > > Signed-off-by: Shaoyun Liu <shaoyun.liu@xxxxxxx> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 2 ++ > drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 24 ++++++++++++++++++++++++ > drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 19 +++++++++++++++++++ > drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 11 +++++++++++ > drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 11 +++++++++++ > 5 files changed, 67 insertions(+) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > index e96984c53e72..72e38d621a29 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > @@ -1576,9 +1576,11 @@ static ssize_t amdgpu_gfx_set_enforce_isolation(struct device *dev, > if (adev->enforce_isolation[i] && !partition_values[i]) { > /* Going from enabled to disabled */ > amdgpu_vmid_free_reserved(adev, AMDGPU_GFXHUB(i)); > + amdgpu_mes_set_enforce_isolation(adev, i, false); > } else if (!adev->enforce_isolation[i] && partition_values[i]) { > /* Going from disabled to enabled */ > amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(i)); > + amdgpu_mes_set_enforce_isolation(adev, i, true); > } > adev->enforce_isolation[i] = partition_values[i]; > } > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c > index bf584e9bcce4..29b6a2baae4d 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c > @@ -1674,6 +1674,30 @@ bool amdgpu_mes_suspend_resume_all_supported(struct amdgpu_device *adev) > return is_supported; > } > > +/* Fix me -- node_id is used to identify the correct MES instances in the future */ > +int amdgpu_mes_set_enforce_isolation(struct amdgpu_device *adev, uint32_t node_id, bool enable) > +{ > + struct mes_misc_op_input op_input = {0}; > + int r; > + > + op_input.op = 
MES_MISC_OP_CHANGE_CONFIG; > + op_input.change_config.option.limit_single_process = enable ? 1 : 0; > + > + if (!adev->mes.funcs->misc_op) { > + DRM_ERROR("mes change config is not supported!\n"); Please use dev_err() so it's clear which GPU the error is coming from in a multi-GPU system. > + r = -EINVAL; > + goto error; > + } > + > + r = adev->mes.funcs->misc_op(&adev->mes, &op_input); > + if (r) > + DRM_ERROR("failed to change_config.\n"); dev_err() > + > +error: > + return r; > + > +} > + > #if defined(CONFIG_DEBUG_FS) > > static int amdgpu_debugfs_mes_event_log_show(struct seq_file *m, void *unused) > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h > index 79f13d7e5e16..91bff6443c05 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h > @@ -311,6 +311,7 @@ enum mes_misc_opcode { > MES_MISC_OP_WRM_REG_WAIT, > MES_MISC_OP_WRM_REG_WR_WAIT, > MES_MISC_OP_SET_SHADER_DEBUGGER, > + MES_MISC_OP_CHANGE_CONFIG, > }; > > struct mes_misc_op_input { > @@ -349,6 +350,21 @@ struct mes_misc_op_input { > uint32_t tcp_watch_cntl[4]; > uint32_t trap_en; > } set_shader_debugger; > + > + struct { > + union { > + struct { > + uint32_t limit_single_process : 1; > + uint32_t enable_hws_logging_buffer : 1; > + uint32_t reserved : 30; > + }; > + uint32_t all; > + } option; > + struct { > + uint32_t tdr_level; > + uint32_t tdr_delay; > + } tdr_config; > + } change_config; > }; > }; > > @@ -519,4 +535,7 @@ static inline void amdgpu_mes_unlock(struct amdgpu_mes *mes) > } > > bool amdgpu_mes_suspend_resume_all_supported(struct amdgpu_device *adev); > + > +int amdgpu_mes_set_enforce_isolation(struct amdgpu_device *adev, uint32_t node_id, bool enable); > + > #endif /* __AMDGPU_MES_H__ */ > diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c > index 57db0c006c8f..1d6de7bced48 100644 > --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c > +++ 
b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c > @@ -644,6 +644,14 @@ static int mes_v11_0_misc_op(struct amdgpu_mes *mes, > sizeof(misc_pkt.set_shader_debugger.tcp_watch_cntl)); > misc_pkt.set_shader_debugger.trap_en = input->set_shader_debugger.trap_en; > break; > + case MES_MISC_OP_CHANGE_CONFIG: > + misc_pkt.opcode = MESAPI_MISC__CHANGE_CONFIG; > + misc_pkt.change_config.opcode = > + MESAPI_MISC__CHANGE_CONFIG_OPTION_LIMIT_SINGLE_PROCESS; > + misc_pkt.change_config.option.bits.limit_single_process = > + input->change_config.option.limit_single_process; We should add a firmware version check here and return an error if the fw version is too old to support this packet. > + break; > + > default: > DRM_ERROR("unsupported misc op (%d) \n", input->op); > return -EINVAL; > @@ -719,6 +727,9 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes) > mes->event_log_gpu_addr; > } > > + if(enforce_isolation) missing space between if and (. > + mes_set_hw_res_pkt.limit_single_process =1; > + > return mes_v11_0_submit_pkt_and_poll_completion(mes, > &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt), > offsetof(union MESAPI_SET_HW_RESOURCES, api_status)); > diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c > index 9d0e342a2f81..85eff9b777c2 100644 > --- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c > @@ -531,6 +531,14 @@ static int mes_v12_0_misc_op(struct amdgpu_mes *mes, > sizeof(misc_pkt.set_shader_debugger.tcp_watch_cntl)); > misc_pkt.set_shader_debugger.trap_en = input->set_shader_debugger.trap_en; > break; > + case MES_MISC_OP_CHANGE_CONFIG: > + misc_pkt.opcode = MESAPI_MISC__CHANGE_CONFIG; > + misc_pkt.change_config.opcode = > + MESAPI_MISC__CHANGE_CONFIG_OPTION_LIMIT_SINGLE_PROCESS; > + misc_pkt.change_config.option.bits.limit_single_process = > + input->change_config.option.limit_single_process; > + break; > + > default: > DRM_ERROR("unsupported misc op (%d) \n", input->op); > 
return -EINVAL; > @@ -633,6 +641,9 @@ static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes, int pipe) > mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr = mes->event_log_gpu_addr + pipe * AMDGPU_MES_LOG_BUFFER_SIZE; > } > > + if(enforce_isolation) missing space between if and (. Other than that, looks good to me. Alex > + mes_set_hw_res_pkt.limit_single_process =1; > + > return mes_v12_0_submit_pkt_and_poll_completion(mes, pipe, > &mes_set_hw_res_pkt, sizeof(mes_set_hw_res_pkt), > offsetof(union MESAPI_SET_HW_RESOURCES, api_status)); > -- > 2.34.1 >