On Mon, Sep 2, 2024 at 3:34 AM Lijo Lazar <lijo.lazar@xxxxxxx> wrote: > > Add init levels to define the level to which device needs to be > initialized. > > Signed-off-by: Lijo Lazar <lijo.lazar@xxxxxxx> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu.h | 14 ++++++ > drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 54 ++++++++++++++++++++++ > 2 files changed, 68 insertions(+) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > index 6e6580ab7e04..fefdace22894 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > @@ -820,6 +820,16 @@ struct amdgpu_mqd { > struct amdgpu_mqd_prop *p); > }; > > +enum amdgpu_init_lvl_id { > + AMDGPU_INIT_LEVEL_DEFAULT, > + AMDGPU_INIT_LEVEL_MINIMAL, Add some comments here to define what they mean? E.g., + AMDGPU_INIT_LEVEL_MINIMAL, /* minimum needed for reset at load time */ > +}; > + > +struct amdgpu_init_level { > + enum amdgpu_init_lvl_id level; > + uint32_t hwini_ip_block_mask; > +}; > + > #define AMDGPU_RESET_MAGIC_NUM 64 > #define AMDGPU_MAX_DF_PERFMONS 4 > struct amdgpu_reset_domain; > @@ -1169,6 +1179,8 @@ struct amdgpu_device { > bool enforce_isolation[MAX_XCP]; > /* Added this mutex for cleaner shader isolation between GFX and compute processes */ > struct mutex enforce_isolation_mutex; > + > + struct amdgpu_init_level *init_lvl; > }; > > static inline uint32_t amdgpu_ip_version(const struct amdgpu_device *adev, > @@ -1623,4 +1635,6 @@ extern const struct attribute_group amdgpu_vram_mgr_attr_group; > extern const struct attribute_group amdgpu_gtt_mgr_attr_group; > extern const struct attribute_group amdgpu_flash_attr_group; > > +void amdgpu_set_init_level(struct amdgpu_device *adev, > + enum amdgpu_init_lvl_id lvl); > #endif > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > index 61a189e30bcd..4fb09c4fbf22 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > +++ 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > @@ -144,6 +144,42 @@ const char *amdgpu_asic_name[] = { > "LAST", > }; > > +#define AMDGPU_IP_BLK_MASK_ALL GENMASK(AMDGPU_MAX_IP_NUM - 1, 0) > + > +struct amdgpu_init_level amdgpu_init_default = { > + .level = AMDGPU_INIT_LEVEL_DEFAULT, > + .hwini_ip_block_mask = AMDGPU_IP_BLK_MASK_ALL, > +}; > + > +struct amdgpu_init_level amdgpu_init_minimal = { > + .level = AMDGPU_INIT_LEVEL_MINIMAL, > + .hwini_ip_block_mask = > + BIT(AMD_IP_BLOCK_TYPE_GMC) | BIT(AMD_IP_BLOCK_TYPE_SMC) | > + BIT(AMD_IP_BLOCK_TYPE_COMMON) | BIT(AMD_IP_BLOCK_TYPE_IH) > +}; > + > +static inline bool amdgpu_ip_member_of_hwini(struct amdgpu_device *adev, > + enum amd_ip_block_type block) > +{ > + return (adev->init_lvl->hwini_ip_block_mask & (1U << block)) != 0; > +} > + > +void amdgpu_set_init_level(struct amdgpu_device *adev, > + enum amdgpu_init_lvl_id lvl) > +{ > + switch (lvl) { > + case AMDGPU_INIT_LEVEL_DEFAULT: The default case can be moved here: both labels assign &amdgpu_init_default, so the separate default: arm below is redundant. > + adev->init_lvl = &amdgpu_init_default; > + break; > + case AMDGPU_INIT_LEVEL_MINIMAL: > + adev->init_lvl = &amdgpu_init_minimal; > + break; > + default: > + adev->init_lvl = &amdgpu_init_default; > + break; > + } > +} > + > static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev); > > /** > @@ -2633,6 +2669,9 @@ static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev) > continue; > if (adev->ip_blocks[i].status.hw) > continue; > + if (!amdgpu_ip_member_of_hwini( > + adev, adev->ip_blocks[i].version->type)) > + continue; > if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || > (amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) || > adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) { > @@ -2658,6 +2697,9 @@ static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev) > continue; > if (adev->ip_blocks[i].status.hw) > continue; > + if (!amdgpu_ip_member_of_hwini( > + adev, 
adev->ip_blocks[i].version->type)) > + continue; > r = adev->ip_blocks[i].version->funcs->hw_init(adev); > if (r) { > DRM_ERROR("hw_init of IP block <%s> failed %d\n", > @@ -2681,6 +2723,10 @@ static int amdgpu_device_fw_loading(struct amdgpu_device *adev) > if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_PSP) > continue; > > + if (!amdgpu_ip_member_of_hwini(adev, > + AMD_IP_BLOCK_TYPE_PSP)) > + break; > + > if (!adev->ip_blocks[i].status.sw) > continue; > > @@ -2803,6 +2849,10 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) > } > adev->ip_blocks[i].status.sw = true; > > + if (!amdgpu_ip_member_of_hwini( > + adev, adev->ip_blocks[i].version->type)) > + continue; > + > if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) { > /* need to do common hw init early so everything is set up for gmc */ > r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev); > @@ -4196,6 +4246,8 @@ int amdgpu_device_init(struct amdgpu_device *adev, > > amdgpu_device_set_mcbp(adev); > Add a comment here to make it clear where and why we change the init level. > + amdgpu_set_init_level(adev, AMDGPU_INIT_LEVEL_DEFAULT); > + adev->init_lvl = &amdgpu_init_default; This pointer assignment can be dropped: the amdgpu_set_init_level(adev, AMDGPU_INIT_LEVEL_DEFAULT) call on the previous line already sets adev->init_lvl to &amdgpu_init_default. > /* early init functions */ > r = amdgpu_device_ip_early_init(adev); > if (r) > @@ -5473,6 +5525,8 @@ int amdgpu_do_asic_reset(struct list_head *device_list_handle, > } > > list_for_each_entry(tmp_adev, device_list_handle, reset_list) { > + /* After reset, it's default init level */ > + amdgpu_set_init_level(tmp_adev, AMDGPU_INIT_LEVEL_DEFAULT); > if (need_full_reset) { > /* post card */ > amdgpu_ras_set_fed(tmp_adev, false); > -- > 2.25.1 >