On Wed, Apr 20, 2022 at 5:48 PM Alice Wong <shiwei.wong@xxxxxxx> wrote: > > If at any point psp_hw_init failed, psp_hw_fini would not be called > during unload due to ip_blocks[PSP].status.hw not being set to true. > This could cause a memory leak when the driver unloads. > As a rule of thumb, each IP block should cleanup themselves when > their hw_init fails. Only previously intialized blocks should > be cleaned up by the common framework. > > v1: Call IP blocks' respective hw_fini when hw_init failed from > the common framework > v2: Call psp_hw_fini when psp_hw_init failed. > > Signed-off-by: Alice Wong <shiwei.wong@xxxxxxx> I don't think this is a good idea. The hw programming in hw_fini makes no sense if the driver never set it up in the first place if hw_init failed before initializing the hw. It would be better to just properly unwind the relevant functions. Presumably the problem you are seeing is the failure to free the GPU memory allocated in fw_fini, depending on where it fails. We should just allocate the memory in sw_init; that is what we do in other IPs. Does the attached patch fix the issue you are seeing? Alex > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 57 +++++++++++++------------ > 1 file changed, 29 insertions(+), 28 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c > index 5b9e48d44f5b..52b14efa848a 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c > @@ -2537,6 +2537,34 @@ static int psp_load_fw(struct amdgpu_device *adev) > return ret; > } > > +static int psp_hw_fini(void *handle) > +{ > + struct amdgpu_device *adev = (struct amdgpu_device *)handle; > + struct psp_context *psp = &adev->psp; > + > + if (psp->ta_fw) { > + psp_ras_terminate(psp); > + psp_securedisplay_terminate(psp); > + psp_rap_terminate(psp); > + psp_dtm_terminate(psp); > + psp_hdcp_terminate(psp); > + } > + > + psp_asd_terminate(psp); > + > + psp_tmr_terminate(psp); > + psp_ring_destroy(psp, PSP_RING_TYPE__KM); > + > + amdgpu_bo_free_kernel(&psp->fw_pri_bo, > + &psp->fw_pri_mc_addr, &psp->fw_pri_buf); > + amdgpu_bo_free_kernel(&psp->fence_buf_bo, > + &psp->fence_buf_mc_addr, &psp->fence_buf); > + amdgpu_bo_free_kernel(&psp->cmd_buf_bo, &psp->cmd_buf_mc_addr, > + (void **)&psp->cmd_buf_mem); > + > + return 0; > +} > + > static int psp_hw_init(void *handle) > { > int ret; > @@ -2563,37 +2591,10 @@ static int psp_hw_init(void *handle) > failed: > adev->firmware.load_type = AMDGPU_FW_LOAD_DIRECT; > mutex_unlock(&adev->firmware.mutex); > + psp_hw_fini(handle); > return -EINVAL; > } > > -static int psp_hw_fini(void *handle) > -{ > - struct amdgpu_device *adev = (struct amdgpu_device *)handle; > - struct psp_context *psp = &adev->psp; > - > - if (psp->ta_fw) { > - psp_ras_terminate(psp); > - psp_securedisplay_terminate(psp); > - psp_rap_terminate(psp); > - psp_dtm_terminate(psp); > - psp_hdcp_terminate(psp); > - } > - > - psp_asd_terminate(psp); > - > - psp_tmr_terminate(psp); > - psp_ring_destroy(psp, PSP_RING_TYPE__KM); > - > - amdgpu_bo_free_kernel(&psp->fw_pri_bo, > - &psp->fw_pri_mc_addr, &psp->fw_pri_buf); > - amdgpu_bo_free_kernel(&psp->fence_buf_bo, > - &psp->fence_buf_mc_addr, &psp->fence_buf); > - amdgpu_bo_free_kernel(&psp->cmd_buf_bo, &psp->cmd_buf_mc_addr, > - (void **)&psp->cmd_buf_mem); > - > - return 0; > -} > - > static int psp_suspend(void *handle) > { > int ret; > -- > 2.25.1 >
From f98a157d52e199c8530dc0efc91ba3bd5b7ce3cc Mon Sep 17 00:00:00 2001 From: Alex Deucher <alexander.deucher@xxxxxxx> Date: Thu, 21 Apr 2022 01:21:52 -0400 Subject: [PATCH] drm/amdgpu/psp: move PSP memory alloc from hw_init to sw_init Memory allocations should be done in sw_init. hw_init should just be hardware programming needed to initialize the IP block. This is how most other IP blocks work. Move the GPU memory allocations from psp hw_init to psp sw_init and move the memory free to sw_fini. This also fixes a potential GPU memory leak if psp hw_init fails. Signed-off-by: Alex Deucher <alexander.deucher@xxxxxxx> --- drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 95 ++++++++++++------------- 1 file changed, 47 insertions(+), 48 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index a6acec1a6155..21aa556a6bef 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -357,7 +357,39 @@ static int psp_sw_init(void *handle) } } + ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG, + amdgpu_sriov_vf(adev) ? + AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT, + &psp->fw_pri_bo, + &psp->fw_pri_mc_addr, + &psp->fw_pri_buf); + if (ret) + return ret; + + ret = amdgpu_bo_create_kernel(adev, PSP_FENCE_BUFFER_SIZE, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &psp->fence_buf_bo, + &psp->fence_buf_mc_addr, + &psp->fence_buf); + if (ret) + goto failed1; + + ret = amdgpu_bo_create_kernel(adev, PSP_CMD_BUFFER_SIZE, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &psp->cmd_buf_bo, &psp->cmd_buf_mc_addr, + (void **)&psp->cmd_buf_mem); + if (ret) + goto failed2; + return 0; + +failed2: + amdgpu_bo_free_kernel(&psp->fw_pri_bo, + &psp->fw_pri_mc_addr, &psp->fw_pri_buf); +failed1: + amdgpu_bo_free_kernel(&psp->fence_buf_bo, + &psp->fence_buf_mc_addr, &psp->fence_buf); + return ret; } static int psp_sw_fini(void *handle) @@ -391,6 +423,13 @@ static int psp_sw_fini(void *handle) kfree(cmd); cmd = NULL; + amdgpu_bo_free_kernel(&psp->fw_pri_bo, + &psp->fw_pri_mc_addr, &psp->fw_pri_buf); + amdgpu_bo_free_kernel(&psp->fence_buf_bo, + &psp->fence_buf_mc_addr, &psp->fence_buf); + amdgpu_bo_free_kernel(&psp->cmd_buf_bo, &psp->cmd_buf_mc_addr, + (void **)&psp->cmd_buf_mem); + return 0; } @@ -2430,51 +2469,18 @@ static int psp_load_fw(struct amdgpu_device *adev) struct psp_context *psp = &adev->psp; if (amdgpu_sriov_vf(adev) && amdgpu_in_reset(adev)) { - psp_ring_stop(psp, PSP_RING_TYPE__KM); /* should not destroy ring, only stop */ - goto skip_memalloc; - } - - if (amdgpu_sriov_vf(adev)) { - ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG, - AMDGPU_GEM_DOMAIN_VRAM, - &psp->fw_pri_bo, - &psp->fw_pri_mc_addr, - &psp->fw_pri_buf); + /* should not destroy ring, only stop */ + psp_ring_stop(psp, PSP_RING_TYPE__KM); } else { - ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG, - AMDGPU_GEM_DOMAIN_GTT, - &psp->fw_pri_bo, - &psp->fw_pri_mc_addr, - &psp->fw_pri_buf); - } - - if (ret) - goto failed; - - ret = amdgpu_bo_create_kernel(adev, PSP_FENCE_BUFFER_SIZE, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, - &psp->fence_buf_bo, - &psp->fence_buf_mc_addr, - &psp->fence_buf); - if (ret) - goto failed; - - ret = amdgpu_bo_create_kernel(adev, PSP_CMD_BUFFER_SIZE, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, - &psp->cmd_buf_bo, &psp->cmd_buf_mc_addr, - (void **)&psp->cmd_buf_mem); - if (ret) - goto failed; + memset(psp->fence_buf, 0, PSP_FENCE_BUFFER_SIZE); - memset(psp->fence_buf, 0, PSP_FENCE_BUFFER_SIZE); - - ret = psp_ring_init(psp, PSP_RING_TYPE__KM); - if (ret) { - DRM_ERROR("PSP ring init failed!\n"); - goto failed; + ret = psp_ring_init(psp, PSP_RING_TYPE__KM); + if (ret) { + DRM_ERROR("PSP ring init failed!\n"); + goto failed; + } } -skip_memalloc: ret = psp_hw_start(psp); if (ret) goto failed; @@ -2592,13 +2598,6 @@ static int psp_hw_fini(void *handle) psp_tmr_terminate(psp); psp_ring_destroy(psp, PSP_RING_TYPE__KM); - amdgpu_bo_free_kernel(&psp->fw_pri_bo, - &psp->fw_pri_mc_addr, &psp->fw_pri_buf); - amdgpu_bo_free_kernel(&psp->fence_buf_bo, - &psp->fence_buf_mc_addr, &psp->fence_buf); - amdgpu_bo_free_kernel(&psp->cmd_buf_bo, &psp->cmd_buf_mc_addr, - (void **)&psp->cmd_buf_mem); - return 0; } -- 2.35.1