Re: [PATCH] drm/amdgpu: clean up psp ip if hw_init failed v2

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, Apr 20, 2022 at 5:48 PM Alice Wong <shiwei.wong@xxxxxxx> wrote:
>
> If at any point psp_hw_init failed, psp_hw_fini would not be called
> during unload due to ip_blocks[PSP].status.hw not being set to true.
> This could cause a memory leak when the driver unloads.
> As a rule of thumb, each IP block should cleanup themselves when
> their hw_init fails. Only previously intialized blocks should
> be cleaned up by the common framework.
>
> v1: Call IP blocks' respective hw_fini when hw_init failed from
>     the common framework
> v2: Call psp_hw_fini when psp_hw_init failed.
>
> Signed-off-by: Alice Wong <shiwei.wong@xxxxxxx>

I don't think this is a good idea.  The hw programming in hw_fini
makes no sense if the driver never set it up in the first place if
hw_init failed before initializing the hw.  It would be better to just
properly unwind the relevant functions.  Presumably the problem you
are seeing is the failure to free the GPU memory allocated in fw_fini,
depending on where it fails.  We should just allocate the memory in
sw_init; that is what we do in other IPs.  Does the attached patch fix
the issue you are seeing?

Alex

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 57 +++++++++++++------------
>  1 file changed, 29 insertions(+), 28 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
> index 5b9e48d44f5b..52b14efa848a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
> @@ -2537,6 +2537,34 @@ static int psp_load_fw(struct amdgpu_device *adev)
>         return ret;
>  }
>
> +static int psp_hw_fini(void *handle)
> +{
> +       struct amdgpu_device *adev = (struct amdgpu_device *)handle;
> +       struct psp_context *psp = &adev->psp;
> +
> +       if (psp->ta_fw) {
> +               psp_ras_terminate(psp);
> +               psp_securedisplay_terminate(psp);
> +               psp_rap_terminate(psp);
> +               psp_dtm_terminate(psp);
> +               psp_hdcp_terminate(psp);
> +       }
> +
> +       psp_asd_terminate(psp);
> +
> +       psp_tmr_terminate(psp);
> +       psp_ring_destroy(psp, PSP_RING_TYPE__KM);
> +
> +       amdgpu_bo_free_kernel(&psp->fw_pri_bo,
> +                             &psp->fw_pri_mc_addr, &psp->fw_pri_buf);
> +       amdgpu_bo_free_kernel(&psp->fence_buf_bo,
> +                             &psp->fence_buf_mc_addr, &psp->fence_buf);
> +       amdgpu_bo_free_kernel(&psp->cmd_buf_bo, &psp->cmd_buf_mc_addr,
> +                             (void **)&psp->cmd_buf_mem);
> +
> +       return 0;
> +}
> +
>  static int psp_hw_init(void *handle)
>  {
>         int ret;
> @@ -2563,37 +2591,10 @@ static int psp_hw_init(void *handle)
>  failed:
>         adev->firmware.load_type = AMDGPU_FW_LOAD_DIRECT;
>         mutex_unlock(&adev->firmware.mutex);
> +       psp_hw_fini(handle);
>         return -EINVAL;
>  }
>
> -static int psp_hw_fini(void *handle)
> -{
> -       struct amdgpu_device *adev = (struct amdgpu_device *)handle;
> -       struct psp_context *psp = &adev->psp;
> -
> -       if (psp->ta_fw) {
> -               psp_ras_terminate(psp);
> -               psp_securedisplay_terminate(psp);
> -               psp_rap_terminate(psp);
> -               psp_dtm_terminate(psp);
> -               psp_hdcp_terminate(psp);
> -       }
> -
> -       psp_asd_terminate(psp);
> -
> -       psp_tmr_terminate(psp);
> -       psp_ring_destroy(psp, PSP_RING_TYPE__KM);
> -
> -       amdgpu_bo_free_kernel(&psp->fw_pri_bo,
> -                             &psp->fw_pri_mc_addr, &psp->fw_pri_buf);
> -       amdgpu_bo_free_kernel(&psp->fence_buf_bo,
> -                             &psp->fence_buf_mc_addr, &psp->fence_buf);
> -       amdgpu_bo_free_kernel(&psp->cmd_buf_bo, &psp->cmd_buf_mc_addr,
> -                             (void **)&psp->cmd_buf_mem);
> -
> -       return 0;
> -}
> -
>  static int psp_suspend(void *handle)
>  {
>         int ret;
> --
> 2.25.1
>
From f98a157d52e199c8530dc0efc91ba3bd5b7ce3cc Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexander.deucher@xxxxxxx>
Date: Thu, 21 Apr 2022 01:21:52 -0400
Subject: [PATCH] drm/amdgpu/psp: move PSP memory alloc from hw_init to sw_init

Memory allocations should be done in sw_init.  hw_init should
just be hardware programming needed to initialize the IP block.
This is how most other IP blocks work.  Move the GPU memory
allocations from psp hw_init to psp sw_init and move the memory
free to sw_fini.  This also fixes a potential GPU memory leak
if psp hw_init fails.

Signed-off-by: Alex Deucher <alexander.deucher@xxxxxxx>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 95 ++++++++++++-------------
 1 file changed, 47 insertions(+), 48 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index a6acec1a6155..21aa556a6bef 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -357,7 +357,39 @@ static int psp_sw_init(void *handle)
 		}
 	}
 
+	ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG,
+				      amdgpu_sriov_vf(adev) ?
+				      AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT,
+				      &psp->fw_pri_bo,
+				      &psp->fw_pri_mc_addr,
+				      &psp->fw_pri_buf);
+	if (ret)
+		return ret;
+
+	ret = amdgpu_bo_create_kernel(adev, PSP_FENCE_BUFFER_SIZE, PAGE_SIZE,
+				      AMDGPU_GEM_DOMAIN_VRAM,
+				      &psp->fence_buf_bo,
+				      &psp->fence_buf_mc_addr,
+				      &psp->fence_buf);
+	if (ret)
+		goto failed1;
+
+	ret = amdgpu_bo_create_kernel(adev, PSP_CMD_BUFFER_SIZE, PAGE_SIZE,
+				      AMDGPU_GEM_DOMAIN_VRAM,
+				      &psp->cmd_buf_bo, &psp->cmd_buf_mc_addr,
+				      (void **)&psp->cmd_buf_mem);
+	if (ret)
+		goto failed2;
+
 	return 0;
+
+failed2:
+	amdgpu_bo_free_kernel(&psp->fw_pri_bo,
+			      &psp->fw_pri_mc_addr, &psp->fw_pri_buf);
+failed1:
+	amdgpu_bo_free_kernel(&psp->fence_buf_bo,
+			      &psp->fence_buf_mc_addr, &psp->fence_buf);
+	return ret;
 }
 
 static int psp_sw_fini(void *handle)
@@ -391,6 +423,13 @@ static int psp_sw_fini(void *handle)
 	kfree(cmd);
 	cmd = NULL;
 
+	amdgpu_bo_free_kernel(&psp->fw_pri_bo,
+			      &psp->fw_pri_mc_addr, &psp->fw_pri_buf);
+	amdgpu_bo_free_kernel(&psp->fence_buf_bo,
+			      &psp->fence_buf_mc_addr, &psp->fence_buf);
+	amdgpu_bo_free_kernel(&psp->cmd_buf_bo, &psp->cmd_buf_mc_addr,
+			      (void **)&psp->cmd_buf_mem);
+
 	return 0;
 }
 
@@ -2430,51 +2469,18 @@ static int psp_load_fw(struct amdgpu_device *adev)
 	struct psp_context *psp = &adev->psp;
 
 	if (amdgpu_sriov_vf(adev) && amdgpu_in_reset(adev)) {
-		psp_ring_stop(psp, PSP_RING_TYPE__KM); /* should not destroy ring, only stop */
-		goto skip_memalloc;
-	}
-
-	if (amdgpu_sriov_vf(adev)) {
-		ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG,
-						AMDGPU_GEM_DOMAIN_VRAM,
-						&psp->fw_pri_bo,
-						&psp->fw_pri_mc_addr,
-						&psp->fw_pri_buf);
+		/* should not destroy ring, only stop */
+		psp_ring_stop(psp, PSP_RING_TYPE__KM);
 	} else {
-		ret = amdgpu_bo_create_kernel(adev, PSP_1_MEG, PSP_1_MEG,
-						AMDGPU_GEM_DOMAIN_GTT,
-						&psp->fw_pri_bo,
-						&psp->fw_pri_mc_addr,
-						&psp->fw_pri_buf);
-	}
-
-	if (ret)
-		goto failed;
-
-	ret = amdgpu_bo_create_kernel(adev, PSP_FENCE_BUFFER_SIZE, PAGE_SIZE,
-					AMDGPU_GEM_DOMAIN_VRAM,
-					&psp->fence_buf_bo,
-					&psp->fence_buf_mc_addr,
-					&psp->fence_buf);
-	if (ret)
-		goto failed;
-
-	ret = amdgpu_bo_create_kernel(adev, PSP_CMD_BUFFER_SIZE, PAGE_SIZE,
-				      AMDGPU_GEM_DOMAIN_VRAM,
-				      &psp->cmd_buf_bo, &psp->cmd_buf_mc_addr,
-				      (void **)&psp->cmd_buf_mem);
-	if (ret)
-		goto failed;
+		memset(psp->fence_buf, 0, PSP_FENCE_BUFFER_SIZE);
 
-	memset(psp->fence_buf, 0, PSP_FENCE_BUFFER_SIZE);
-
-	ret = psp_ring_init(psp, PSP_RING_TYPE__KM);
-	if (ret) {
-		DRM_ERROR("PSP ring init failed!\n");
-		goto failed;
+		ret = psp_ring_init(psp, PSP_RING_TYPE__KM);
+		if (ret) {
+			DRM_ERROR("PSP ring init failed!\n");
+			goto failed;
+		}
 	}
 
-skip_memalloc:
 	ret = psp_hw_start(psp);
 	if (ret)
 		goto failed;
@@ -2592,13 +2598,6 @@ static int psp_hw_fini(void *handle)
 	psp_tmr_terminate(psp);
 	psp_ring_destroy(psp, PSP_RING_TYPE__KM);
 
-	amdgpu_bo_free_kernel(&psp->fw_pri_bo,
-			      &psp->fw_pri_mc_addr, &psp->fw_pri_buf);
-	amdgpu_bo_free_kernel(&psp->fence_buf_bo,
-			      &psp->fence_buf_mc_addr, &psp->fence_buf);
-	amdgpu_bo_free_kernel(&psp->cmd_buf_bo, &psp->cmd_buf_mc_addr,
-			      (void **)&psp->cmd_buf_mem);
-
 	return 0;
 }
 
-- 
2.35.1


[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux