[PATCH 06/18] drm/amdgpu/sriov:fix memory leak after gpu reset

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



I didn't get your point... how could bo_create_kernel solve my issue?

The thing here is that during gpu reset we invoke hw_init for every hw component, and by design hw_init shouldn't be doing anything software related, thus allocating the BO in hw_init is wrong.

Even switching to bo_create_kernel won't address the issue ...


BR Monk

-----Original Message-----
From: Christian König [mailto:ckoenig.leichtzumerken@xxxxxxxxx] 
Sent: 2017年9月18日 17:13
To: Liu, Monk <Monk.Liu at amd.com>; amd-gfx at lists.freedesktop.org
Subject: Re: [PATCH 06/18] drm/amdgpu/sriov:fix memory leak after gpu reset

Am 18.09.2017 um 08:11 schrieb Monk Liu:
> doing gpu reset will rerun all hw_init and thus ucode_init_bo is 
> invoked again, so we need to skip the fw_buf allocation during sriov 
> gpu reset to avoid memory leak.
>
> Change-Id: I31131eda1bd45ea2f5bdc50c5da5fc5a9fe9027d
> Signed-off-by: Monk Liu <Monk.Liu at amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h       |  3 ++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 64 +++++++++++++++----------------
>   2 files changed, 35 insertions(+), 32 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 6ff2959..3d0c633 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -1185,6 +1185,9 @@ struct amdgpu_firmware {
>   
>   	/* gpu info firmware data pointer */
>   	const struct firmware *gpu_info_fw;
> +
> +	void *fw_buf_ptr;
> +	uint64_t fw_buf_mc;
>   };
>   
>   /*
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
> index f306374..6564902 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
> @@ -360,8 +360,6 @@ static int amdgpu_ucode_patch_jt(struct amdgpu_firmware_info *ucode,
>   int amdgpu_ucode_init_bo(struct amdgpu_device *adev)
>   {
>   	struct amdgpu_bo **bo = &adev->firmware.fw_buf;
> -	uint64_t fw_mc_addr;
> -	void *fw_buf_ptr = NULL;
>   	uint64_t fw_offset = 0;
>   	int i, err;
>   	struct amdgpu_firmware_info *ucode = NULL;
> @@ -372,37 +370,39 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev)
>   		return 0;
>   	}
>   
> -	err = amdgpu_bo_create(adev, adev->firmware.fw_size, PAGE_SIZE, true,
> -				amdgpu_sriov_vf(adev) ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT,
> -				AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
> -				NULL, NULL, 0, bo);
> -	if (err) {
> -		dev_err(adev->dev, "(%d) Firmware buffer allocate failed\n", err);
> -		goto failed;
> -	}
> +	if (!amdgpu_sriov_vf(adev) || !adev->in_sriov_reset) {

Instead of all this, better use amdgpu_bo_create_kernel(); it should already include most of the handling necessary here.

Christian.

> +		err = amdgpu_bo_create(adev, adev->firmware.fw_size, PAGE_SIZE, true,
> +					amdgpu_sriov_vf(adev) ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT,
> +					AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS,
> +					NULL, NULL, 0, bo);
> +		if (err) {
> +			dev_err(adev->dev, "(%d) Firmware buffer allocate failed\n", err);
> +			goto failed;
> +		}
>   
> -	err = amdgpu_bo_reserve(*bo, false);
> -	if (err) {
> -		dev_err(adev->dev, "(%d) Firmware buffer reserve failed\n", err);
> -		goto failed_reserve;
> -	}
> +		err = amdgpu_bo_reserve(*bo, false);
> +		if (err) {
> +			dev_err(adev->dev, "(%d) Firmware buffer reserve failed\n", err);
> +			goto failed_reserve;
> +		}
>   
> -	err = amdgpu_bo_pin(*bo, amdgpu_sriov_vf(adev) ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT,
> -				&fw_mc_addr);
> -	if (err) {
> -		dev_err(adev->dev, "(%d) Firmware buffer pin failed\n", err);
> -		goto failed_pin;
> -	}
> +		err = amdgpu_bo_pin(*bo, amdgpu_sriov_vf(adev) ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT,
> +					&adev->firmware.fw_buf_mc);
> +		if (err) {
> +			dev_err(adev->dev, "(%d) Firmware buffer pin failed\n", err);
> +			goto failed_pin;
> +		}
>   
> -	err = amdgpu_bo_kmap(*bo, &fw_buf_ptr);
> -	if (err) {
> -		dev_err(adev->dev, "(%d) Firmware buffer kmap failed\n", err);
> -		goto failed_kmap;
> -	}
> +		err = amdgpu_bo_kmap(*bo, &adev->firmware.fw_buf_ptr);
> +		if (err) {
> +			dev_err(adev->dev, "(%d) Firmware buffer kmap failed\n", err);
> +			goto failed_kmap;
> +		}
>   
> -	amdgpu_bo_unreserve(*bo);
> +		amdgpu_bo_unreserve(*bo);
> +	}
>   
> -	memset(fw_buf_ptr, 0, adev->firmware.fw_size);
> +	memset(adev->firmware.fw_buf_ptr, 0, adev->firmware.fw_size);
>   
>   	/*
>   	 * if SMU loaded firmware, it needn't add SMC, UVD, and VCE
> @@ -421,14 +421,14 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev)
>   		ucode = &adev->firmware.ucode[i];
>   		if (ucode->fw) {
>   			header = (const struct common_firmware_header *)ucode->fw->data;
> -			amdgpu_ucode_init_single_fw(adev, ucode, fw_mc_addr + fw_offset,
> -						    (void *)((uint8_t *)fw_buf_ptr + fw_offset));
> +			amdgpu_ucode_init_single_fw(adev, ucode, adev->firmware.fw_buf_mc + fw_offset,
> +						    adev->firmware.fw_buf_ptr + fw_offset);
>   			if (i == AMDGPU_UCODE_ID_CP_MEC1 &&
>   			    adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
>   				const struct gfx_firmware_header_v1_0 *cp_hdr;
>   				cp_hdr = (const struct gfx_firmware_header_v1_0 *)ucode->fw->data;
> -				amdgpu_ucode_patch_jt(ucode, fw_mc_addr + fw_offset,
> -						    fw_buf_ptr + fw_offset);
> +				amdgpu_ucode_patch_jt(ucode,  adev->firmware.fw_buf_mc + fw_offset,
> +						    adev->firmware.fw_buf_ptr + fw_offset);
>   				fw_offset += ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE);
>   			}
>   			fw_offset += ALIGN(ucode->ucode_size, PAGE_SIZE);




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux