I didn't get your point... how could bo_create_kernel solve my issue? The thing here is that during gpu reset we invoke hw_init for every hw component, and by design hw_init shouldn't be doing anything software related, thus allocating the BO in hw_init is wrong. Even switching to bo_create_kernel won't address the issue ... BR Monk -----Original Message----- From: Christian König [mailto:ckoenig.leichtzumerken@xxxxxxxxx] Sent: 2017年9月18日 17:13 To: Liu, Monk <Monk.Liu at amd.com>; amd-gfx at lists.freedesktop.org Subject: Re: [PATCH 06/18] drm/amdgpu/sriov:fix memory leak after gpu reset Am 18.09.2017 um 08:11 schrieb Monk Liu: > doing gpu reset will rerun all hw_init and thus ucode_init_bo is > invoked again, so we need to skip the fw_buf allocation during sriov > gpu reset to avoid memory leak. > > Change-Id: I31131eda1bd45ea2f5bdc50c5da5fc5a9fe9027d > Signed-off-by: Monk Liu <Monk.Liu at amd.com> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu.h | 3 ++ > drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c | 64 +++++++++++++++---------------- > 2 files changed, 35 insertions(+), 32 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > index 6ff2959..3d0c633 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > @@ -1185,6 +1185,9 @@ struct amdgpu_firmware { > > /* gpu info firmware data pointer */ > const struct firmware *gpu_info_fw; > + > + void *fw_buf_ptr; > + uint64_t fw_buf_mc; > }; > > /* > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c > index f306374..6564902 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c > @@ -360,8 +360,6 @@ static int amdgpu_ucode_patch_jt(struct amdgpu_firmware_info *ucode, > int amdgpu_ucode_init_bo(struct amdgpu_device *adev) > { > struct amdgpu_bo **bo = &adev->firmware.fw_buf; > - uint64_t fw_mc_addr; > - void *fw_buf_ptr = NULL; > uint64_t 
fw_offset = 0; > int i, err; > struct amdgpu_firmware_info *ucode = NULL; @@ -372,37 +370,39 @@ > int amdgpu_ucode_init_bo(struct amdgpu_device *adev) > return 0; > } > > - err = amdgpu_bo_create(adev, adev->firmware.fw_size, PAGE_SIZE, true, > - amdgpu_sriov_vf(adev) ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT, > - AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, > - NULL, NULL, 0, bo); > - if (err) { > - dev_err(adev->dev, "(%d) Firmware buffer allocate failed\n", err); > - goto failed; > - } > + if (!amdgpu_sriov_vf(adev) || !adev->in_sriov_reset) { Instead of all this, better use amdgpu_bo_create_kernel(); it should already include most of the handling necessary here. Christian. > + err = amdgpu_bo_create(adev, adev->firmware.fw_size, PAGE_SIZE, true, > + amdgpu_sriov_vf(adev) ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT, > + AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS, > + NULL, NULL, 0, bo); > + if (err) { > + dev_err(adev->dev, "(%d) Firmware buffer allocate failed\n", err); > + goto failed; > + } > > - err = amdgpu_bo_reserve(*bo, false); > - if (err) { > - dev_err(adev->dev, "(%d) Firmware buffer reserve failed\n", err); > - goto failed_reserve; > - } > + err = amdgpu_bo_reserve(*bo, false); > + if (err) { > + dev_err(adev->dev, "(%d) Firmware buffer reserve failed\n", err); > + goto failed_reserve; > + } > > - err = amdgpu_bo_pin(*bo, amdgpu_sriov_vf(adev) ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT, > - &fw_mc_addr); > - if (err) { > - dev_err(adev->dev, "(%d) Firmware buffer pin failed\n", err); > - goto failed_pin; > - } > + err = amdgpu_bo_pin(*bo, amdgpu_sriov_vf(adev) ? 
AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT, > + &adev->firmware.fw_buf_mc); > + if (err) { > + dev_err(adev->dev, "(%d) Firmware buffer pin failed\n", err); > + goto failed_pin; > + } > > - err = amdgpu_bo_kmap(*bo, &fw_buf_ptr); > - if (err) { > - dev_err(adev->dev, "(%d) Firmware buffer kmap failed\n", err); > - goto failed_kmap; > - } > + err = amdgpu_bo_kmap(*bo, &adev->firmware.fw_buf_ptr); > + if (err) { > + dev_err(adev->dev, "(%d) Firmware buffer kmap failed\n", err); > + goto failed_kmap; > + } > > - amdgpu_bo_unreserve(*bo); > + amdgpu_bo_unreserve(*bo); > + } > > - memset(fw_buf_ptr, 0, adev->firmware.fw_size); > + memset(adev->firmware.fw_buf_ptr, 0, adev->firmware.fw_size); > > /* > * if SMU loaded firmware, it needn't add SMC, UVD, and VCE @@ > -421,14 +421,14 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev) > ucode = &adev->firmware.ucode[i]; > if (ucode->fw) { > header = (const struct common_firmware_header *)ucode->fw->data; > - amdgpu_ucode_init_single_fw(adev, ucode, fw_mc_addr + fw_offset, > - (void *)((uint8_t *)fw_buf_ptr + fw_offset)); > + amdgpu_ucode_init_single_fw(adev, ucode, adev->firmware.fw_buf_mc + fw_offset, > + adev->firmware.fw_buf_ptr + fw_offset); > if (i == AMDGPU_UCODE_ID_CP_MEC1 && > adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { > const struct gfx_firmware_header_v1_0 *cp_hdr; > cp_hdr = (const struct gfx_firmware_header_v1_0 *)ucode->fw->data; > - amdgpu_ucode_patch_jt(ucode, fw_mc_addr + fw_offset, > - fw_buf_ptr + fw_offset); > + amdgpu_ucode_patch_jt(ucode, adev->firmware.fw_buf_mc + fw_offset, > + adev->firmware.fw_buf_ptr + fw_offset); > fw_offset += ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE); > } > fw_offset += ALIGN(ucode->ucode_size, PAGE_SIZE);