Re: [PATCH] drm/amdkfd: Fix pasid value leak

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 2025-02-11 17:54, Xiaogang.Chen wrote:
> From: Xiaogang Chen <xiaogang.chen@xxxxxxx>
> 
> Curret kfd does not allocate pasid values, instead uses pasid value for each
> vm from graphic driver. So should not prevent graphic driver from releasing
> pasid values since the values are allocated by graphic driver, not kfd driver
> anymore. This patch does not stop graphic driver release pasid values.
> 
> Fixes: 77b5e447427c(drm/amdkfd: Have kfd driver use same PASID values from
> graphic driver)
> 
> Signed-off-by: Xiaogang Chen xiaogang.chen@xxxxxxx
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h    |  2 --
>  .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 21 -------------------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c        | 14 -------------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h        |  1 -
>  drivers/gpu/drm/amd/amdkfd/kfd_process.c      |  3 +--
>  5 files changed, 1 insertion(+), 40 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> index 092dbd8bec97..236b73e283e8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> @@ -304,8 +304,6 @@ int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev,
>  					struct amdgpu_vm *avm,
>  					void **process_info,
>  					struct dma_fence **ef);
> -void amdgpu_amdkfd_gpuvm_release_process_vm(struct amdgpu_device *adev,
> -					void *drm_priv);
>  uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv);
>  size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev,
>  					uint8_t xcp_id);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> index 60062c10b083..ea3f7ee18923 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
> @@ -1586,27 +1586,6 @@ void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
>  	}
>  }
>  
> -void amdgpu_amdkfd_gpuvm_release_process_vm(struct amdgpu_device *adev,
> -					    void *drm_priv)
> -{
> -	struct amdgpu_vm *avm;
> -
> -	if (WARN_ON(!adev || !drm_priv))
> -		return;
> -
> -	avm = drm_priv_to_vm(drm_priv);
> -
> -	pr_debug("Releasing process vm %p\n", avm);
> -
> -	/* The original pasid of amdgpu vm has already been
> -	 * released during making a amdgpu vm to a compute vm
> -	 * The current pasid is managed by kfd and will be
> -	 * released on kfd process destroy. Set amdgpu pasid
> -	 * to 0 to avoid duplicate release.
> -	 */
> -	amdgpu_vm_release_compute(adev, avm);
> -}
> -
>  uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv)
>  {
>  	struct amdgpu_vm *avm = drm_priv_to_vm(drm_priv);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 48b2c0b3b315..ef4fe2df8398 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -2578,20 +2578,6 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
>  	return r;
>  }
>  
> -/**
> - * amdgpu_vm_release_compute - release a compute vm
> - * @adev: amdgpu_device pointer
> - * @vm: a vm turned into compute vm by calling amdgpu_vm_make_compute
> - *
> - * This is a correspondant of amdgpu_vm_make_compute. It decouples compute
> - * pasid from vm. Compute should stop use of vm after this call.
> - */
> -void amdgpu_vm_release_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
> -{
> -	amdgpu_vm_set_pasid(adev, vm, 0);
> -	vm->is_compute_context = false;
> -}
> -
>  /**
>   * amdgpu_vm_fini - tear down a vm instance
>   *
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index 160889e5e64d..daa2f9b33620 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -484,7 +484,6 @@ int amdgpu_vm_set_pasid(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>  long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout);
>  int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int32_t xcp_id);
>  int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm);
> -void amdgpu_vm_release_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm);
>  void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
>  int amdgpu_vm_lock_pd(struct amdgpu_vm *vm, struct drm_exec *exec,
>  		      unsigned int num_fences);
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> index c75373fd6ef1..cc5907e96ded 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> @@ -1059,8 +1059,7 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
>  		kfd_process_device_destroy_ib_mem(pdd);
>  
>  		if (pdd->drm_file) {
> -			amdgpu_amdkfd_gpuvm_release_process_vm(
> -					pdd->dev->adev, pdd->drm_priv);
> +			drm_priv_to_vm(pdd->drm_priv)->is_compute_context = false;

Setting is_compute_context to false is unnecessary because this code runs after the user mode process has terminated. The fput call just below will cause the destruction of the VM. It also feels like a layering violation. So just remove this line.

Other than that, the patch is

Reviewed-by: Felix Kuehling <felix.kuehling@xxxxxxx>

>  			fput(pdd->drm_file);
>  		}
>  




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux