Re: [PATCH] drm/amdgpu: add a flag to indicate if a VM is attached to fpriv

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Am 24.05.23 um 11:23 schrieb Guchun Chen:
Recent code stores xcp_id to amdgpu bo for accounting memory
usage or find correct KFD node, and this xcp_id is from file
private data after opening device. However, not all VMs are
attached to this fpriv structure like the case in amdgpu_mes_self_test.
So add a flag to differentiate the cases. Otherwise, KASAN will
complain out of bound access.

The VM code shouldn't touch the fpriv in any way possible in the first place.

Instead we should provide the xcp_id during amdgpu_vm_init() and use it to allocate the root PD. All other PDs/PTs should then just use the xcp_id from the root PD.

Christian.


[   77.292314] BUG: KASAN: slab-out-of-bounds in amdgpu_vm_pt_create+0x17e/0x4b0 [amdgpu]
[   77.293845] Read of size 4 at addr ffff888102c48a48 by task modprobe/1069
[   77.294146] Call Trace:
[   77.294178]  <TASK>
[   77.294208]  dump_stack_lvl+0x49/0x63
[   77.294260]  print_report+0x16f/0x4a6
[   77.294307]  ? amdgpu_vm_pt_create+0x17e/0x4b0 [amdgpu]
[   77.295979]  ? kasan_complete_mode_report_info+0x3c/0x200
[   77.296057]  ? amdgpu_vm_pt_create+0x17e/0x4b0 [amdgpu]
[   77.297556]  kasan_report+0xb4/0x130
[   77.297609]  ? amdgpu_vm_pt_create+0x17e/0x4b0 [amdgpu]
[   77.299202]  __asan_load4+0x6f/0x90
[   77.299272]  amdgpu_vm_pt_create+0x17e/0x4b0 [amdgpu]
[   77.300796]  ? amdgpu_init+0x6e/0x1000 [amdgpu]
[   77.302222]  ? amdgpu_vm_pt_clear+0x750/0x750 [amdgpu]
[   77.303721]  ? preempt_count_sub+0x18/0xc0
[   77.303786]  amdgpu_vm_init+0x39e/0x870 [amdgpu]
[   77.305186]  ? amdgpu_vm_wait_idle+0x90/0x90 [amdgpu]
[   77.306683]  ? kasan_set_track+0x25/0x30
[   77.306737]  ? kasan_save_alloc_info+0x1b/0x30
[   77.306795]  ? __kasan_kmalloc+0x87/0xa0
[   77.306852]  amdgpu_mes_self_test+0x169/0x620 [amdgpu]

Fixes: ffc6deb773f7("drm/amdkfd: Store xcp partition id to amdgpu bo")
Signed-off-by: Guchun Chen <guchun.chen@xxxxxxx>
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c   |  2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c   |  2 +-
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c    |  5 ++++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h    |  5 ++++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 12 +++++++++---
  5 files changed, 19 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 41d047e5de69..79b80f9233db 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -1229,7 +1229,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
  		pasid = 0;
  	}
- r = amdgpu_vm_init(adev, &fpriv->vm);
+	r = amdgpu_vm_init(adev, &fpriv->vm, true);
  	if (r)
  		goto error_pasid;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
index 49bb6c03d606..3be5219edf88 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -1345,7 +1345,7 @@ int amdgpu_mes_self_test(struct amdgpu_device *adev)
  		goto error_pasid;
  	}
- r = amdgpu_vm_init(adev, vm);
+	r = amdgpu_vm_init(adev, vm, false);
  	if (r) {
  		DRM_ERROR("failed to initialize vm\n");
  		goto error_pasid;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 37b9d8a8dbec..47ffaa1526a0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2099,13 +2099,15 @@ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout)
   *
   * @adev: amdgpu_device pointer
   * @vm: requested vm
+ * @vm_attach_to_fpriv: flag to tell if vm is attached to file private data
   *
   * Init @vm fields.
   *
   * Returns:
   * 0 for success, error for failure.
   */
-int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
+int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+		   bool vm_attach_to_fpriv)
  {
  	struct amdgpu_bo *root_bo;
  	struct amdgpu_bo_vm *root;
@@ -2131,6 +2133,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
vm->pte_support_ats = false;
  	vm->is_compute_context = false;
+	vm->vm_attach_to_fpriv = vm_attach_to_fpriv;
vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
  				    AMDGPU_VM_USE_CPU_FOR_GFX);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index d551fca1780e..62ed14b1fc16 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -333,6 +333,9 @@ struct amdgpu_vm {
  	/* Flag to indicate if VM is used for compute */
  	bool			is_compute_context;
+ /* Flag to tell if VM is attached to file private data */
+	bool vm_attach_to_fpriv;
+
  	/* Memory partition number, -1 means any partition */
  	int8_t			mem_id;
  };
@@ -392,7 +395,7 @@ int amdgpu_vm_set_pasid(struct amdgpu_device *adev, struct amdgpu_vm *vm,
  			u32 pasid);
long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout);
-int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm);
+int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, bool vm_attach_to_fpriv);
  int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm);
  void amdgpu_vm_release_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm);
  void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
index cc3b1b596e56..16b3350aa896 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
@@ -502,7 +502,7 @@ int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm,
  int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
  			int level, bool immediate, struct amdgpu_bo_vm **vmbo)
  {
-	struct amdgpu_fpriv *fpriv = container_of(vm, struct amdgpu_fpriv, vm);
+	struct amdgpu_fpriv *fpriv;
  	struct amdgpu_bo_param bp;
  	struct amdgpu_bo *bo;
  	struct dma_resv *resv;
@@ -535,7 +535,10 @@ int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
bp.type = ttm_bo_type_kernel;
  	bp.no_wait_gpu = immediate;
-	bp.xcp_id_plus1 = fpriv->xcp_id == ~0 ? 0 : fpriv->xcp_id + 1;
+	if (vm->vm_attach_to_fpriv) {
+		fpriv = container_of(vm, struct amdgpu_fpriv, vm);
+		bp.xcp_id_plus1 = fpriv->xcp_id == ~0 ? 0 : fpriv->xcp_id + 1;
+	}
if (vm->root.bo)
  		bp.resv = vm->root.bo->tbo.base.resv;
@@ -561,7 +564,10 @@ int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
  	bp.type = ttm_bo_type_kernel;
  	bp.resv = bo->tbo.base.resv;
  	bp.bo_ptr_size = sizeof(struct amdgpu_bo);
-	bp.xcp_id_plus1 = fpriv->xcp_id == ~0 ? 0 : fpriv->xcp_id + 1;
+	if (vm->vm_attach_to_fpriv) {
+		fpriv = container_of(vm, struct amdgpu_fpriv, vm);
+		bp.xcp_id_plus1 = fpriv->xcp_id == ~0 ? 0 : fpriv->xcp_id + 1;
+	}
r = amdgpu_bo_create(adev, &bp, &(*vmbo)->shadow);




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux