Re: [PATCH V3] Revert "drm/amdgpu: remove vm sanity check from amdgpu_vm_make_compute" for Raven

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 2024-02-29 01:04, Jesse.Zhang wrote:
fix the issue:
"amdgpu: Failed to create process VM object".

[Why]when amdgpu initialized, seq64 do mampping and update bo mapping in vm page table.
But when clifo run. It also initializes a vm for a process device through the function kfd_process_device_init_vm and ensure the root PD is clean through the function amdgpu_vm_pt_is_root_clean.
So they have a conflict, and clinfo  always failed.

v1:
   - remove all the pte_supports_ats stuff from the amdgpu_vm code (Felix)

Signed-off-by: Jesse Zhang <Jesse.Zhang@xxxxxxx>
The headline should be updated. This is no longer a revert of the quoted patch. Other than that, this patch looks reasonable to me. One more comment inline.

---
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c    | 23 ----------
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h    |  3 --
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 56 +----------------------
  3 files changed, 1 insertion(+), 81 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index ed4a8c5d26d7..d004ace79536 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1385,10 +1385,6 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
  			struct amdgpu_bo_va_mapping, list);
  		list_del(&mapping->list);
- if (vm->pte_support_ats &&
-		    mapping->start < AMDGPU_GMC_HOLE_START)
-			init_pte_value = AMDGPU_PTE_DEFAULT_ATC;
-
  		r = amdgpu_vm_update_range(adev, vm, false, false, true, false,
  					   resv, mapping->start, mapping->last,
  					   init_pte_value, 0, 0, NULL, NULL,
@@ -2264,7 +2260,6 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
  	if (r)
  		return r;
- vm->pte_support_ats = false;
  	vm->is_compute_context = false;
vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
@@ -2350,30 +2345,12 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
   */
  int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
  {
-	bool pte_support_ats = (adev->asic_type == CHIP_RAVEN);
  	int r;
r = amdgpu_bo_reserve(vm->root.bo, true);
  	if (r)
  		return r;
- /* Check if PD needs to be reinitialized and do it before
-	 * changing any other state, in case it fails.
-	 */
-	if (pte_support_ats != vm->pte_support_ats) {
-		/* Sanity checks */
-		if (!amdgpu_vm_pt_is_root_clean(adev, vm)) {
-			r = -EINVAL;
-			goto unreserve_bo;
-		}
-
-		vm->pte_support_ats = pte_support_ats;
-		r = amdgpu_vm_pt_clear(adev, vm, to_amdgpu_bo_vm(vm->root.bo),
-				       false);
-		if (r)
-			goto unreserve_bo;
-	}
-
  	/* Update VM state */
  	vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
  				    AMDGPU_VM_USE_CPU_FOR_COMPUTE);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 42f6ddec50c1..9f6b5e1ccf34 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -357,9 +357,6 @@ struct amdgpu_vm {
  	/* Functions to use for VM table updates */
  	const struct amdgpu_vm_update_funcs	*update_funcs;
- /* Flag to indicate ATS support from PTE for GFX9 */
-	bool			pte_support_ats;
-
  	/* Up to 128 pending retry page faults */
  	DECLARE_KFIFO(faults, u64, 128);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
index a160265ddc07..2835cb3f76eb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
@@ -89,22 +89,6 @@ static unsigned int amdgpu_vm_pt_num_entries(struct amdgpu_device *adev,
  	return AMDGPU_VM_PTE_COUNT(adev);
  }
-/**
- * amdgpu_vm_pt_num_ats_entries - return the number of ATS entries in the root PD
- *
- * @adev: amdgpu_device pointer
- *
- * Returns:
- * The number of entries in the root page directory which needs the ATS setting.
- */
-static unsigned int amdgpu_vm_pt_num_ats_entries(struct amdgpu_device *adev)
-{
-	unsigned int shift;
-
-	shift = amdgpu_vm_pt_level_shift(adev, adev->vm_manager.root_level);
-	return AMDGPU_GMC_HOLE_START >> (shift + AMDGPU_GPU_PAGE_SHIFT);
-}
-
  /**
   * amdgpu_vm_pt_entries_mask - the mask to get the entry number of a PD/PT
   *
@@ -379,7 +363,7 @@ int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm,
  	struct ttm_operation_ctx ctx = { true, false };
  	struct amdgpu_vm_update_params params;
  	struct amdgpu_bo *ancestor = &vmbo->bo;
-	unsigned int entries, ats_entries;
+	unsigned int entries;
  	struct amdgpu_bo *bo = &vmbo->bo;
  	uint64_t addr;
  	int r, idx;
@@ -394,27 +378,6 @@ int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm,
  	}
entries = amdgpu_bo_size(bo) / 8;
-	if (!vm->pte_support_ats) {
-		ats_entries = 0;
-
-	} else if (!bo->parent) {
-		ats_entries = amdgpu_vm_pt_num_ats_entries(adev);
-		ats_entries = min(ats_entries, entries);
-		entries -= ats_entries;
-
-	} else {
-		struct amdgpu_vm_bo_base *pt;
-
-		pt = ancestor->vm_bo;
-		ats_entries = amdgpu_vm_pt_num_ats_entries(adev);
-		if ((pt - to_amdgpu_bo_vm(vm->root.bo)->entries) >=
-		    ats_entries) {
-			ats_entries = 0;
-		} else {
-			ats_entries = entries;
-			entries = 0;
-		}
-	}
r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
  	if (r)
@@ -445,23 +408,6 @@ int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm,
  		goto exit;
addr = 0;
-	if (ats_entries) {
-		uint64_t value = 0, flags;
-
-		flags = AMDGPU_PTE_DEFAULT_ATC;
-		if (level != AMDGPU_VM_PTB) {
-			/* Handle leaf PDEs as PTEs */
-			flags |= AMDGPU_PDE_PTE;
-			amdgpu_gmc_get_vm_pde(adev, level, &value, &flags);
-		}
-
-		r = vm->update_funcs->update(&params, vmbo, addr, 0,
-					     ats_entries, value, flags);
-		if (r)
-			goto exit;
-
-		addr += ats_entries * 8;
-	}
if (entries) {

I believe entries == 0 is not possible any more, so this condition could probably be removed.

Regards,
  Felix


  		uint64_t value = 0, flags = 0;



[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux