Re: [PATCH v2 06/07] drm/amdgpu: add option params to enforce process isolation between graphics and compute

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



I can't see the other patches in this series, so I'm missing some context. I don't understand what "process isolation between graphics and compute" means here. It seems to be unrelated to KFD compute. This patch seems to be mostly about handling of reserved VMIDs. Maybe you're trying to avoid running Vulcan graphics and compute concurrently? But this does not prevent concurrency with KFD compute.

Can you clarify what this is for?

Thanks,
  Felix


Am 2023-06-07 um 06:57 schrieb Chong Li:
enforce process isolation between graphics and compute via using the same reserved vmid.

v2: remove params "struct amdgpu_vm *vm" from
     amdgpu_vmid_alloc_reserved and amdgpu_vmid_free_reserved.

Signed-off-by: Chong Li <chongli2@xxxxxxx>
---
  drivers/gpu/drm/amd/amdgpu/amdgpu.h     |  1 +
  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 10 +++++++++-
  drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c | 17 +++++++----------
  drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h |  6 ++----
  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 22 +++++++++++++++++-----
  5 files changed, 36 insertions(+), 20 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index ce196badf42d..ef098a7287d0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -215,6 +215,7 @@ extern int amdgpu_force_asic_type;
  extern int amdgpu_smartshift_bias;
  extern int amdgpu_use_xgmi_p2p;
  extern int amdgpu_mtype_local;
+extern bool enforce_isolation;
  #ifdef CONFIG_HSA_AMD
  extern int sched_policy;
  extern bool debug_evictions;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 3d91e123f9bd..fdb6fb8229ab 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -153,7 +153,7 @@ uint amdgpu_pg_mask = 0xffffffff;
  uint amdgpu_sdma_phase_quantum = 32;
  char *amdgpu_disable_cu;
  char *amdgpu_virtual_display;
-
+bool enforce_isolation;
  /*
   * OverDrive(bit 14) disabled by default
   * GFX DCS(bit 19) disabled by default
@@ -973,6 +973,14 @@ MODULE_PARM_DESC(
  						4 = AMDGPU_CPX_PARTITION_MODE)");
  module_param_named(user_partt_mode, amdgpu_user_partt_mode, uint, 0444);
+
+/**
+ * DOC: enforce_isolation (bool)
+ * enforce process isolation between graphics and compute via using the same reserved vmid.
+ */
+module_param(enforce_isolation, bool, 0444);
+MODULE_PARM_DESC(enforce_isolation, "enforce process isolation between graphics and compute . enforce_isolation = on");
+
  /* These devices are not supported by amdgpu.
   * They are supported by the mach64, r128, radeon drivers
   */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
index c991ca0b7a1c..ff1ea99292fb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
@@ -409,7 +409,7 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
  	if (r || !idle)
  		goto error;
- if (vm->reserved_vmid[vmhub]) {
+	if (vm->reserved_vmid[vmhub] || (enforce_isolation && (vmhub == AMDGPU_GFXHUB(0)))) {
  		r = amdgpu_vmid_grab_reserved(vm, ring, job, &id, fence);
  		if (r || !id)
  			goto error;
@@ -460,14 +460,11 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
  }
int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev,
-			       struct amdgpu_vm *vm,
  			       unsigned vmhub)
  {
  	struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
mutex_lock(&id_mgr->lock);
-	if (vm->reserved_vmid[vmhub])
-		goto unlock;
++id_mgr->reserved_use_count;
  	if (!id_mgr->reserved) {
@@ -479,27 +476,23 @@ int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev,
  		list_del_init(&id->list);
  		id_mgr->reserved = id;
  	}
-	vm->reserved_vmid[vmhub] = true;
-unlock:
  	mutex_unlock(&id_mgr->lock);
  	return 0;
  }
void amdgpu_vmid_free_reserved(struct amdgpu_device *adev,
-			       struct amdgpu_vm *vm,
  			       unsigned vmhub)
  {
  	struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
mutex_lock(&id_mgr->lock);
-	if (vm->reserved_vmid[vmhub] &&
-	    !--id_mgr->reserved_use_count) {
+	if (!--id_mgr->reserved_use_count) {
  		/* give the reserved ID back to normal round robin */
  		list_add(&id_mgr->reserved->list, &id_mgr->ids_lru);
  		id_mgr->reserved = NULL;
  	}
-	vm->reserved_vmid[vmhub] = false;
+
  	mutex_unlock(&id_mgr->lock);
  }
@@ -578,6 +571,10 @@ void amdgpu_vmid_mgr_init(struct amdgpu_device *adev)
  			list_add_tail(&id_mgr->ids[j].list, &id_mgr->ids_lru);
  		}
  	}
+	/* alloc a default reserved vmid to enforce isolation */
+	if (enforce_isolation)
+		amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(0));
+
  }
/**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
index d1cc09b45da4..68add23dc87c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
@@ -79,11 +79,9 @@ void amdgpu_pasid_free_delayed(struct dma_resv *resv,
  bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev,
  			       struct amdgpu_vmid *id);
  int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev,
-			       struct amdgpu_vm *vm,
-			       unsigned vmhub);
+					unsigned vmhub);
  void amdgpu_vmid_free_reserved(struct amdgpu_device *adev,
-			       struct amdgpu_vm *vm,
-			       unsigned vmhub);
+					unsigned vmhub);
  int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
  		     struct amdgpu_job *job, struct dma_fence **fence);
  void amdgpu_vmid_reset(struct amdgpu_device *adev, unsigned vmhub,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index ea3d0be152fc..73900ab545c9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2358,8 +2358,14 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
  	}
dma_fence_put(vm->last_update);
-	for (i = 0; i < AMDGPU_MAX_VMHUBS; i++)
-		amdgpu_vmid_free_reserved(adev, vm, i);
+
+	for (i = 0; i < AMDGPU_MAX_VMHUBS; i++) {
+		if (vm->reserved_vmid[i]) {
+			amdgpu_vmid_free_reserved(adev, i);
+			vm->reserved_vmid[i] = false;
+		}
+	}
+
  }
/**
@@ -2447,13 +2453,19 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
  	switch (args->in.op) {
  	case AMDGPU_VM_OP_RESERVE_VMID:
  		/* We only have requirement to reserve vmid from gfxhub */
-		r = amdgpu_vmid_alloc_reserved(adev, &fpriv->vm,
-					       AMDGPU_GFXHUB(0));
+		if (!fpriv->vm.reserved_vmid[AMDGPU_GFXHUB(0)]) {
+			r = amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(0));
+			fpriv->vm.reserved_vmid[AMDGPU_GFXHUB(0)] = true;
+		}
+
  		if (r)
  			return r;
  		break;
  	case AMDGPU_VM_OP_UNRESERVE_VMID:
-		amdgpu_vmid_free_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB(0));
+		if (fpriv->vm.reserved_vmid[AMDGPU_GFXHUB(0)]) {
+			amdgpu_vmid_free_reserved(adev, AMDGPU_GFXHUB(0));
+			fpriv->vm.reserved_vmid[AMDGPU_GFXHUB(0)] = false;
+		}
  		break;
  	default:
  		return -EINVAL;



[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux