Re: [PATCH 04/12] drm/amdgpu: move IV prescreening into the GMC code

Christian König <ckoenig.leichtzumerken@xxxxxxxxx> · Wed, 10 Oct 2018 09:08:01 +0200

Yeah, exactly my thinking.

Basically the long term goal is to move most of the reporting and 
handling of faults into amdgpu_gmc.c. Otherwise we would duplicate a lot 
of handling for future hw generations.

On the other hand if the approach with the second IH ring buffer works 
as expected we most likely won't need the pre-screening anymore at all. 
But that needs more work to be 100% sure.

Christian.

Am 10.10.2018 um 01:46 schrieb Felix Kuehling:
I realized that most of the code in gmc_v9_0_psescreen_iv is not
actually hardware-specific. If it was not prescreening, but using an
amdgpu_iv_entry that was already parsed, I think it could just be a
generic function for processing retry faults:

   * looking up the VM of a fault
   * storing retry faults in a per-VM fifo
   * dropping faults that have already been seen

In other words, it's just a generic top half interrupt handler for retry
faults while the bottom half (worker thread) would use the per-VM FIFOs
to handle those pending retry faults.

Regards,
   Felix


On 2018-09-26 09:53 AM, Christian König wrote:
The GMC/VM subsystem is causing the faults, so move the handling here as
well.

Signed-off-by: Christian König <christian.koenig@xxxxxxx>
---
  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c  | 59 +++++++++++++++++++++++++++++
  drivers/gpu/drm/amd/amdgpu/vega10_ih.c | 69 ----------------------------------
  2 files changed, 59 insertions(+), 69 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 729a2c230f91..f8d69ab85fc3 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -244,6 +244,62 @@ static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
  	return 0;
  }
  
+/**
+ * vega10_ih_prescreen_iv - prescreen an interrupt vector
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Returns true if the interrupt vector should be further processed.
+ */
+static bool gmc_v9_0_prescreen_iv(struct amdgpu_device *adev,
+				  struct amdgpu_iv_entry *entry,
+				  uint64_t addr)
+{
+	struct amdgpu_vm *vm;
+	u64 key;
+	int r;
+
+	/* No PASID, can't identify faulting process */
+	if (!entry->pasid)
+		return true;
+
+	/* Not a retry fault */
+	if (!(entry->src_data[1] & 0x80))
+		return true;
+
+	/* Track retry faults in per-VM fault FIFO. */
+	spin_lock(&adev->vm_manager.pasid_lock);
+	vm = idr_find(&adev->vm_manager.pasid_idr, entry->pasid);
+	if (!vm) {
+		/* VM not found, process it normally */
+		spin_unlock(&adev->vm_manager.pasid_lock);
+		return true;
+	}
+
+	key = AMDGPU_VM_FAULT(entry->pasid, addr);
+	r = amdgpu_vm_add_fault(vm->fault_hash, key);
+
+	/* Hash table is full or the fault is already being processed,
+	 * ignore further page faults
+	 */
+	if (r != 0) {
+		spin_unlock(&adev->vm_manager.pasid_lock);
+		return false;
+	}
+	/* No locking required with single writer and single reader */
+	r = kfifo_put(&vm->faults, key);
+	if (!r) {
+		/* FIFO is full. Ignore it until there is space */
+		amdgpu_vm_clear_fault(vm->fault_hash, key);
+		spin_unlock(&adev->vm_manager.pasid_lock);
+		return false;
+	}
+
+	spin_unlock(&adev->vm_manager.pasid_lock);
+	/* It's the first fault for this address, process it normally */
+	return true;
+}
+
  static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
  				struct amdgpu_irq_src *source,
  				struct amdgpu_iv_entry *entry)
@@ -255,6 +311,9 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
  	addr = (u64)entry->src_data[0] << 12;
  	addr |= ((u64)entry->src_data[1] & 0xf) << 44;
  
+	if (!gmc_v9_0_prescreen_iv(adev, entry, addr))
+		return 1;
+
  	if (!amdgpu_sriov_vf(adev)) {
  		status = RREG32(hub->vm_l2_pro_fault_status);
  		WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
index 0f50bef87163..0f68a0cd1fbf 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
@@ -228,76 +228,7 @@ static u32 vega10_ih_get_wptr(struct amdgpu_device *adev)
   */
  static bool vega10_ih_prescreen_iv(struct amdgpu_device *adev)
  {
-	u32 ring_index = adev->irq.ih.rptr >> 2;
-	u32 dw0, dw3, dw4, dw5;
-	u16 pasid;
-	u64 addr, key;
-	struct amdgpu_vm *vm;
-	int r;
-
-	dw0 = le32_to_cpu(adev->irq.ih.ring[ring_index + 0]);
-	dw3 = le32_to_cpu(adev->irq.ih.ring[ring_index + 3]);
-	dw4 = le32_to_cpu(adev->irq.ih.ring[ring_index + 4]);
-	dw5 = le32_to_cpu(adev->irq.ih.ring[ring_index + 5]);
-
-	/* Filter retry page faults, let only the first one pass. If
-	 * there are too many outstanding faults, ignore them until
-	 * some faults get cleared.
-	 */
-	switch (dw0 & 0xff) {
-	case SOC15_IH_CLIENTID_VMC:
-	case SOC15_IH_CLIENTID_UTCL2:
-		break;
-	default:
-		/* Not a VM fault */
-		return true;
-	}
-
-	pasid = dw3 & 0xffff;
-	/* No PASID, can't identify faulting process */
-	if (!pasid)
-		return true;
-
-	/* Not a retry fault */
-	if (!(dw5 & 0x80))
-		return true;
-
-	/* Track retry faults in per-VM fault FIFO. */
-	spin_lock(&adev->vm_manager.pasid_lock);
-	vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
-	addr = ((u64)(dw5 & 0xf) << 44) | ((u64)dw4 << 12);
-	key = AMDGPU_VM_FAULT(pasid, addr);
-	if (!vm) {
-		/* VM not found, process it normally */
-		spin_unlock(&adev->vm_manager.pasid_lock);
-		return true;
-	} else {
-		r = amdgpu_vm_add_fault(vm->fault_hash, key);
-
-		/* Hash table is full or the fault is already being processed,
-		 * ignore further page faults
-		 */
-		if (r != 0) {
-			spin_unlock(&adev->vm_manager.pasid_lock);
-			goto ignore_iv;
-		}
-	}
-	/* No locking required with single writer and single reader */
-	r = kfifo_put(&vm->faults, key);
-	if (!r) {
-		/* FIFO is full. Ignore it until there is space */
-		amdgpu_vm_clear_fault(vm->fault_hash, key);
-		spin_unlock(&adev->vm_manager.pasid_lock);
-		goto ignore_iv;
-	}
-
-	spin_unlock(&adev->vm_manager.pasid_lock);
-	/* It's the first fault for this address, process it normally */
  	return true;
-
-ignore_iv:
-	adev->irq.ih.rptr += 32;
-	return false;
  }
  
  /**

_______________________________________________
amd-gfx mailing list
amd-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/amd-gfx