Re: [RFC PATCH v2] drm/amdgpu/ttm: optimize vram access in amdgpu_ttm_access_memory()

Christian König <ckoenig.leichtzumerken@xxxxxxxxx> · Tue, 13 Jul 2021 09:11:30 +0200

Am 13.07.21 um 05:23 schrieb Kevin Wang:
1. using vram aper to access vram if possible
2. avoid MM_INDEX/MM_DATA is not working when mmio protect feature is
enabled.

Signed-off-by: Kevin Wang <kevin1.wang@xxxxxxx>
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 126 +++++++++++++++++-------
  1 file changed, 89 insertions(+), 37 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 2aa2eb5de37a..8f6f605bc459 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1407,6 +1407,91 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
  	return ttm_bo_eviction_valuable(bo, place);
  }
  
+static void amdgpu_ttm_vram_mm_align_access(struct amdgpu_device *adev, loff_t pos,
+					    uint32_t *value, bool write)

Please drop the _vram_ and _align_ part from the name. Just 
amdgpu_device_mm_access.

+{
+	unsigned long flags;
+
+	BUG_ON(!IS_ALIGNED(pos, 4));
+
+	spin_lock_irqsave(&adev->mmio_idx_lock, flags);
+	WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
+	WREG32_NO_KIQ(mmMM_INDEX_HI, pos >> 31);
+	if (write)
+		WREG32_NO_KIQ(mmMM_DATA, *value);
+	else
+		*value = RREG32_NO_KIQ(mmMM_DATA);
+	spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
+}

That should still be in amdgpu_device.c and you completely messed up the 
drm_dev_enter()/drm_dev_exit() annotation.

Looks like you are working on an old branch where that is not yet merged?

+
+static void amdgpu_ttm_vram_mm_access(struct amdgpu_device *adev, loff_t pos,
+				      void *buf, size_t size, bool write)
+{
+	while (size) {
+		uint64_t aligned_pos = ALIGN_DOWN(pos, 4);
+		uint64_t bytes = 4 - (pos & 0x3);
+		uint32_t shift = (pos & 0x3) * 8;
+		uint32_t mask = 0xffffffff << shift;
+		uint32_t value = 0;
+
+		if (size < bytes) {
+			mask &= 0xffffffff >> (bytes - size) * 8;
+			bytes = size;
+		}
+
+		if (mask != 0xffffffff) {
+			amdgpu_ttm_vram_mm_align_access(adev, aligned_pos, &value, false);
+			if (write) {
+				value &= ~mask;
+				value |= (*(uint32_t *)buf << shift) & mask;
+				amdgpu_ttm_vram_mm_align_access(adev, aligned_pos, &value, true);
+			} else {
+				value = (value & mask) >> shift;
+				memcpy(buf, &value, bytes);
+			}
+		} else {
+			amdgpu_ttm_vram_mm_align_access(adev, aligned_pos, buf, write);
+		}
+
+		pos += bytes;
+		buf += bytes;
+		size -= bytes;
+	}
+}
+
+static void amdgpu_ttm_vram_access(struct amdgpu_device *adev, loff_t pos,
+				   void *buf, size_t size, bool write)
+{
+	uint64_t last;
+
+#ifdef CONFIG_64BIT
+	last = min(pos + size, adev->gmc.visible_vram_size);
+	if (last > pos) {
+		void __iomem *addr = adev->mman.aper_base_kaddr + pos;
+		size_t count = last - pos;
+
+		if (write) {
+			memcpy_toio(addr, buf, count);
+			mb();
+			amdgpu_device_flush_hdp(adev, NULL);
+		} else {
+			amdgpu_device_invalidate_hdp(adev, NULL);
+			mb();
+			memcpy_fromio(buf, addr, count);
+		}
+
+		if (count == size)
+			return;
+
+		pos += count;
+		buf += count;
+		size -= count;
+	}
+#endif

I would put this as a separate function into amdgpu_device.c.

But all of this should only be the second step since we need a much 
smaller patch for backporting first.

+
+	amdgpu_ttm_vram_mm_access(adev, pos, buf, size, write);
+}
+
  /**
   * amdgpu_ttm_access_memory - Read or Write memory that backs a buffer object.
   *
@@ -1426,8 +1511,6 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
  	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
  	struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
  	struct amdgpu_res_cursor cursor;
-	unsigned long flags;
-	uint32_t value = 0;
  	int ret = 0;
  
  	if (bo->mem.mem_type != TTM_PL_VRAM)
@@ -1435,41 +1518,10 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
  
  	amdgpu_res_first(&bo->mem, offset, len, &cursor);
  	while (cursor.remaining) {
-		uint64_t aligned_pos = cursor.start & ~(uint64_t)3;
-		uint64_t bytes = 4 - (cursor.start & 3);
-		uint32_t shift = (cursor.start & 3) * 8;
-		uint32_t mask = 0xffffffff << shift;
-
-		if (cursor.size < bytes) {
-			mask &= 0xffffffff >> (bytes - cursor.size) * 8;
-			bytes = cursor.size;
-		}
-
-		if (mask != 0xffffffff) {
-			spin_lock_irqsave(&adev->mmio_idx_lock, flags);
-			WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)aligned_pos) | 0x80000000);
-			WREG32_NO_KIQ(mmMM_INDEX_HI, aligned_pos >> 31);
-			value = RREG32_NO_KIQ(mmMM_DATA);
-			if (write) {
-				value &= ~mask;
-				value |= (*(uint32_t *)buf << shift) & mask;
-				WREG32_NO_KIQ(mmMM_DATA, value);
-			}
-			spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
-			if (!write) {
-				value = (value & mask) >> shift;
-				memcpy(buf, &value, bytes);
-			}

This here is the problematic part and should use 
amdgpu_ttm_vram_access() instead.

That should be implemented first since we might need to backport that.

Regards,
Christian.

-		} else {
-			bytes = cursor.size & ~0x3ULL;
-			amdgpu_device_vram_access(adev, cursor.start,
-						  (uint32_t *)buf, bytes,
-						  write);
-		}
-
-		ret += bytes;
-		buf = (uint8_t *)buf + bytes;
-		amdgpu_res_next(&cursor, bytes);
+		amdgpu_ttm_vram_access(adev, cursor.start, buf, cursor.size, write);
+		ret += cursor.size;
+		buf += cursor.size;
+		amdgpu_res_next(&cursor, cursor.size);
  	}
  
  	return ret;

_______________________________________________
amd-gfx mailing list
amd-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/amd-gfx