[PATCH 2/2] drm/amdgpu: handle all fragment sizes v3

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Roger He <Hongbo.He@xxxxxxx>

This can improve performance for some cases.

v2 (chk): handle all sizes, simplify the patch quite a bit
v3 (chk): adjust dw estimation as well

Signed-off-by: Roger He <Hongbo.He at amd.com>
Signed-off-by: Christian König <christian.koenig at amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 74 ++++++++++++++++++++++------------
 1 file changed, 49 insertions(+), 25 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index b08f031..1575657 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1415,8 +1415,6 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params	*params,
 				uint64_t start, uint64_t end,
 				uint64_t dst, uint64_t flags)
 {
-	int r;
-
 	/**
 	 * The MC L1 TLB supports variable sized pages, based on a fragment
 	 * field in the PTE. When this field is set to a non-zero value, page
@@ -1435,39 +1433,65 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params	*params,
 	 * Userspace can support this by aligning virtual base address and
 	 * allocation size to the fragment size.
 	 */
-	unsigned pages_per_frag = params->adev->vm_manager.fragment_size;
-	uint64_t frag_flags = AMDGPU_PTE_FRAG(pages_per_frag);
-	uint64_t frag_align = 1 << pages_per_frag;
+	unsigned max_frag = params->adev->vm_manager.fragment_size;
+	uint64_t frag_flags, frag_end;
+	unsigned frag;
 
-	uint64_t frag_start = ALIGN(start, frag_align);
-	uint64_t frag_end = end & ~(frag_align - 1);
+	int r;
 
 	/* system pages are non continuously */
-	if (params->src || !(flags & AMDGPU_PTE_VALID) ||
-	    (frag_start >= frag_end))
+	if (params->src || !(flags & AMDGPU_PTE_VALID))
 		return amdgpu_vm_update_ptes(params, start, end, dst, flags);
 
-	/* handle the 4K area at the beginning */
-	if (start != frag_start) {
-		r = amdgpu_vm_update_ptes(params, start, frag_start,
-					  dst, flags);
+	/* Handle the fragments at the beginning */
+	while (start != end) {
+		/* This intentionally wraps around if no bit is set */
+		frag = min(ffs(start), fls64(end - start)) - 1;
+		if (frag >= max_frag)
+			break;
+
+		frag_flags = AMDGPU_PTE_FRAG(frag);
+		frag_end = start + (1 << frag);
+
+		r = amdgpu_vm_update_ptes(params, start, frag_end, dst,
+					  flags | frag_flags);
 		if (r)
 			return r;
-		dst += (frag_start - start) * AMDGPU_GPU_PAGE_SIZE;
+
+		dst += (frag_end - start) * AMDGPU_GPU_PAGE_SIZE;
+		start = frag_end;
 	}
 
 	/* handle the area in the middle */
-	r = amdgpu_vm_update_ptes(params, frag_start, frag_end, dst,
-				  flags | frag_flags);
-	if (r)
-		return r;
+	if (start != end) {
+		frag_flags = AMDGPU_PTE_FRAG(max_frag);
+		frag_end = end & ~((1 << max_frag) - 1);
 
-	/* handle the 4K area at the end */
-	if (frag_end != end) {
-		dst += (frag_end - frag_start) * AMDGPU_GPU_PAGE_SIZE;
-		r = amdgpu_vm_update_ptes(params, frag_end, end, dst, flags);
+		r = amdgpu_vm_update_ptes(params, start, frag_end, dst,
+					  flags | frag_flags);
+		if (r)
+			return r;
+
+		dst += (frag_end - start) * AMDGPU_GPU_PAGE_SIZE;
+		start = frag_end;
 	}
-	return r;
+
+	/* Handle the fragments at the end */
+	while (start != end) {
+		frag = fls64(end - start) - 1;
+		frag_flags = AMDGPU_PTE_FRAG(frag);
+		frag_end = start + (1 << frag);
+
+		r = amdgpu_vm_update_ptes(params, start, frag_end,
+					  dst, flags | frag_flags);
+		if (r)
+			return r;
+
+		dst += (frag_end - start) * AMDGPU_GPU_PAGE_SIZE;
+		start = frag_end;
+	}
+
+	return 0;
 }
 
 /**
@@ -1557,8 +1581,8 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 		/* set page commands needed */
 		ndw += ncmds * 10;
 
-		/* two extra commands for begin/end of fragment */
-		ndw += 2 * 10;
+		/* extra commands for begin/end fragments */
+		ndw += 2 * 10 * adev->vm_manager.fragment_size;
 
 		params.func = amdgpu_vm_do_set_ptes;
 	}
-- 
2.7.4



[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux