[PATCH] drm/amd/amdgpu: cover fragment size between 4 and 9 when not aligned

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



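Previously, the parts of a range that were not aligned to the configured
fragment size were mapped with plain 4K PTEs. Now the unaligned head and tail
are retried with progressively smaller fragment sizes, which can improve
performance in some cases.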

Change-Id: Ibb58bb3099f7e8c4b5da90da73a03544cdb2bcb7
Signed-off-by: Roger He <Hongbo.He at amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 98 +++++++++++++++++++++++++++-------
 1 file changed, 79 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 592c3e7..4e5da5e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -1375,7 +1375,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
 }
 
 /*
- * amdgpu_vm_frag_ptes - add fragment information to PTEs
+ * amdgpu_vm_update_ptes_helper - add fragment information to PTEs
  *
  * @params: see amdgpu_pte_update_params definition
  * @vm: requested vm
@@ -1383,11 +1383,12 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
  * @end: last PTE to handle
  * @dst: addr those PTEs should point to
  * @flags: hw mapping flags
+ * @fragment: fragment size as log2 of the number of GPU pages
  * Returns 0 for success, -EINVAL for failure.
  */
-static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params	*params,
-				uint64_t start, uint64_t end,
-				uint64_t dst, uint64_t flags)
+static int amdgpu_vm_update_ptes_helper(struct amdgpu_pte_update_params *params,
+				  uint64_t start, uint64_t end, uint64_t dst,
+				  uint64_t flags, int fragment)
 {
 	int r;
 
@@ -1409,41 +1410,100 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params	*params,
 	 * Userspace can support this by aligning virtual base address and
 	 * allocation size to the fragment size.
 	 */
-	unsigned pages_per_frag = params->adev->vm_manager.fragment_size;
-	uint64_t frag_flags = AMDGPU_PTE_FRAG(pages_per_frag);
-	uint64_t frag_align = 1 << pages_per_frag;
+	uint64_t frag_flags, frag_align, frag_start, frag_end;
 
-	uint64_t frag_start = ALIGN(start, frag_align);
-	uint64_t frag_end = end & ~(frag_align - 1);
+	if (start > end || fragment < 0)
+		return -EINVAL;
 
-	/* system pages are non continuously */
-	if (params->src || !(flags & AMDGPU_PTE_VALID) ||
-	    (frag_start >= frag_end))
-		return amdgpu_vm_update_ptes(params, start, end, dst, flags);
+	fragment = min(fragment, max(0, fls64(end - start) - 1));
+	frag_flags = AMDGPU_PTE_FRAG(fragment);
+	frag_align = 1 << fragment;
+	frag_start = ALIGN(start, frag_align);
+	frag_end = end & ~(frag_align - 1);
+
+	if (frag_start >= frag_end) {
+		if (fragment <= 4)
+			return amdgpu_vm_update_ptes(params, start, end,
+							dst, flags);
+		else
+			return amdgpu_vm_update_ptes_helper(params, start,
+					end, dst, flags, fragment - 1);
+	}
+
+	if (fragment <= 4) {
+		/* handle the 4K area at the beginning */
+		if (start != frag_start) {
+			r = amdgpu_vm_update_ptes(params, start, frag_start,
+						  dst, flags);
+			if (r)
+				return r;
+			dst += (frag_start - start) * AMDGPU_GPU_PAGE_SIZE;
+		}
+
+		/* handle the area in the middle */
+		r = amdgpu_vm_update_ptes(params, frag_start, frag_end, dst,
+					  flags | frag_flags);
+		if (r)
+			return r;
+
+		/* handle the 4K area at the end */
+		if (frag_end != end) {
+			dst += (frag_end - frag_start) * AMDGPU_GPU_PAGE_SIZE;
+			r = amdgpu_vm_update_ptes(params, frag_end, end,
+							dst, flags);
+		}
+		return r;
+	}
 
-	/* handle the 4K area at the beginning */
+	/* handle the unaligned area at the beginning */
 	if (start != frag_start) {
-		r = amdgpu_vm_update_ptes(params, start, frag_start,
-					  dst, flags);
+		r = amdgpu_vm_update_ptes_helper(params, start, frag_start,
+						dst, flags, fragment - 1);
 		if (r)
 			return r;
 		dst += (frag_start - start) * AMDGPU_GPU_PAGE_SIZE;
 	}
 
-	/* handle the area in the middle */
+	/* handle the aligned area in the middle */
 	r = amdgpu_vm_update_ptes(params, frag_start, frag_end, dst,
 				  flags | frag_flags);
 	if (r)
 		return r;
 
-	/* handle the 4K area at the end */
+	/* handle the unaligned area at the end */
 	if (frag_end != end) {
 		dst += (frag_end - frag_start) * AMDGPU_GPU_PAGE_SIZE;
-		r = amdgpu_vm_update_ptes(params, frag_end, end, dst, flags);
+		r = amdgpu_vm_update_ptes_helper(params, frag_end, end,
+						dst, flags, fragment - 1);
 	}
 	return r;
 }
 
+/*
+ * amdgpu_vm_frag_ptes - add fragment information to PTEs
+ *
+ * @params: see amdgpu_pte_update_params definition
+ * @vm: requested vm
+ * @start: first PTE to handle
+ * @end: last PTE to handle
+ * @dst: addr those PTEs should point to
+ * @flags: hw mapping flags
+ * Returns 0 for success, -EINVAL for failure.
+ */
+static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params	*params,
+				uint64_t start, uint64_t end,
+				uint64_t dst, uint64_t flags)
+{
+	int fragment = params->adev->vm_manager.fragment_size;
+	/* system pages are not contiguous */
+	if (params->src || !(flags & AMDGPU_PTE_VALID))
+		return amdgpu_vm_update_ptes(params, start, end, dst, flags);
+
+	/* vram */
+	return amdgpu_vm_update_ptes_helper(params, start, end, dst, flags,
+						fragment);
+}
+
 /**
  * amdgpu_vm_bo_update_mapping - update a mapping in the vm page table
  *
-- 
2.7.4



[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux