[PATCH xf86-video-intel] sna/io: Align the linear source buffer to cache line for 2d blt

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On SKL+ the linear source buffer has to start on a cache-line boundary
to meet the 2D engine's source-copy requirements.

Signed-off-by: Guang Bai <guang.bai@xxxxxxxxx>
---
 src/sna/sna_io.c | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/src/sna/sna_io.c b/src/sna/sna_io.c
index d32bd58..5bfbdbb 100644
--- a/src/sna/sna_io.c
+++ b/src/sna/sna_io.c
@@ -1064,7 +1064,7 @@ tile:
 	if (kgem->gen >= 0100) {
 		cmd |= 8;
 		do {
-			int nbox_this_time, rem;
+			int nbox_this_time, rem, pitch_aligned;
 
 			nbox_this_time = nbox;
 			rem = kgem_batch_space(kgem);
@@ -1077,12 +1077,16 @@ tile:
 
 			/* Count the total number of bytes to be read and allocate a
 			 * single buffer large enough. Or if it is very small, combine
-			 * with other allocations. */
+			 * with other allocations. Each sub-buffer starting point has
+			 * to be aligned to 64 bytes to conform to the latest hardware requirements.
+			 * Align the pitch of each sub-buffer to 64 bytes for simplicity.
+			 */
 			offset = 0;
 			for (n = 0; n < nbox_this_time; n++) {
 				int height = box[n].y2 - box[n].y1;
 				int width = box[n].x2 - box[n].x1;
-				offset += PITCH(width, dst->drawable.bitsPerPixel >> 3) * height;
+				pitch_aligned = ALIGN(PITCH(width, dst->drawable.bitsPerPixel >> 3), 64);
+				offset += pitch_aligned * height;
 			}
 
 			src_bo = kgem_create_buffer(kgem, offset,
@@ -1113,9 +1117,10 @@ tile:
 					assert(box->x1 + dst_dx >= 0);
 					assert(box->y1 + dst_dy >= 0);
 
+					pitch_aligned = ALIGN(pitch, 64);
 					memcpy_blt(src, (char *)ptr + offset,
 						   dst->drawable.bitsPerPixel,
-						   stride, pitch,
+						   stride, pitch_aligned,
 						   box->x1 + src_dx, box->y1 + src_dy,
 						   0, 0,
 						   width, height);
@@ -1133,7 +1138,7 @@ tile:
 								 KGEM_RELOC_FENCED,
 								 0);
 					b[6] = 0;
-					b[7] = pitch;
+					b[7] = pitch_aligned;
 					*(uint64_t *)(b+8) =
 						kgem_add_reloc64(kgem, kgem->nbatch + 8, src_bo,
 								 I915_GEM_DOMAIN_RENDER << 16 |
@@ -1142,7 +1147,7 @@ tile:
 					kgem->nbatch += 10;
 
 					box++;
-					offset += pitch * height;
+					offset += pitch_aligned * height;
 				} while (--nbox_this_time);
 				assert(offset == __kgem_buffer_size(src_bo));
 				sigtrap_put();
-- 
2.7.4

_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx




[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux