On SKL+ the linear source buffer has to start on a cache-line boundary to meet the 2D engine's source-copy requirements. Apply this cache-line alignment policy to SKL+ only. Signed-off-by: Guang Bai <guang.bai@xxxxxxxxx> --- src/sna/sna_io.c | 47 +++++++++++++++++++++++++++++++++++------------ 1 file changed, 35 insertions(+), 12 deletions(-) diff --git a/src/sna/sna_io.c b/src/sna/sna_io.c index d32bd58..48d9354 100644 --- a/src/sna/sna_io.c +++ b/src/sna/sna_io.c @@ -1064,7 +1064,7 @@ tile: if (kgem->gen >= 0100) { cmd |= 8; do { - int nbox_this_time, rem; + int nbox_this_time, rem, pitch_aligned; nbox_this_time = nbox; rem = kgem_batch_space(kgem); @@ -1077,12 +1077,19 @@ tile: /* Count the total number of bytes to be read and allocate a * single buffer large enough. Or if it is very small, combine - * with other allocations. */ + * with other allocations. Each sub-buffer starting point has + * to be aligned to 64 bytes to conform to the latest hardware requirements. + * Align the pitch of each sub-buffer to 64 bytes for simplicity. 
+ */ offset = 0; for (n = 0; n < nbox_this_time; n++) { int height = box[n].y2 - box[n].y1; int width = box[n].x2 - box[n].x1; - offset += PITCH(width, dst->drawable.bitsPerPixel >> 3) * height; + if (kgem->gen >= 0110) { + pitch_aligned = ALIGN(PITCH(width, dst->drawable.bitsPerPixel >> 3), 64); + offset += pitch_aligned * height; + } else + offset += PITCH(width, dst->drawable.bitsPerPixel >> 3) * height; } src_bo = kgem_create_buffer(kgem, offset, @@ -1113,14 +1120,24 @@ tile: assert(box->x1 + dst_dx >= 0); assert(box->y1 + dst_dy >= 0); - memcpy_blt(src, (char *)ptr + offset, - dst->drawable.bitsPerPixel, - stride, pitch, - box->x1 + src_dx, box->y1 + src_dy, - 0, 0, - width, height); + if (kgem->gen >= 0110) { + pitch_aligned = ALIGN(pitch, 64); + memcpy_blt(src, (char *)ptr + offset, + dst->drawable.bitsPerPixel, + stride, pitch_aligned, + box->x1 + src_dx, box->y1 + src_dy, + 0, 0, + width, height); + } else + memcpy_blt(src, (char *)ptr + offset, + dst->drawable.bitsPerPixel, + stride, pitch, + box->x1 + src_dx, box->y1 + src_dy, + 0, 0, + width, height); assert(kgem->mode == KGEM_BLT); + b = kgem->batch + kgem->nbatch; b[0] = cmd; b[1] = br13; @@ -1133,16 +1150,22 @@ tile: KGEM_RELOC_FENCED, 0); b[6] = 0; - b[7] = pitch; + if (kgem->gen >= 0110) + b[7] = pitch_aligned; + else + b[7] = pitch; + *(uint64_t *)(b+8) = kgem_add_reloc64(kgem, kgem->nbatch + 8, src_bo, I915_GEM_DOMAIN_RENDER << 16 | KGEM_RELOC_FENCED, offset); kgem->nbatch += 10; - box++; - offset += pitch * height; + if (kgem->gen >= 0110) + offset += pitch_aligned * height; + else + offset += pitch * height; } while (--nbox_this_time); assert(offset == __kgem_buffer_size(src_bo)); sigtrap_put(); -- 2.7.4 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx