When filling the ring to align the emit pointer to the next cacheline,
use memset64() rather than open-coding it. As we know that we always
have an even number of dwords, we can replace the dword loop with the
qword equivalent.

Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx>
---
 drivers/gpu/drm/i915/intel_ringbuffer.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index c68ac605b8a9..07a9a2b4beb7 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1717,22 +1717,24 @@ u32 *intel_ring_begin(struct i915_request *rq, unsigned int num_dwords)
 /* Align the ring tail to a cacheline boundary */
 int intel_ring_cacheline_align(struct i915_request *rq)
 {
-	int num_dwords = (rq->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(u32);
-	u32 *cs;
+	int num_dwords;
+	void *cs;
 
+	num_dwords = (rq->ring->emit & (CACHELINE_BYTES - 1)) / sizeof(u32);
 	if (num_dwords == 0)
 		return 0;
 
-	num_dwords = CACHELINE_BYTES / sizeof(u32) - num_dwords;
+	num_dwords = CACHELINE_DWORDS - num_dwords;
+	GEM_BUG_ON(num_dwords & 1);
+
 	cs = intel_ring_begin(rq, num_dwords);
 	if (IS_ERR(cs))
 		return PTR_ERR(cs);
 
-	while (num_dwords--)
-		*cs++ = MI_NOOP;
-
+	memset64(cs, 0, num_dwords/2);
 	intel_ring_advance(rq, cs);
 
+	GEM_BUG_ON(rq->ring->emit & CACHELINE_BYTES);
 	return 0;
 }
 
-- 
2.17.0

_______________________________________________
Intel-gfx mailing list
Intel-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/intel-gfx