From: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxxx>

   text    data     bss      dec    hex filename
10437711  542597  188232 11168540 aa6b1c amdgpu.ko.before
10418181  542597  188232 11149010 aa1ed2 amdgpu.ko.after

The main reason seems to be that amdgpu_ring_write() can avoid re-loading
ring->wptr when called multiple times in sequence.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@xxxxxxxxxx>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 158238f8c06a..b57951d8c997 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -385,8 +385,10 @@ static inline void amdgpu_ring_clear_ring(struct amdgpu_ring *ring)
 
 static inline void amdgpu_ring_write(struct amdgpu_ring *ring, uint32_t v)
 {
-	ring->ring[ring->wptr++ & ring->buf_mask] = v;
-	ring->wptr &= ring->ptr_mask;
+	u64 wptr = ring->wptr;
+
+	ring->ring[wptr++ & ring->buf_mask] = v;
+	ring->wptr = wptr & ring->ptr_mask;
 	ring->count_dw--;
 }
 
@@ -394,9 +396,11 @@ static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring,
 					      void *src, int count_dw)
 {
 	unsigned occupied, chunk1, chunk2;
+	u32 buf_mask = ring->buf_mask;
+	u64 wptr = ring->wptr;
 
-	occupied = ring->wptr & ring->buf_mask;
-	chunk1 = ring->buf_mask + 1 - occupied;
+	occupied = wptr & buf_mask;
+	chunk1 = buf_mask + 1 - occupied;
 	chunk1 = (chunk1 >= count_dw) ? count_dw : chunk1;
 	chunk2 = count_dw - chunk1;
 	chunk1 <<= 2;
@@ -410,8 +414,8 @@ static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring,
 		memcpy(ring->ring, src, chunk2);
 	}
 
-	ring->wptr += count_dw;
-	ring->wptr &= ring->ptr_mask;
+	wptr += count_dw;
+	ring->wptr = wptr & ring->ptr_mask;
 	ring->count_dw -= count_dw;
 }
 
-- 
2.47.1