Re: [PATCH 1/2] drm/amdgpu: drop volatile from ring buffer

Christian König <ckoenig.leichtzumerken@xxxxxxxxx> · Wed, 9 Oct 2024 14:07:24 +0200

Am 09.10.24 um 09:41 schrieb Tvrtko Ursulin:

On 08/10/2024 19:11, Christian König wrote:
Volatile only prevents the compiler from re-ordering reads and writes.
Since we always only modify the ring buffer from one CPU thread and have
an explicit barrier before signaling the HW this should have no 
effect at
all and just prevents compiler optimisations.

Signed-off-by: Christian König <christian.koenig@xxxxxxx>
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 10 +++-------
  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 11 ++++-------
  2 files changed, 7 insertions(+), 14 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 910293664902..a6e28fe3f8d6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -109,21 +109,17 @@ int amdgpu_ring_alloc(struct amdgpu_ring *ring, 
unsigned int ndw)
  void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
  {
      uint32_t occupied, chunk1, chunk2;
-    uint32_t *dst;
        occupied = ring->wptr & ring->buf_mask;
-    dst = &ring->ring[occupied];
      chunk1 = ring->buf_mask + 1 - occupied;
      chunk1 = (chunk1 >= count) ? count : chunk1;
      chunk2 = count - chunk1;
        if (chunk1)
-        memset32(dst, ring->funcs->nop, chunk1);
+        memset32(&ring->ring[occupied], ring->funcs->nop, chunk1);
  -    if (chunk2) {
-        dst = ring->ring;
-        memset32(dst, ring->funcs->nop, chunk2);
-    }
+    if (chunk2)
+        memset32(ring->ring, ring->funcs->nop, chunk2);
        ring->wptr += count;
      ring->wptr &= ring->ptr_mask;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 574336d6714a..36fc9578c53c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -246,7 +246,7 @@ struct amdgpu_ring {
      struct drm_gpu_scheduler    sched;
        struct amdgpu_bo    *ring_obj;
-    volatile uint32_t    *ring;
+    uint32_t        *ring;
      unsigned        rptr_offs;
      u64            rptr_gpu_addr;
      volatile u32        *rptr_cpu_addr;
@@ -288,7 +288,7 @@ struct amdgpu_ring {
      u64            cond_exe_gpu_addr;
      volatile u32        *cond_exe_cpu_addr;
      unsigned int        set_q_mode_offs;
-    volatile u32        *set_q_mode_ptr;
+    u32            *set_q_mode_ptr;
      u64            set_q_mode_token;
      unsigned        vm_hub;
      unsigned        vm_inv_eng;
@@ -386,10 +386,8 @@ static inline void 
amdgpu_ring_write_multiple(struct amdgpu_ring *ring,
                            void *src, int count_dw)
  {
      unsigned occupied, chunk1, chunk2;
-    void *dst;
        occupied = ring->wptr & ring->buf_mask;
-    dst = (void *)&ring->ring[occupied];
      chunk1 = ring->buf_mask + 1 - occupied;
      chunk1 = (chunk1 >= count_dw) ? count_dw : chunk1;
      chunk2 = count_dw - chunk1;
@@ -397,12 +395,11 @@ static inline void 
amdgpu_ring_write_multiple(struct amdgpu_ring *ring,
      chunk2 <<= 2;
        if (chunk1)
-        memcpy(dst, src, chunk1);
+        memcpy(&ring->ring[occupied], src, chunk1);
        if (chunk2) {
          src += chunk1;
-        dst = (void *)ring->ring;
-        memcpy(dst, src, chunk2);
+        memcpy(ring->ring, src, chunk2);
      }
        ring->wptr += count_dw;

Dropping volatile and removal of local variables in 
amdgpu_ring_insert_nop() and amdgpu_ring_write_multiple() look 
unrelated to me. Best practice would be if you had split those two 
changes. Or at least add to the commit message the usual "while at it" 
blurb.

To me it looked like the main purpose of the local variable was to add 
the void* cast to avoid the compiler warning of removing the volatile 
when giving the parameter to memcpy().

Since that isn't needed any more I've got rid of the local variable as well.

Regards,
Christian.


Regards,

Tvrtko