Hi Felix Many thanks for your review. I have modified it according to your comments and suggestion. Best Regards Yintian Tao -----Original Message----- From: Kuehling, Felix <Felix.Kuehling@xxxxxxx> Sent: 2020年4月17日 23:39 To: Tao, Yintian <Yintian.Tao@xxxxxxx>; Liu, Monk <Monk.Liu@xxxxxxx> Cc: amd-gfx@xxxxxxxxxxxxxxxxxxxxx Subject: Re: [PATCH] drm/amdgpu: refine kiq read register Am 2020-04-17 um 2:53 a.m. schrieb Yintian Tao: > According to the current kiq read register method, there will be race > condition when using KIQ to read register if multiple clients want to > read at same time just like the expample below: > 1. client-A start to read REG-0 throguh KIQ 2. client-A poll the > seqno-0 3. client-B start to read REG-1 through KIQ 4. client-B poll > the seqno-1 5. the kiq complete these two read operation 6. client-A > to read the register at the wb buffer and > get REG-1 value > > Therefore, directly make kiq write the register value at the ring > buffer then there will be no race condition for the wb buffer. > > v2: supply the read_clock and move the reg_val_offs back > > Signed-off-by: Yintian Tao <yttao@xxxxxxx> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 11 ++++------ > drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 1 - > drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 5 +++-- > drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 14 +++++------- > drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 14 +++++------- > drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 28 ++++++++++++------------ > 6 files changed, 33 insertions(+), 40 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > index ea576b4260a4..4e1c0239e561 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c > @@ -304,10 +304,6 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device > *adev, > > spin_lock_init(&kiq->ring_lock); > > - r = amdgpu_device_wb_get(adev, &kiq->reg_val_offs); > - if (r) > - return r; > - > ring->adev = NULL; > ring->ring_obj = NULL; > ring->use_doorbell = true; > @@ -331,7 +327,6 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device > *adev, > > void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring) { > - amdgpu_device_wb_free(ring->adev, ring->adev->gfx.kiq.reg_val_offs); > amdgpu_ring_fini(ring); > } > > @@ -675,12 +670,14 @@ uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) > uint32_t seq; > struct amdgpu_kiq *kiq = &adev->gfx.kiq; > struct amdgpu_ring *ring = &kiq->ring; > + uint64_t reg_val_offs = 0; > > BUG_ON(!ring->funcs->emit_rreg); > > spin_lock_irqsave(&kiq->ring_lock, flags); > amdgpu_ring_alloc(ring, 32); > - amdgpu_ring_emit_rreg(ring, reg); > + reg_val_offs = (ring->wptr & ring->buf_mask) + 30; I think that should be (ring->wptr + 30) & ring->buf_mask. Otherwise the reg_val_offset can be past the end of the ring. But that still leaves a problem if another command is submitted to the KIQ before you read the returned reg_val from the ring. Your reg_val can be overwritten by the new command and you get the wrong result. Or the command can be overwritten with the reg_val, which will most likely hang the CP. You could allocate space on the KIQ ring with a NOP command to prevent that space from being overwritten by other commands. Regards, Felix > + amdgpu_ring_emit_rreg(ring, reg, reg_val_offs); > amdgpu_fence_emit_polling(ring, &seq); > amdgpu_ring_commit(ring); > spin_unlock_irqrestore(&kiq->ring_lock, flags); @@ -707,7 +704,7 @@ > uint32_t amdgpu_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) > if (cnt > MAX_KIQ_REG_TRY) > goto failed_kiq_read; > > - return adev->wb.wb[kiq->reg_val_offs]; > + return ring->ring[reg_val_offs]; > > failed_kiq_read: > pr_err("failed to read reg:%x\n", reg); diff --git > a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h > index 634746829024..ee698f0246d8 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h > @@ -103,7 +103,6 @@ struct amdgpu_kiq { > struct amdgpu_ring ring; > struct amdgpu_irq_src irq; > const struct kiq_pm4_funcs *pmf; > - uint32_t reg_val_offs; > }; > > /* > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h > index f61664ee4940..a3d88f2aa9f4 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h > @@ -181,7 +181,8 @@ struct amdgpu_ring_funcs { > void (*end_use)(struct amdgpu_ring *ring); > void (*emit_switch_buffer) (struct amdgpu_ring *ring); > void (*emit_cntxcntl) (struct amdgpu_ring *ring, uint32_t flags); > - void (*emit_rreg)(struct amdgpu_ring *ring, uint32_t reg); > + void (*emit_rreg)(struct amdgpu_ring *ring, uint32_t reg, > + uint64_t reg_val_offs); > void (*emit_wreg)(struct amdgpu_ring *ring, uint32_t reg, uint32_t val); > void (*emit_reg_wait)(struct amdgpu_ring *ring, uint32_t reg, > uint32_t val, uint32_t mask); @@ -265,7 +266,7 @@ struct > amdgpu_ring { #define amdgpu_ring_emit_hdp_flush(r) > (r)->funcs->emit_hdp_flush((r)) #define > amdgpu_ring_emit_switch_buffer(r) (r)->funcs->emit_switch_buffer((r)) > #define amdgpu_ring_emit_cntxcntl(r, d) > (r)->funcs->emit_cntxcntl((r), (d)) -#define amdgpu_ring_emit_rreg(r, > d) (r)->funcs->emit_rreg((r), (d)) > +#define amdgpu_ring_emit_rreg(r, d, o) (r)->funcs->emit_rreg((r), > +(d), (o)) > #define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), > (d), (v)) #define amdgpu_ring_emit_reg_wait(r, d, v, m) > (r)->funcs->emit_reg_wait((r), (d), (v), (m)) #define > amdgpu_ring_emit_reg_write_reg_wait(r, d0, d1, v, m) > (r)->funcs->emit_reg_write_reg_wait((r), (d0), (d1), (v), (m)) diff > --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c > b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c > index 0a03e2ad5d95..7c9a5e440509 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c > @@ -7594,21 +7594,19 @@ static void gfx_v10_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start, > amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1)); } > > -static void gfx_v10_0_ring_emit_rreg(struct amdgpu_ring *ring, > uint32_t reg) > +static void gfx_v10_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg, > + uint64_t reg_val_offs) > { > - struct amdgpu_device *adev = ring->adev; > - struct amdgpu_kiq *kiq = &adev->gfx.kiq; > - > amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); > amdgpu_ring_write(ring, 0 | /* src: register*/ > (5 << 8) | /* dst: memory */ > (1 << 20)); /* write confirm */ > amdgpu_ring_write(ring, reg); > amdgpu_ring_write(ring, 0); > - amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr + > - kiq->reg_val_offs * 4)); > - amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + > - kiq->reg_val_offs * 4)); > + amdgpu_ring_write(ring, lower_32_bits(ring->gpu_addr + > + reg_val_offs * 4)); > + amdgpu_ring_write(ring, upper_32_bits(ring->gpu_addr + > + reg_val_offs * 4)); > } > > static void gfx_v10_0_ring_emit_wreg(struct amdgpu_ring *ring, > uint32_t reg, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c > b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c > index fc6c2f2bc76c..8e7eee7838e0 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c > @@ -6383,21 +6383,19 @@ static void gfx_v8_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigne > ring->ring[offset] = (ring->ring_size >> 2) - offset + cur; } > > -static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, > uint32_t reg) > +static void gfx_v8_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg, > + uint64_t reg_val_offs) > { > - struct amdgpu_device *adev = ring->adev; > - struct amdgpu_kiq *kiq = &adev->gfx.kiq; > - > amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); > amdgpu_ring_write(ring, 0 | /* src: register*/ > (5 << 8) | /* dst: memory */ > (1 << 20)); /* write confirm */ > amdgpu_ring_write(ring, reg); > amdgpu_ring_write(ring, 0); > - amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr + > - kiq->reg_val_offs * 4)); > - amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + > - kiq->reg_val_offs * 4)); > + amdgpu_ring_write(ring, lower_32_bits(ring->gpu_addr + > + reg_val_offs * 4)); > + amdgpu_ring_write(ring, upper_32_bits(ring->gpu_addr + > + reg_val_offs * 4)); > } > > static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, > uint32_t reg, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > index 84fcf842316d..ff279b1f5c24 100644 > --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c > @@ -4046,11 +4046,13 @@ static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev) > uint32_t seq; > struct amdgpu_kiq *kiq = &adev->gfx.kiq; > struct amdgpu_ring *ring = &kiq->ring; > + uint64_t reg_val_offs = 0; > > BUG_ON(!ring->funcs->emit_rreg); > > spin_lock_irqsave(&kiq->ring_lock, flags); > amdgpu_ring_alloc(ring, 32); > + reg_val_offs = (ring->wptr & ring->buf_mask) + 30; > amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); > amdgpu_ring_write(ring, 9 | /* src: register*/ > (5 << 8) | /* dst: memory */ > @@ -4058,10 +4060,10 @@ static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev) > (1 << 20)); /* write confirm */ > amdgpu_ring_write(ring, 0); > amdgpu_ring_write(ring, 0); > - amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr + > - kiq->reg_val_offs * 4)); > - amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + > - kiq->reg_val_offs * 4)); > + amdgpu_ring_write(ring, lower_32_bits(ring->gpu_addr + > + reg_val_offs * 4)); > + amdgpu_ring_write(ring, upper_32_bits(ring->gpu_addr + > + reg_val_offs * 4)); > amdgpu_fence_emit_polling(ring, &seq); > amdgpu_ring_commit(ring); > spin_unlock_irqrestore(&kiq->ring_lock, flags); @@ -4088,8 +4090,8 > @@ static uint64_t gfx_v9_0_kiq_read_clock(struct amdgpu_device *adev) > if (cnt > MAX_KIQ_REG_TRY) > goto failed_kiq_read; > > - return (uint64_t)adev->wb.wb[kiq->reg_val_offs] | > - (uint64_t)adev->wb.wb[kiq->reg_val_offs + 1 ] << 32ULL; > + return (uint64_t)ring->ring[reg_val_offs] | > + (uint64_t)ring->ring[reg_val_offs + 1 ] << 32ULL; > > failed_kiq_read: > pr_err("failed to read gpu clock\n"); @@ -5482,21 +5484,19 @@ static > void gfx_v9_0_ring_emit_patch_cond_exec(struct amdgpu_ring *ring, unsigne > ring->ring[offset] = (ring->ring_size>>2) - offset + cur; } > > -static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, > uint32_t reg) > +static void gfx_v9_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg, > + uint64_t reg_val_offs) > { > - struct amdgpu_device *adev = ring->adev; > - struct amdgpu_kiq *kiq = &adev->gfx.kiq; > - > amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4)); > amdgpu_ring_write(ring, 0 | /* src: register*/ > (5 << 8) | /* dst: memory */ > (1 << 20)); /* write confirm */ > amdgpu_ring_write(ring, reg); > amdgpu_ring_write(ring, 0); > - amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr + > - kiq->reg_val_offs * 4)); > - amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr + > - kiq->reg_val_offs * 4)); > + amdgpu_ring_write(ring, lower_32_bits(ring->gpu_addr + > + reg_val_offs * 4)); > + amdgpu_ring_write(ring, upper_32_bits(ring->gpu_addr + > + reg_val_offs * 4)); > } > > static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, > uint32_t reg, _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx