Am 2021-11-20 um 9:58 p.m. schrieb Philip Yang: > IH ring1 is used to process GPU retry fault, overflow is enabled to > drain retry fault because we want receive other interrupts while > handling retry fault to recover range. There is no overflow flag set > when wptr pass rptr. Use timestamp of rptr and wptr to handle overflow > and drain retry fault. > > Add helper function amdgpu_ih_decode_iv_ts to get 48bit timestamp from > IV entry. drain retry fault check timestamp of rptr is larger than > timestamp of (checkpoint_wptr - 32). > > Add function amdgpu_ih_process1 to process IH ring1 until timestamp of > rptr is larger then timestamp of (rptr + 32). > > Enable navi asics 48bit time stamp in IV. > > Helper amdgpu_ih_ts_after to compare time stamps with 48bit wrap around. > > Signed-off-by: Philip Yang <Philip.Yang@xxxxxxx> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c | 109 +++++++++++++++++++----- > drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h | 9 +- > drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c | 2 +- > drivers/gpu/drm/amd/amdgpu/navi10_ih.c | 2 + > drivers/gpu/drm/amd/amdgpu/vega10_ih.c | 1 + > drivers/gpu/drm/amd/amdgpu/vega20_ih.c | 1 + > drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 2 +- > 7 files changed, 99 insertions(+), 27 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c > index f3d62e196901..17f7f8173bfb 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c > @@ -164,52 +164,52 @@ void amdgpu_ih_ring_write(struct amdgpu_ih_ring *ih, const uint32_t *iv, > } > } > > +/* return true if time stamp t2 is after t1 with 48bit wrap around */ > +static inline bool amdgpu_ih_ts_after(uint64_t t1, uint64_t t2) > +{ > + return ((t1 < t2 && (t2 - t1) < (1ULL << 47)) || > + (t1 > t2 && (t1 - t2) > (1ULL << 47))); There is a more straight-forward way to do this: return ((int64_t)(t2 << 16) - (t1 << 16)) > 0; > +} > + > /* Waiter helper that checks current rptr matches or passes checkpoint wptr */ > -static bool amdgpu_ih_has_checkpoint_processed(struct amdgpu_device *adev, > +static bool amdgpu_ih_has_checkpoint_processed_ts(struct amdgpu_device *adev, > struct amdgpu_ih_ring *ih, > - uint32_t checkpoint_wptr, > - uint32_t *prev_rptr) > + uint64_t checkpoint_ts) > { > - uint32_t cur_rptr = ih->rptr | (*prev_rptr & ~ih->ptr_mask); > + uint64_t ts; > > - /* rptr has wrapped. */ > - if (cur_rptr < *prev_rptr) > - cur_rptr += ih->ptr_mask + 1; > - *prev_rptr = cur_rptr; > - > - /* check ring is empty to workaround missing wptr overflow flag */ > - return cur_rptr >= checkpoint_wptr || > - (cur_rptr & ih->ptr_mask) == amdgpu_ih_get_wptr(adev, ih); > + /* After wakeup, ih->rptr is the entry which is being processed, check > + * the timestamp of previous entry which is processed. > + */ > + ts = amdgpu_ih_decode_iv_ts(adev, ih, ih->rptr - 32); I still disagree with hard-coding -32 here and in a few other places that are not IP version specific code, or a helper called from IP-version specific code. You could abstract this away by adding an offset parameter to amdgpu_ih_decode_iv_ts. Then the IP-version specific code can multiply that with the actual IV size. In this case you'd use offset -1 and amdgpu_ih_decode_iv_ts_helper would multiply that with 32 and add it to rptr. > + return checkpoint_ts == ts || amdgpu_ih_ts_after(checkpoint_ts, ts); This could be done with a single condition like this: return !amdgpu_ih_ts_before(checkpoint_ts, ts); > } > > /** > - * amdgpu_ih_wait_on_checkpoint_process - wait to process IVs up to checkpoint > + * amdgpu_ih_wait_on_checkpoint_process_ts - wait to process IVs up to checkpoint > * > * @adev: amdgpu_device pointer > * @ih: ih ring to process > * > * Used to ensure ring has processed IVs up to the checkpoint write pointer. > */ > -int amdgpu_ih_wait_on_checkpoint_process(struct amdgpu_device *adev, > +int amdgpu_ih_wait_on_checkpoint_process_ts(struct amdgpu_device *adev, > struct amdgpu_ih_ring *ih) > { > - uint32_t checkpoint_wptr, rptr; > + uint32_t checkpoint_wptr; > + uint64_t checkpoint_ts; > > if (!ih->enabled || adev->shutdown) > return -ENODEV; > > checkpoint_wptr = amdgpu_ih_get_wptr(adev, ih); > - /* Order wptr with rptr. */ > + /* Order wptr with ring data. */ > rmb(); > - rptr = READ_ONCE(ih->rptr); > - > - /* wptr has wrapped. */ > - if (rptr > checkpoint_wptr) > - checkpoint_wptr += ih->ptr_mask + 1; > + checkpoint_ts = amdgpu_ih_decode_iv_ts(adev, ih, checkpoint_wptr - 32); Same as above. > > return wait_event_interruptible(ih->wait_process, > - amdgpu_ih_has_checkpoint_processed(adev, ih, > - checkpoint_wptr, &rptr)); > + amdgpu_ih_has_checkpoint_processed_ts(adev, ih, > + checkpoint_ts)); > } > > /** > @@ -253,6 +253,59 @@ int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih) > return IRQ_HANDLED; > } > > +/** > + * amdgpu_ih_process1 - interrupt handler work for IH ring1 > + * > + * @adev: amdgpu_device pointer > + * @ih: ih ring to process > + * > + * Interrupt handler of IH ring1, walk the IH ring1. > + * Returns irq process return code. > + */ > +int amdgpu_ih_process1(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih) > +{ > + uint64_t ts, ts_next; > + unsigned int count; > + u32 wptr; > + > + if (dev_WARN_ONCE(adev->dev, ih != &adev->irq.ih1, "not ring1")) > + return 0; > + > + if (!ih->enabled || adev->shutdown) > + return IRQ_NONE; > + > + wptr = amdgpu_ih_get_wptr(adev, ih); > + if (ih->rptr == wptr) > + return 0; > + > +restart_ih: > + count = AMDGPU_IH_MAX_NUM_IVS; > + > + ts = amdgpu_ih_decode_iv_ts(adev, ih, ih->rptr); > + ts_next = amdgpu_ih_decode_iv_ts(adev, ih, ih->rptr + 32); Same as above. > + while (amdgpu_ih_ts_after(ts, ts_next) && --count) { > + amdgpu_irq_dispatch(adev, ih); > + ih->rptr &= ih->ptr_mask; > + ts = ts_next; > + ts_next = amdgpu_ih_decode_iv_ts(adev, ih, ih->rptr + 32); Same as above. > + } > + /* > + * Process the last timestamp updated entry or one more entry > + * if count = 0, ts is timestamp of the entry. > + */ > + amdgpu_irq_dispatch(adev, ih); > + amdgpu_ih_set_rptr(adev, ih); > + wake_up_all(&ih->wait_process); > + > + wptr = amdgpu_ih_get_wptr(adev, ih); > + /* Order reading of wptr vs. reading of IH ring data */ > + rmb(); > + if (amdgpu_ih_ts_after(ts, amdgpu_ih_decode_iv_ts(adev, ih, wptr - 32))) Same as above. > + goto restart_ih; > + > + return IRQ_HANDLED; > +} > + > /** > * amdgpu_ih_decode_iv_helper - decode an interrupt vector > * > @@ -298,3 +351,13 @@ void amdgpu_ih_decode_iv_helper(struct amdgpu_device *adev, > /* wptr/rptr are in bytes! */ > ih->rptr += 32; > } > + > +uint64_t amdgpu_ih_decode_iv_ts_helper(struct amdgpu_ih_ring *ih, u32 rptr) > +{ > + uint32_t index = (rptr & ih->ptr_mask) >> 2; > + uint32_t dw1, dw2; > + > + dw1 = ih->ring[index + 1]; > + dw2 = ih->ring[index + 2]; > + return dw1 | ((u64)(dw2 & 0xffff) << 32); > +} > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h > index 0649b59830a5..edfa0a18a123 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h > @@ -76,12 +76,15 @@ struct amdgpu_ih_funcs { > u32 (*get_wptr)(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih); > void (*decode_iv)(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih, > struct amdgpu_iv_entry *entry); > + uint64_t (*decode_iv_ts)(struct amdgpu_ih_ring *ih, u32 rptr); > void (*set_rptr)(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih); > }; > > #define amdgpu_ih_get_wptr(adev, ih) (adev)->irq.ih_funcs->get_wptr((adev), (ih)) > #define amdgpu_ih_decode_iv(adev, iv) \ > (adev)->irq.ih_funcs->decode_iv((adev), (ih), (iv)) > +#define amdgpu_ih_decode_iv_ts(adev, ih, rptr) \ > + ((adev)->irq.ih_funcs->decode_iv_ts((ih), (rptr))) Since you're not populating this for older ASICs, it would be good to have a NULL pointer check here and return 0 as a fallback and maybe print a WARN_ON_ONCE because checking a timestamp on an ASIC that doesn't provide it would result in unexpected behaviour: #define amdgpu_ih_decode_iv_ts(adev, ih, rptr) \ (WARN_ON_ONCE(!(adev)->irq.ih_funcs->decode_iv_ts) ? 0 : \ (adev)->irq.ih_funcs->decode_iv_ts((ih), (rptr))) > #define amdgpu_ih_set_rptr(adev, ih) (adev)->irq.ih_funcs->set_rptr((adev), (ih)) > > int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih, > @@ -89,10 +92,12 @@ int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih, > void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih); > void amdgpu_ih_ring_write(struct amdgpu_ih_ring *ih, const uint32_t *iv, > unsigned int num_dw); > -int amdgpu_ih_wait_on_checkpoint_process(struct amdgpu_device *adev, > - struct amdgpu_ih_ring *ih); > +int amdgpu_ih_wait_on_checkpoint_process_ts(struct amdgpu_device *adev, > + struct amdgpu_ih_ring *ih); > int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih); > +int amdgpu_ih_process1(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih); > void amdgpu_ih_decode_iv_helper(struct amdgpu_device *adev, > struct amdgpu_ih_ring *ih, > struct amdgpu_iv_entry *entry); > +uint64_t amdgpu_ih_decode_iv_ts_helper(struct amdgpu_ih_ring *ih, u32 rptr); > #endif > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c > index e9023687dc9a..891486cca94b 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c > @@ -224,7 +224,7 @@ static void amdgpu_irq_handle_ih1(struct work_struct *work) > struct amdgpu_device *adev = container_of(work, struct amdgpu_device, > irq.ih1_work); > > - amdgpu_ih_process(adev, &adev->irq.ih1); > + amdgpu_ih_process1(adev, &adev->irq.ih1); > } > > /** > diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c > index 1d8414c3fadb..1af1358f9650 100644 > --- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c > +++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c > @@ -160,6 +160,7 @@ static int navi10_ih_toggle_ring_interrupts(struct amdgpu_device *adev, > > tmp = RREG32(ih_regs->ih_rb_cntl); > tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_ENABLE, (enable ? 1 : 0)); > + tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, RB_GPU_TS_ENABLE, 1); Maybe this should be in a separate patch. Regards, Felix > /* enable_intr field is only valid in ring0 */ > if (ih == &adev->irq.ih) > tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, ENABLE_INTR, (enable ? 1 : 0)); > @@ -724,6 +725,7 @@ static const struct amd_ip_funcs navi10_ih_ip_funcs = { > static const struct amdgpu_ih_funcs navi10_ih_funcs = { > .get_wptr = navi10_ih_get_wptr, > .decode_iv = amdgpu_ih_decode_iv_helper, > + .decode_iv_ts = amdgpu_ih_decode_iv_ts_helper, > .set_rptr = navi10_ih_set_rptr > }; > > diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c > index a9ca6988009e..3070466f54e1 100644 > --- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c > +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c > @@ -640,6 +640,7 @@ const struct amd_ip_funcs vega10_ih_ip_funcs = { > static const struct amdgpu_ih_funcs vega10_ih_funcs = { > .get_wptr = vega10_ih_get_wptr, > .decode_iv = amdgpu_ih_decode_iv_helper, > + .decode_iv_ts = amdgpu_ih_decode_iv_ts_helper, > .set_rptr = vega10_ih_set_rptr > }; > > diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c > index f51dfc38ac65..3b4eb8285943 100644 > --- a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c > +++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c > @@ -688,6 +688,7 @@ const struct amd_ip_funcs vega20_ih_ip_funcs = { > static const struct amdgpu_ih_funcs vega20_ih_funcs = { > .get_wptr = vega20_ih_get_wptr, > .decode_iv = amdgpu_ih_decode_iv_helper, > + .decode_iv_ts = amdgpu_ih_decode_iv_ts_helper, > .set_rptr = vega20_ih_set_rptr > }; > > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c > index 10868d5b549f..663489ae56d7 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c > @@ -1974,7 +1974,7 @@ static void svm_range_drain_retry_fault(struct svm_range_list *svms) > > pr_debug("drain retry fault gpu %d svms %p\n", i, svms); > > - amdgpu_ih_wait_on_checkpoint_process(pdd->dev->adev, > + amdgpu_ih_wait_on_checkpoint_process_ts(pdd->dev->adev, > &pdd->dev->adev->irq.ih1); > pr_debug("drain retry fault gpu %d svms 0x%p done\n", i, svms); > }