On Mon, Mar 13, 2023 at 12:19 AM Christian König <christian.koenig@xxxxxxx> wrote: > > Am 11.03.23 um 18:35 schrieb Rob Clark: > > From: Rob Clark <robdclark@xxxxxxxxxxxx> > > > > Avoid allocating memory in job_run() by embedding the fence in the > > submit object. Since msm gpu fences are always 1:1 with msm_gem_submit > > we can just use the fence's refcnt to track the submit. And since we > > can get the fence ctx from the submit we can just drop the msm_fence > > struct altogether. This uses the new dma_fence_init_noref() to deal > > with the fact that the fence's refcnt is initialized when the submit is > > created, long before job_run(). > > Well this is a very very bad idea, we made the same mistake with amdgpu > as well. > > It's true that you should not have any memory allocation in your run_job > callback, but you could also just allocate the hw fence during job > creation and initializing it later on. > > I've suggested to embed the fence into the job for amdgpu because some > people insisted of re-submitting jobs during timeout and GPU reset. This > turned into a nightmare with tons of bug fixes on top of bug fixes on > top of bug fixes because it messes up the job and fence lifetime as > defined by the DRM scheduler and DMA-buf framework. > > Luben is currently working on cleaning all this up. This actually shouldn't be a problem with msm, as the fence doesn't change if there is a gpu reset. We simply signal the fence for the offending job, reset the GPU, and re-play the remaining in-flight jobs (ie. things that already had their job_run() called) with the original fences. (We don't use gpu sched's reset/timeout handling.. when I migrated to gpu sched I kept our existing hangcheck/recovery mechanism.) BR, -R > Regards, > Christian. > > > > > Signed-off-by: Rob Clark <robdclark@xxxxxxxxxxxx> > > --- > > Note that this applies on top of https://patchwork.freedesktop.org/series/93035/ > > out of convenience for myself, but I can re-work it to go before > > depending on the order that things land. > > > > drivers/gpu/drm/msm/msm_fence.c | 45 +++++++++++----------------- > > drivers/gpu/drm/msm/msm_fence.h | 2 +- > > drivers/gpu/drm/msm/msm_gem.h | 10 +++---- > > drivers/gpu/drm/msm/msm_gem_submit.c | 8 ++--- > > drivers/gpu/drm/msm/msm_gpu.c | 4 +-- > > drivers/gpu/drm/msm/msm_ringbuffer.c | 4 +-- > > 6 files changed, 31 insertions(+), 42 deletions(-) > > > > diff --git a/drivers/gpu/drm/msm/msm_fence.c b/drivers/gpu/drm/msm/msm_fence.c > > index 51b461f32103..51f9f1f0cb66 100644 > > --- a/drivers/gpu/drm/msm/msm_fence.c > > +++ b/drivers/gpu/drm/msm/msm_fence.c > > @@ -103,14 +103,9 @@ void msm_update_fence(struct msm_fence_context *fctx, uint32_t fence) > > spin_unlock_irqrestore(&fctx->spinlock, flags); > > } > > > > -struct msm_fence { > > - struct dma_fence base; > > - struct msm_fence_context *fctx; > > -}; > > - > > -static inline struct msm_fence *to_msm_fence(struct dma_fence *fence) > > +static inline struct msm_gem_submit *fence_to_submit(struct dma_fence *fence) > > { > > - return container_of(fence, struct msm_fence, base); > > + return container_of(fence, struct msm_gem_submit, hw_fence); > > } > > > > static const char *msm_fence_get_driver_name(struct dma_fence *fence) > > @@ -120,20 +115,20 @@ static const char *msm_fence_get_driver_name(struct dma_fence *fence) > > > > static const char *msm_fence_get_timeline_name(struct dma_fence *fence) > > { > > - struct msm_fence *f = to_msm_fence(fence); > > - return f->fctx->name; > > + struct msm_gem_submit *submit = fence_to_submit(fence); > > + return submit->ring->fctx->name; > > } > > > > static bool msm_fence_signaled(struct dma_fence *fence) > > { > > - struct msm_fence *f = to_msm_fence(fence); > > - return msm_fence_completed(f->fctx, f->base.seqno); > > + struct msm_gem_submit *submit = fence_to_submit(fence); > > + return msm_fence_completed(submit->ring->fctx, fence->seqno); > > } > > > > static void msm_fence_set_deadline(struct dma_fence *fence, ktime_t deadline) > > { > > - struct msm_fence *f = to_msm_fence(fence); > > - struct msm_fence_context *fctx = f->fctx; > > + struct msm_gem_submit *submit = fence_to_submit(fence); > > + struct msm_fence_context *fctx = submit->ring->fctx; > > unsigned long flags; > > ktime_t now; > > > > @@ -165,26 +160,22 @@ static void msm_fence_set_deadline(struct dma_fence *fence, ktime_t deadline) > > spin_unlock_irqrestore(&fctx->spinlock, flags); > > } > > > > +static void msm_fence_release(struct dma_fence *fence) > > +{ > > + __msm_gem_submit_destroy(fence_to_submit(fence)); > > +} > > + > > static const struct dma_fence_ops msm_fence_ops = { > > .get_driver_name = msm_fence_get_driver_name, > > .get_timeline_name = msm_fence_get_timeline_name, > > .signaled = msm_fence_signaled, > > .set_deadline = msm_fence_set_deadline, > > + .release = msm_fence_release, > > }; > > > > -struct dma_fence * > > -msm_fence_alloc(struct msm_fence_context *fctx) > > +void > > +msm_fence_init(struct msm_fence_context *fctx, struct dma_fence *f) > > { > > - struct msm_fence *f; > > - > > - f = kzalloc(sizeof(*f), GFP_KERNEL); > > - if (!f) > > - return ERR_PTR(-ENOMEM); > > - > > - f->fctx = fctx; > > - > > - dma_fence_init(&f->base, &msm_fence_ops, &fctx->spinlock, > > - fctx->context, ++fctx->last_fence); > > - > > - return &f->base; > > + dma_fence_init_noref(f, &msm_fence_ops, &fctx->spinlock, > > + fctx->context, ++fctx->last_fence); > > } > > diff --git a/drivers/gpu/drm/msm/msm_fence.h b/drivers/gpu/drm/msm/msm_fence.h > > index cdaebfb94f5c..8fca37e9773b 100644 > > --- a/drivers/gpu/drm/msm/msm_fence.h > > +++ b/drivers/gpu/drm/msm/msm_fence.h > > @@ -81,7 +81,7 @@ void msm_fence_context_free(struct msm_fence_context *fctx); > > bool msm_fence_completed(struct msm_fence_context *fctx, uint32_t fence); > > void msm_update_fence(struct msm_fence_context *fctx, uint32_t fence); > > > > -struct dma_fence * msm_fence_alloc(struct msm_fence_context *fctx); > > +void msm_fence_init(struct msm_fence_context *fctx, struct dma_fence *f); > > > > static inline bool > > fence_before(uint32_t a, uint32_t b) > > diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h > > index c4844cf3a585..e06afed99d5b 100644 > > --- a/drivers/gpu/drm/msm/msm_gem.h > > +++ b/drivers/gpu/drm/msm/msm_gem.h > > @@ -259,10 +259,10 @@ struct msm_gem_submit { > > struct ww_acquire_ctx ticket; > > uint32_t seqno; /* Sequence number of the submit on the ring */ > > > > - /* Hw fence, which is created when the scheduler executes the job, and > > + /* Hw fence, which is initialized when the scheduler executes the job, and > > * is signaled when the hw finishes (via seqno write from cmdstream) > > */ > > - struct dma_fence *hw_fence; > > + struct dma_fence hw_fence; > > > > /* Userspace visible fence, which is signaled by the scheduler after > > * the hw_fence is signaled. > > @@ -309,16 +309,16 @@ static inline struct msm_gem_submit *to_msm_submit(struct drm_sched_job *job) > > return container_of(job, struct msm_gem_submit, base); > > } > > > > -void __msm_gem_submit_destroy(struct kref *kref); > > +void __msm_gem_submit_destroy(struct msm_gem_submit *submit); > > > > static inline void msm_gem_submit_get(struct msm_gem_submit *submit) > > { > > - kref_get(&submit->ref); > > + dma_fence_get(&submit->hw_fence); > > } > > > > static inline void msm_gem_submit_put(struct msm_gem_submit *submit) > > { > > - kref_put(&submit->ref, __msm_gem_submit_destroy); > > + dma_fence_put(&submit->hw_fence); > > } > > > > void msm_submit_retire(struct msm_gem_submit *submit); > > diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c > > index be4bf77103cd..522c8c82e827 100644 > > --- a/drivers/gpu/drm/msm/msm_gem_submit.c > > +++ b/drivers/gpu/drm/msm/msm_gem_submit.c > > @@ -47,7 +47,7 @@ static struct msm_gem_submit *submit_create(struct drm_device *dev, > > return ERR_PTR(ret); > > } > > > > - kref_init(&submit->ref); > > + kref_init(&submit->hw_fence.refcount); > > submit->dev = dev; > > submit->aspace = queue->ctx->aspace; > > submit->gpu = gpu; > > @@ -65,10 +65,9 @@ static struct msm_gem_submit *submit_create(struct drm_device *dev, > > return submit; > > } > > > > -void __msm_gem_submit_destroy(struct kref *kref) > > +/* Called when the hw_fence is destroyed: */ > > +void __msm_gem_submit_destroy(struct msm_gem_submit *submit) > > { > > - struct msm_gem_submit *submit = > > - container_of(kref, struct msm_gem_submit, ref); > > unsigned i; > > > > if (submit->fence_id) { > > @@ -78,7 +77,6 @@ void __msm_gem_submit_destroy(struct kref *kref) > > } > > > > dma_fence_put(submit->user_fence); > > - dma_fence_put(submit->hw_fence); > > > > put_pid(submit->pid); > > msm_submitqueue_put(submit->queue); > > diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c > > index 380249500325..a82d11dd5fcf 100644 > > --- a/drivers/gpu/drm/msm/msm_gpu.c > > +++ b/drivers/gpu/drm/msm/msm_gpu.c > > @@ -716,7 +716,7 @@ static void retire_submits(struct msm_gpu *gpu) > > * been signalled, then later submits are not signalled > > * either, so we are also done. > > */ > > - if (submit && dma_fence_is_signaled(submit->hw_fence)) { > > + if (submit && dma_fence_is_signaled(&submit->hw_fence)) { > > retire_submit(gpu, ring, submit); > > } else { > > break; > > @@ -760,7 +760,7 @@ void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit) > > > > msm_gpu_hw_init(gpu); > > > > - submit->seqno = submit->hw_fence->seqno; > > + submit->seqno = submit->hw_fence.seqno; > > > > msm_rd_dump_submit(priv->rd, submit, NULL); > > > > diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c > > index 57a8e9564540..5c54befa2427 100644 > > --- a/drivers/gpu/drm/msm/msm_ringbuffer.c > > +++ b/drivers/gpu/drm/msm/msm_ringbuffer.c > > @@ -18,7 +18,7 @@ static struct dma_fence *msm_job_run(struct drm_sched_job *job) > > struct msm_gpu *gpu = submit->gpu; > > int i; > > > > - submit->hw_fence = msm_fence_alloc(fctx); > > + msm_fence_init(fctx, &submit->hw_fence); > > > > for (i = 0; i < submit->nr_bos; i++) { > > struct drm_gem_object *obj = &submit->bos[i].obj->base; > > @@ -37,7 +37,7 @@ static struct dma_fence *msm_job_run(struct drm_sched_job *job) > > > > mutex_unlock(&gpu->lock); > > > > - return dma_fence_get(submit->hw_fence); > > + return dma_fence_get(&submit->hw_fence); > > } > > > > static void msm_job_free(struct drm_sched_job *job) >