1, This is necessary otherwise how can I access entity pointer after a job timedout , and why it is dangerous ? 2, what's the status field in the fences you were referring to ? I need to judge if it could satisfy my requirement -----Original Message----- From: Christian König [mailto:deathsimple@xxxxxxxxxxx] Sent: Monday, May 01, 2017 10:48 PM To: Liu, Monk <Monk.Liu at amd.com>; amd-gfx at lists.freedesktop.org Subject: Re: [PATCH 1/5] drm/amdgpu:keep ctx alive till all job finished Am 01.05.2017 um 09:22 schrieb Monk Liu: > for TDR guilty context feature, we need access ctx/s_entity field > member through sched job pointer,so ctx must keep alive till all job > from it signaled. NAK, that is unnecessary and quite dangerous. Instead we have the designed status field in the fences which should be checked for that. Regards, Christian. > > Change-Id: Ib87e9502f7a5c8c054c7e56956d7f7ad75998e43 > Signed-off-by: Monk Liu <Monk.Liu at amd.com> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu.h | 6 +++++- > drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 2 +- > drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 9 +++++++++ > drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 9 +++++++-- > drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 6 ------ > drivers/gpu/drm/amd/scheduler/gpu_scheduler.h | 1 + > 6 files changed, 23 insertions(+), 10 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > index e330009..8e031d6 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > @@ -760,10 +760,12 @@ struct amdgpu_ib { > uint32_t flags; > }; > > +struct amdgpu_ctx; > + > extern const struct amd_sched_backend_ops amdgpu_sched_ops; > > int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, > - struct amdgpu_job **job, struct amdgpu_vm *vm); > + struct amdgpu_job **job, struct amdgpu_vm *vm, struct > +amdgpu_ctx *ctx); > int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size, > struct amdgpu_job **job); > > @@ -802,6 +804,7 @@ struct amdgpu_ctx_mgr { > > struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id); > int amdgpu_ctx_put(struct amdgpu_ctx *ctx); > +struct amdgpu_ctx *amdgpu_ctx_kref_get(struct amdgpu_ctx *ctx); > > uint64_t amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, > struct fence *fence); > @@ -1129,6 +1132,7 @@ struct amdgpu_job { > struct amdgpu_sync sync; > struct amdgpu_ib *ibs; > struct fence *fence; /* the hw fence */ > + struct amdgpu_ctx *ctx; > uint32_t preamble_status; > uint32_t num_ibs; > void *owner; > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > index 699f5fe..267fb65 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c > @@ -234,7 +234,7 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data) > } > } > > - ret = amdgpu_job_alloc(p->adev, num_ibs, &p->job, vm); > + ret = amdgpu_job_alloc(p->adev, num_ibs, &p->job, vm, p->ctx); > if (ret) > goto free_all_kdata; > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c > index b4bbbb3..81438af 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c > @@ -25,6 +25,13 @@ > #include <drm/drmP.h> > #include "amdgpu.h" > > +struct amdgpu_ctx *amdgpu_ctx_kref_get(struct amdgpu_ctx *ctx) { > + if (ctx) > + kref_get(&ctx->refcount); > + return ctx; > +} > + > static int amdgpu_ctx_init(struct amdgpu_device *adev, struct amdgpu_ctx *ctx) > { > unsigned i, j; > @@ -56,6 +63,8 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, struct amdgpu_ctx *ctx) > rq, amdgpu_sched_jobs); > if (r) > goto failed; > + > + ctx->rings[i].entity.ptr_guilty = &ctx->guilty; /* kernel entity > +doesn't have ptr_guilty */ > } > > return 0; > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c > index 690ef3d..208da11 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c > @@ -40,7 +40,7 @@ static void amdgpu_job_timedout(struct amd_sched_job *s_job) > } > > int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, > - struct amdgpu_job **job, struct amdgpu_vm *vm) > + struct amdgpu_job **job, struct amdgpu_vm *vm, struct > +amdgpu_ctx *ctx) > { > size_t size = sizeof(struct amdgpu_job); > > @@ -57,6 +57,7 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, > (*job)->vm = vm; > (*job)->ibs = (void *)&(*job)[1]; > (*job)->num_ibs = num_ibs; > + (*job)->ctx = amdgpu_ctx_kref_get(ctx); > > amdgpu_sync_create(&(*job)->sync); > > @@ -68,7 +69,7 @@ int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size, > { > int r; > > - r = amdgpu_job_alloc(adev, 1, job, NULL); > + r = amdgpu_job_alloc(adev, 1, job, NULL, NULL); > if (r) > return r; > > @@ -94,6 +95,10 @@ void amdgpu_job_free_resources(struct amdgpu_job *job) > static void amdgpu_job_free_cb(struct amd_sched_job *s_job) > { > struct amdgpu_job *job = container_of(s_job, struct amdgpu_job, > base); > + struct amdgpu_ctx *ctx = job->ctx; > + > + if (ctx) > + amdgpu_ctx_put(ctx); > > fence_put(job->fence); > amdgpu_sync_free(&job->sync); > diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c > b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c > index 6f4e31f..9100ca8 100644 > --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c > +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c > @@ -208,12 +208,6 @@ void amd_sched_entity_fini(struct amd_gpu_scheduler *sched, > if (!amd_sched_entity_is_initialized(sched, entity)) > return; > > - /** > - * The client will not queue more IBs during this fini, consume existing > - * queued IBs > - */ > - wait_event(sched->job_scheduled, amd_sched_entity_is_idle(entity)); > - > amd_sched_rq_remove_entity(rq, entity); > kfifo_free(&entity->job_queue); > } > diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h > b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h > index 8cb41d3..ccbbcb0 100644 > --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h > +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h > @@ -49,6 +49,7 @@ struct amd_sched_entity { > > struct fence *dependency; > struct fence_cb cb; > + bool *ptr_guilty; > }; > > /**