2017-06-26 15:41 GMT+02:00 Lucas Stach <l.stach@xxxxxxxxxxxxxx>: > Am Freitag, den 09.06.2017, 12:26 +0200 schrieb Christian Gmeiner: >> With 'sync points' we can sample the reqeustes perform signals >> before and/or after the submited command buffer. >> >> Signed-off-by: Christian Gmeiner <christian.gmeiner@xxxxxxxxx> >> --- >> drivers/gpu/drm/etnaviv/etnaviv_gpu.c | 112 >> +++++++++++++++++++++++++++++----- >> drivers/gpu/drm/etnaviv/etnaviv_gpu.h | 4 ++ >> 2 files changed, 102 insertions(+), 14 deletions(-) >> >> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c >> b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c >> index 0766861..2e9f031 100644 >> --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c >> +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c >> @@ -1313,12 +1313,47 @@ void etnaviv_gpu_pm_put(struct etnaviv_gpu >> *gpu) >> pm_runtime_put_autosuspend(gpu->dev); >> } >> >> +static void sync_point_perfmon_sample(struct etnaviv_gpu *gpu, >> + struct etnaviv_event *event, unsigned int flags) >> +{ >> + unsigned int i; >> + >> + for (i = 0; i < event->nr_pmrs; i++) { >> + const struct etnaviv_perfmon_request *pmr = event- >> >pmrs + i; >> + >> + if (pmr->flags == flags) >> + etnaviv_perfmon_process(gpu, pmr); >> + } >> +} >> + >> +static void sync_point_perfmon_sample_pre(struct etnaviv_gpu *gpu, >> + struct etnaviv_event *event) >> +{ >> + sync_point_perfmon_sample(gpu, event, ETNA_PM_PROCESS_PRE); >> +} >> + >> +static void sync_point_perfmon_sample_post(struct etnaviv_gpu *gpu, >> + struct etnaviv_event *event) >> +{ >> + unsigned int i; >> + >> + sync_point_perfmon_sample(gpu, event, ETNA_PM_PROCESS_POST); >> + >> + for (i = 0; i < event->nr_pmrs; i++) { >> + const struct etnaviv_perfmon_request *pmr = event- >> >pmrs + i; >> + >> + *pmr->bo_vma = pmr->sequence; >> + } >> +} >> + >> + >> /* add bo's to gpu's ring, and kick gpu: */ >> int etnaviv_gpu_submit(struct etnaviv_gpu *gpu, >> struct etnaviv_gem_submit *submit, struct etnaviv_cmdbuf >> *cmdbuf) >> { >> struct dma_fence *fence; >> unsigned int event, i; >> + unsigned int sync[2] = { ~0U, ~0U }; >> int ret; >> >> ret = etnaviv_gpu_pm_get_sync(gpu); >> @@ -1341,6 +1376,39 @@ int etnaviv_gpu_submit(struct etnaviv_gpu >> *gpu, >> goto out_pm_put; >> } >> >> + /* >> + * if there are performance monitor requests we need to have >> a sync point to >> + * re-configure gpu and process ETNA_PM_PROCESS_PRE >> requests. >> + */ >> + if (cmdbuf->nr_pmrs) { >> + sync[0] = event_alloc(gpu); >> + >> + if (unlikely(sync[0] == ~0U)) { >> + DRM_ERROR("no free events for sync point >> 0\n"); >> + event_free(gpu, event); >> + ret = -EBUSY; >> + goto out_pm_put; >> + } >> + } >> + >> + /* >> + * if there are performance monitor requests we need to have >> sync point to >> + * re-configure gpu, process ETNA_PM_PROCESS_POST requests >> and update the >> + * sequence number for userspace. >> + */ >> + if (cmdbuf->nr_pmrs) { >> + sync[1] = event_alloc(gpu); >> + >> + if (unlikely(sync[1] == ~0U)) { >> + DRM_ERROR("no free events for sync point >> 1\n"); >> + event_free(gpu, event); >> + if (unlikely(sync[0] == ~0U)) >> + event_free(gpu, sync[0]); >> + ret = -EBUSY; >> + goto out_pm_put; >> + } >> + } >> + > > This is dangerous. We aren't holding the GPU lock at this point (and we > can't because of livelocks with the GPU hangchecker), so given enough > parallel submits with PMRs all the submits might abort as they can't > allocate enough events, as each one might hold one out of the available > events. > > I think what we need here is to extend the event_alloc API to take the > number of events we need and grab them all at once under the event > spinlock. > That is a good idea - will change the event_alloc API in a separate patch. >> mutex_lock(&gpu->lock); >> >> fence = etnaviv_gpu_fence_alloc(gpu); >> @@ -1360,8 +1428,22 @@ int etnaviv_gpu_submit(struct etnaviv_gpu >> *gpu, >> gpu->lastctx = cmdbuf->ctx; >> } >> >> + if (sync[0] != ~0U) { >> + gpu->event[sync[0]].sync_point = >> &sync_point_perfmon_sample_pre; >> + gpu->event[sync[0]].nr_pmrs = cmdbuf->nr_pmrs; >> + gpu->event[sync[0]].pmrs = cmdbuf->pmrs; >> + etnaviv_sync_point_queue(gpu, sync[0]); >> + } >> + >> etnaviv_buffer_queue(gpu, event, cmdbuf); >> >> + if (sync[1] != ~0U) { >> + gpu->event[sync[1]].sync_point = >> &sync_point_perfmon_sample_post; >> + gpu->event[sync[1]].nr_pmrs = cmdbuf->nr_pmrs; >> + gpu->event[sync[1]].pmrs = cmdbuf->pmrs; >> + etnaviv_sync_point_queue(gpu, sync[1]); >> + } >> + >> cmdbuf->fence = fence; >> list_add_tail(&cmdbuf->node, &gpu->active_cmd_list); >> >> @@ -1455,20 +1537,22 @@ static irqreturn_t irq_handler(int irq, void >> *data) >> etnaviv_process_sync_point(gpu, >> &gpu->event[event]); >> >> fence = gpu->event[event].fence; >> - gpu->event[event].fence = NULL; >> - dma_fence_signal(fence); >> - >> - /* >> - * Events can be processed out of >> order. Eg, >> - * - allocate and queue event 0 >> - * - allocate event 1 >> - * - event 0 completes, we process it >> - * - allocate and queue event 0 >> - * - event 1 and event 0 complete >> - * we can end up processing event 0 first, >> then 1. >> - */ >> - if (fence_after(fence->seqno, gpu- >> >completed_fence)) >> - gpu->completed_fence = fence->seqno; >> + if (fence) { >> + gpu->event[event].fence = NULL; >> + dma_fence_signal(fence); >> + >> + /* >> + * Events can be processed out of >> order. Eg, >> + * - allocate and queue event 0 >> + * - allocate event 1 >> + * - event 0 completes, we process >> it >> + * - allocate and queue event 0 >> + * - event 1 and event 0 complete >> + * we can end up processing event 0 >> first, then 1. >> + */ >> + if (fence_after(fence->seqno, gpu- >> >completed_fence)) >> + gpu->completed_fence = >> fence->seqno; >> + } >> >> event_free(gpu, event); >> } >> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h >> b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h >> index fee6ed9..71375ab 100644 >> --- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h >> +++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h >> @@ -92,6 +92,10 @@ struct etnaviv_event { >> struct dma_fence *fence; >> >> void (*sync_point)(struct etnaviv_gpu *gpu, struct >> etnaviv_event *event); >> + >> + /* performance monitor requests */ >> + unsigned int nr_pmrs; >> + struct etnaviv_perfmon_request *pmrs; > > This should be a pointer to the cmdbuf itself, so we don't copy the > information to various places. Makes sense - will be changed in v2. >> }; >> >> struct etnaviv_cmdbuf_suballoc; greets -- Christian Gmeiner, MSc https://www.youtube.com/user/AloryOFFICIAL https://soundcloud.com/christian-gmeiner _______________________________________________ dri-devel mailing list dri-devel@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/dri-devel