Use the semaphore mechanism to make this happen; the semaphore is signaled from the CPU instead of by the GPU. Signed-off-by: Maarten Lankhorst <maarten.lankhorst@xxxxxxxxxxxxx> --- drivers/gpu/drm/radeon/radeon.h | 17 ++- drivers/gpu/drm/radeon/radeon_cs.c | 30 ++--- drivers/gpu/drm/radeon/radeon_fence.c | 13 ++- drivers/gpu/drm/radeon/radeon_semaphore.c | 184 ++++++++++++++++++++++++++++++ 4 files changed, 221 insertions(+), 23 deletions(-) diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index dddb2b7dd752..cd18fa7f801c 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -359,6 +359,11 @@ struct radeon_fence_driver { struct delayed_work lockup_work; }; +struct radeon_fence_cb { + struct fence_cb base; + struct fence *fence; +}; + struct radeon_fence { struct fence base; @@ -368,6 +373,10 @@ struct radeon_fence { unsigned ring; wait_queue_t fence_wake; + + atomic_t num_cpu_cbs; + struct radeon_fence_cb *cpu_cbs; + uint32_t *cpu_sema; }; int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring); @@ -574,9 +583,11 @@ int radeon_mode_dumb_mmap(struct drm_file *filp, */ struct radeon_semaphore { struct radeon_sa_bo *sa_bo; - signed waiters; + signed waiters, cpu_waiters, cpu_waiters_max; uint64_t gpu_addr; struct radeon_fence *sync_to[RADEON_NUM_RINGS]; + uint32_t *cpu_sema; + struct radeon_fence_cb *cpu_cbs; }; int radeon_semaphore_create(struct radeon_device *rdev, @@ -587,6 +598,10 @@ bool radeon_semaphore_emit_wait(struct radeon_device *rdev, int ring, struct radeon_semaphore *semaphore); void radeon_semaphore_sync_to(struct radeon_semaphore *semaphore, struct radeon_fence *fence); +int radeon_semaphore_sync_obj(struct radeon_device *rdev, + struct radeon_semaphore *semaphore, + struct reservation_object *resv); + int radeon_semaphore_sync_rings(struct radeon_device *rdev, struct radeon_semaphore *semaphore, int waiting_ring); diff --git a/drivers/gpu/drm/radeon/radeon_cs.c 
b/drivers/gpu/drm/radeon/radeon_cs.c index 8ad4e2cfae15..b141f5bd029d 100644 --- a/drivers/gpu/drm/radeon/radeon_cs.c +++ b/drivers/gpu/drm/radeon/radeon_cs.c @@ -250,32 +250,16 @@ static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority static int radeon_cs_sync_rings(struct radeon_cs_parser *p) { - int i; - - for (i = 0; i < p->nrelocs; i++) { - struct reservation_object *resv; - struct fence *fence; - struct radeon_fence *rfence; - int r; + int i, ret = 0; + for (i = 0; !ret && i < p->nrelocs; i++) { if (!p->relocs[i].robj) continue; - resv = p->relocs[i].robj->tbo.resv; - fence = reservation_object_get_excl(resv); - if (!fence) - continue; - rfence = to_radeon_fence(fence); - if (!rfence || rfence->rdev != p->rdev) { - r = fence_wait(fence, true); - if (r) - return r; - continue; - } - - radeon_semaphore_sync_to(p->ib.semaphore, rfence); + ret = radeon_semaphore_sync_obj(p->rdev, p->ib.semaphore, + p->relocs[i].robj->tbo.resv); } - return 0; + return ret; } /* XXX: note that this is called from the legacy UMS CS ioctl as well */ @@ -442,6 +426,10 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error, bo */ list_sort(NULL, &parser->validated, cmp_size_smaller_first); + /* must be called with all reservation_objects still held */ + radeon_semaphore_free(parser->rdev, &parser->ib.semaphore, + parser->ib.fence); + ttm_eu_fence_buffer_objects(&parser->ticket, &parser->validated, &parser->ib.fence->base); diff --git a/drivers/gpu/drm/radeon/radeon_fence.c b/drivers/gpu/drm/radeon/radeon_fence.c index 0262fe2580d2..7687a7f8f41b 100644 --- a/drivers/gpu/drm/radeon/radeon_fence.c +++ b/drivers/gpu/drm/radeon/radeon_fence.c @@ -142,6 +142,8 @@ int radeon_fence_emit(struct radeon_device *rdev, (*fence)->ring = ring; fence_init(&(*fence)->base, &radeon_fence_ops, &rdev->fence_queue.lock, rdev->fence_context + ring, seq); + (*fence)->cpu_cbs = NULL; + (*fence)->cpu_sema = NULL; radeon_fence_ring_emit(rdev, ring, *fence); 
trace_radeon_fence_emit(rdev->ddev, ring, (*fence)->seq); radeon_fence_schedule_check(rdev, ring); @@ -1057,11 +1059,20 @@ static signed long radeon_fence_default_wait(struct fence *f, bool intr, return t; } +static void __radeon_fence_destroy(struct fence *f) +{ + struct radeon_fence *fence = to_radeon_fence(f); + + WARN_ON(fence->cpu_cbs); + kfree(fence->cpu_cbs); + fence_free(f); +} + const struct fence_ops radeon_fence_ops = { .get_driver_name = radeon_fence_get_driver_name, .get_timeline_name = radeon_fence_get_timeline_name, .enable_signaling = radeon_fence_enable_signaling, .signaled = radeon_fence_is_signaled, .wait = radeon_fence_default_wait, - .release = NULL, + .release = __radeon_fence_destroy, }; diff --git a/drivers/gpu/drm/radeon/radeon_semaphore.c b/drivers/gpu/drm/radeon/radeon_semaphore.c index 56d9fd66d8ae..2e71463d11c5 100644 --- a/drivers/gpu/drm/radeon/radeon_semaphore.c +++ b/drivers/gpu/drm/radeon/radeon_semaphore.c @@ -30,6 +30,7 @@ #include <drm/drmP.h> #include "radeon.h" #include "radeon_trace.h" +#include <trace/events/fence.h> int radeon_semaphore_create(struct radeon_device *rdev, struct radeon_semaphore **semaphore) @@ -49,7 +50,11 @@ int radeon_semaphore_create(struct radeon_device *rdev, return r; } (*semaphore)->waiters = 0; + (*semaphore)->cpu_waiters = 0; + (*semaphore)->cpu_waiters_max = 0; (*semaphore)->gpu_addr = radeon_sa_bo_gpu_addr((*semaphore)->sa_bo); + (*semaphore)->cpu_sema = NULL; + (*semaphore)->cpu_cbs = NULL; cpu_addr = radeon_sa_bo_cpu_addr((*semaphore)->sa_bo); for (i = 0; i < RADEON_NUM_SYNCS; ++i) @@ -115,6 +120,101 @@ void radeon_semaphore_sync_to(struct radeon_semaphore *semaphore, semaphore->sync_to[fence->ring] = radeon_fence_later(fence, other); } +int radeon_semaphore_reserve_cpu_waiters(struct radeon_semaphore *semaphore, int add) +{ + int max = 4; + struct radeon_fence_cb *cpu_cbs; + + if (semaphore->cpu_waiters + add <= semaphore->cpu_waiters_max) + return 0; + + if (semaphore->cpu_waiters_max) + max 
= semaphore->cpu_waiters_max * 2; + + cpu_cbs = krealloc(semaphore->cpu_cbs, max * sizeof(*cpu_cbs), GFP_KERNEL); + if (!cpu_cbs) + return -ENOMEM; + semaphore->cpu_cbs = cpu_cbs; + semaphore->cpu_waiters_max = max; + return 0; +} + +static void radeon_semaphore_add_cpu_cb(struct radeon_semaphore *semaphore, + struct fence *fence) +{ + unsigned i; + struct radeon_fence_cb *empty = NULL; + + for (i = 0; i < semaphore->cpu_waiters; ++i) { + struct fence *other = semaphore->cpu_cbs[i].fence; + + if (!other) + empty = &semaphore->cpu_cbs[i]; + else if (other->context == fence->context) { + semaphore->cpu_cbs[i].fence = fence_later(other, fence); + return; + } + } + + if (!empty) + empty = &semaphore->cpu_cbs[semaphore->cpu_waiters++]; + + empty->fence = fence; + return; +} + +/** + * radeon_semaphore_sync_obj - use the semaphore to sync to a bo + * + * @semaphore: semaphore object to add fence to + * @resv: the reservation_object to sync to + * + * Sync the reservation_object using this semaphore. + * + * radeon_semaphore_free must be called with all reservation_object locks + * still held!!! 
+ */ +int radeon_semaphore_sync_obj(struct radeon_device *rdev, + struct radeon_semaphore *semaphore, + struct reservation_object *resv) +{ + struct fence *fence; + struct radeon_fence *rfence; + struct reservation_object_list *fobj; + int ret, i; + + fobj = reservation_object_get_list(resv); + if (fobj && fobj->shared_count) { + ret = radeon_semaphore_reserve_cpu_waiters(semaphore, fobj->shared_count); + if (ret) + return ret; + for (i = 0; i < fobj->shared_count; ++i) { + fence = rcu_dereference_protected(fobj->shared[i], + reservation_object_held(resv)); + + radeon_semaphore_add_cpu_cb(semaphore, fence); + } + return 0; + } + + fence = reservation_object_get_excl(resv); + if (!fence) + return 0; + + rfence = to_radeon_fence(fence); + if (rfence && rfence->rdev == rdev) { + struct radeon_fence *other = semaphore->sync_to[rfence->ring]; + + semaphore->sync_to[rfence->ring] = + radeon_fence_later(rfence, other); + return 0; + } + ret = radeon_semaphore_reserve_cpu_waiters(semaphore, 1); + if (!ret) + radeon_semaphore_add_cpu_cb(semaphore, fence); + return ret; +} + /** * radeon_semaphore_sync_rings - sync ring to all registered fences * @@ -124,6 +224,8 @@ void radeon_semaphore_sync_to(struct radeon_semaphore *semaphore, * * Ensure that all registered fences are signaled before letting * the ring continue. The caller must hold the ring lock. + * + * This function may only be called once on a semaphore. 
*/ int radeon_semaphore_sync_rings(struct radeon_device *rdev, struct radeon_semaphore *semaphore, @@ -132,6 +234,16 @@ int radeon_semaphore_sync_rings(struct radeon_device *rdev, unsigned count = 0; int i, r; + if (semaphore->cpu_waiters) { + /* allocate enough space for sync command */ + if (radeon_semaphore_emit_wait(rdev, ring, semaphore)) { + semaphore->cpu_sema = radeon_sa_bo_cpu_addr(semaphore->sa_bo); + semaphore->gpu_addr += 8; + ++count; + } else + semaphore->cpu_waiters = -1; + } + for (i = 0; i < RADEON_NUM_RINGS; ++i) { struct radeon_fence *fence = semaphore->sync_to[i]; @@ -188,6 +300,68 @@ int radeon_semaphore_sync_rings(struct radeon_device *rdev, return 0; } +static void radeon_semaphore_cpu_trigger(struct fence *other_fence, + struct fence_cb *fence_cb) +{ + struct radeon_fence_cb *cb = (struct radeon_fence_cb*)fence_cb; + struct radeon_fence *fence = (struct radeon_fence *)cb->fence; + +#ifdef CONFIG_FENCE_TRACE + int ret = atomic_dec_return(&fence->num_cpu_cbs); + + if (ret) + FENCE_TRACE(&fence->base, "triggered from %u#%u, %i remaining\n", + ret, other_fence->context, other_fence->seqno); + else +#else + if (atomic_dec_and_test(&fence->num_cpu_cbs)) +#endif + { + FENCE_TRACE(&fence->base, "triggered from %u#%u, starting work\n", + other_fence->context, other_fence->seqno); + + *fence->cpu_sema = ~0; + + kfree(fence->cpu_cbs); + fence->cpu_cbs = NULL; + } +} + +static void radeon_semaphore_arm_cpu_cbs(struct radeon_semaphore *semaphore, + struct radeon_fence *fence) +{ + unsigned i, skipped = 0; + + fence->cpu_cbs = semaphore->cpu_cbs; + fence->cpu_sema = semaphore->cpu_sema; + atomic_set(&fence->num_cpu_cbs, semaphore->cpu_waiters); + + for (i = 0; i < semaphore->cpu_waiters; ++i) { + struct fence *other = fence->cpu_cbs[i].fence; + + if (other) { + fence->cpu_cbs[i].fence = &fence->base; + trace_fence_annotate_wait_on(&fence->base, other); + + FENCE_TRACE(&fence->base, "queued wait on %u#%u\n", + other->context, other->seqno); + + if 
(!fence_add_callback(other, &fence->cpu_cbs[i].base, + radeon_semaphore_cpu_trigger)) + continue; + } + skipped++; + } + + if (skipped && atomic_sub_and_test(skipped, &fence->num_cpu_cbs)) { + FENCE_TRACE(&fence->base, "No triggers, starting..\n"); + + *fence->cpu_sema = ~0; + kfree(fence->cpu_cbs); + fence->cpu_cbs = NULL; + } +} + void radeon_semaphore_free(struct radeon_device *rdev, struct radeon_semaphore **semaphore, struct radeon_fence *fence) @@ -195,6 +369,16 @@ void radeon_semaphore_free(struct radeon_device *rdev, if (semaphore == NULL || *semaphore == NULL) { return; } + if ((*semaphore)->cpu_cbs) { + (*semaphore)->waiters--; + + if (!fence) { + *(*semaphore)->cpu_sema = ~0U; + kfree((*semaphore)->cpu_cbs); + } else + radeon_semaphore_arm_cpu_cbs(*semaphore, fence); + } + if ((*semaphore)->waiters > 0) { dev_err(rdev->dev, "semaphore %p has more waiters than signalers," " hardware lockup imminent!\n", *semaphore); -- 2.0.4 _______________________________________________ dri-devel mailing list dri-devel@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/dri-devel