On Mon, May 7, 2012 at 11:04 AM, Christian König <deathsimple@xxxxxxxxxxx> wrote: > On 07.05.2012 16:39, Jerome Glisse wrote: >> >> On Mon, May 7, 2012 at 7:42 AM, Christian König<deathsimple@xxxxxxxxxxx> >> wrote: >>> >>> From: Jerome Glisse<jglisse@xxxxxxxxxx> >>> >>> This convert fence to use uint64_t sequence number intention is >>> to use the fact that uin64_t is big enough that we don't need to >>> care about wrap around. >>> >>> Tested with and without writeback using 0xFFFFF000 as initial >>> fence sequence and thus allowing to test the wrap around from >>> 32bits to 64bits. >>> >>> v2: Add comment about possible race btw CPU& GPU, add comment >>> >>> stressing that we need 2 dword aligned for R600_WB_EVENT_OFFSET >>> Read fence sequenc in reverse order of GPU write them so we >>> mitigate the race btw CPU and GPU. >>> >>> v3: Drop the need for ring to emit the 64bits fence, and just have >>> each ring emit the lower 32bits of the fence sequence. We >>> handle the wrap over 32bits in fence_process. >>> >>> v4: Just a small optimization: Don't reread the last_seq value >>> if loop restarts, since we already know its value anyway. >>> Also start at zero not one for seq value and use pre instead >>> of post increment in emmit, otherwise wait_empty will deadlock. >> >> Why changing that v3 was already good no deadlock. I started at 1 >> especialy for that, a signaled fence is set to 0 so it always compare >> as signaled. Just using preincrement is exactly like starting at one. >> I don't see the need for this change but if it makes you happy. > > > Not exactly, the last emitted sequence is also used in > radeon_fence_wait_empty. So when you use post increment > radeon_fence_wait_empty will actually not wait for the last emitted fence to > be signaled, but for last emitted + 1, so it practically waits forever. > > Without this change suspend (for example) will just lockup. > > Cheers, > Christian. Yeah you right, my tree had a fix for that. I probably messed up the rebase patch at one point. Well as your version fix it i am fine with it. Cheers, Jerome >> >> Cheers, >> Jerome >>> >>> Signed-off-by: Jerome Glisse<jglisse@xxxxxxxxxx> >>> Signed-off-by: Christian König<deathsimple@xxxxxxxxxxx> >>> --- >>> drivers/gpu/drm/radeon/radeon.h | 39 ++++++----- >>> drivers/gpu/drm/radeon/radeon_fence.c | 116 >>> +++++++++++++++++++++++---------- >>> drivers/gpu/drm/radeon/radeon_ring.c | 9 ++- >>> 3 files changed, 107 insertions(+), 57 deletions(-) >>> >>> diff --git a/drivers/gpu/drm/radeon/radeon.h >>> b/drivers/gpu/drm/radeon/radeon.h >>> index e99ea81..cdf46bc 100644 >>> --- a/drivers/gpu/drm/radeon/radeon.h >>> +++ b/drivers/gpu/drm/radeon/radeon.h >>> @@ -100,28 +100,32 @@ extern int radeon_lockup_timeout; >>> * Copy from radeon_drv.h so we don't have to include both and have >>> conflicting >>> * symbol; >>> */ >>> -#define RADEON_MAX_USEC_TIMEOUT 100000 /* 100 ms */ >>> -#define RADEON_FENCE_JIFFIES_TIMEOUT (HZ / 2) >>> +#define RADEON_MAX_USEC_TIMEOUT 100000 /* 100 ms >>> */ >>> +#define RADEON_FENCE_JIFFIES_TIMEOUT (HZ / 2) >>> /* RADEON_IB_POOL_SIZE must be a power of 2 */ >>> -#define RADEON_IB_POOL_SIZE 16 >>> -#define RADEON_DEBUGFS_MAX_COMPONENTS 32 >>> -#define RADEONFB_CONN_LIMIT 4 >>> -#define RADEON_BIOS_NUM_SCRATCH 8 >>> +#define RADEON_IB_POOL_SIZE 16 >>> +#define RADEON_DEBUGFS_MAX_COMPONENTS 32 >>> +#define RADEONFB_CONN_LIMIT 4 >>> +#define RADEON_BIOS_NUM_SCRATCH 8 >>> >>> /* max number of rings */ >>> -#define RADEON_NUM_RINGS 3 >>> +#define RADEON_NUM_RINGS 3 >>> + >>> +/* fence seq are set to this number when signaled */ >>> +#define RADEON_FENCE_SIGNALED_SEQ 0LL >>> +#define RADEON_FENCE_NOTEMITED_SEQ (~0LL) >>> >>> /* internal ring indices */ >>> /* r1xx+ has gfx CP ring */ >>> -#define RADEON_RING_TYPE_GFX_INDEX 0 >>> +#define RADEON_RING_TYPE_GFX_INDEX 0 >>> >>> /* cayman has 2 compute CP rings */ >>> -#define CAYMAN_RING_TYPE_CP1_INDEX 1 >>> -#define CAYMAN_RING_TYPE_CP2_INDEX 2 >>> +#define CAYMAN_RING_TYPE_CP1_INDEX 1 >>> +#define CAYMAN_RING_TYPE_CP2_INDEX 2 >>> >>> /* hardcode those limit for now */ >>> -#define RADEON_VA_RESERVED_SIZE (8<< 20) >>> -#define RADEON_IB_VM_MAX_SIZE (64<< 10) >>> +#define RADEON_VA_RESERVED_SIZE (8<< 20) >>> +#define RADEON_IB_VM_MAX_SIZE (64<< 10) >>> >>> /* >>> * Errata workarounds. >>> @@ -254,8 +258,9 @@ struct radeon_fence_driver { >>> uint32_t scratch_reg; >>> uint64_t gpu_addr; >>> volatile uint32_t *cpu_addr; >>> - atomic_t seq; >>> - uint32_t last_seq; >>> + /* seq is protected by ring emission lock */ >>> + uint64_t seq; >>> + atomic64_t last_seq; >>> unsigned long last_activity; >>> wait_queue_head_t queue; >>> struct list_head emitted; >>> @@ -268,11 +273,9 @@ struct radeon_fence { >>> struct kref kref; >>> struct list_head list; >>> /* protected by radeon_fence.lock */ >>> - uint32_t seq; >>> - bool emitted; >>> - bool signaled; >>> + uint64_t seq; >>> /* RB, DMA, etc. */ >>> - int ring; >>> + unsigned ring; >>> struct radeon_semaphore *semaphore; >>> }; >>> >>> diff --git a/drivers/gpu/drm/radeon/radeon_fence.c >>> b/drivers/gpu/drm/radeon/radeon_fence.c >>> index 5bb78bf..feb2bbc 100644 >>> --- a/drivers/gpu/drm/radeon/radeon_fence.c >>> +++ b/drivers/gpu/drm/radeon/radeon_fence.c >>> @@ -66,14 +66,14 @@ int radeon_fence_emit(struct radeon_device *rdev, >>> struct radeon_fence *fence) >>> unsigned long irq_flags; >>> >>> write_lock_irqsave(&rdev->fence_lock, irq_flags); >>> - if (fence->emitted) { >>> + if (fence->seq&& fence->seq< RADEON_FENCE_NOTEMITED_SEQ) { >>> >>> write_unlock_irqrestore(&rdev->fence_lock, irq_flags); >>> return 0; >>> } >>> - fence->seq = >>> atomic_add_return(1,&rdev->fence_drv[fence->ring].seq); >>> >>> + /* we are protected by the ring emission mutex */ >>> + fence->seq = ++rdev->fence_drv[fence->ring].seq; >>> radeon_fence_ring_emit(rdev, fence->ring, fence); >>> trace_radeon_fence_emit(rdev->ddev, fence->seq); >>> - fence->emitted = true; >>> /* are we the first fence on a previusly idle ring? */ >>> if (list_empty(&rdev->fence_drv[fence->ring].emitted)) { >>> rdev->fence_drv[fence->ring].last_activity = jiffies; >>> @@ -87,14 +87,60 @@ static bool radeon_fence_poll_locked(struct >>> radeon_device *rdev, int ring) >>> { >>> struct radeon_fence *fence; >>> struct list_head *i, *n; >>> - uint32_t seq; >>> + uint64_t seq, last_seq; >>> + unsigned count_loop = 0; >>> bool wake = false; >>> >>> - seq = radeon_fence_read(rdev, ring); >>> - if (seq == rdev->fence_drv[ring].last_seq) >>> - return false; >>> + /* Note there is a scenario here for an infinite loop but it's >>> + * very unlikely to happen. For it to happen, the current polling >>> + * process need to be interrupted by another process and another >>> + * process needs to update the last_seq btw the atomic read and >>> + * xchg of the current process. >>> + * >>> + * More over for this to go in infinite loop there need to be >>> + * continuously new fence signaled ie radeon_fence_read needs >>> + * to return a different value each time for both the currently >>> + * polling process and the other process that xchg the last_seq >>> + * btw atomic read and xchg of the current process. And the >>> + * value the other process set as last seq must be higher than >>> + * the seq value we just read. Which means that current process >>> + * need to be interrupted after radeon_fence_read and before >>> + * atomic xchg. >>> + * >>> + * To be even more safe we count the number of time we loop and >>> + * we bail after 10 loop just accepting the fact that we might >>> + * have temporarly set the last_seq not to the true real last >>> + * seq but to an older one. >>> + */ >>> + last_seq = atomic64_read(&rdev->fence_drv[ring].last_seq); >>> + do { >>> + seq = radeon_fence_read(rdev, ring); >>> + seq |= last_seq& 0xffffffff00000000LL; >>> >>> + if (seq< last_seq) { >>> + seq += 0x100000000LL; >>> + } >>> >>> - rdev->fence_drv[ring].last_seq = seq; >>> + if (!wake&& seq == last_seq) { >>> >>> + return false; >>> + } >>> + /* If we loop over we don't want to return without >>> + * checking if a fence is signaled as it means that the >>> + * seq we just read is different from the previous on. >>> + */ >>> + wake = true; >>> + if ((count_loop++)> 10) { >>> + /* We looped over too many time leave with the >>> + * fact that we might have set an older fence >>> + * seq then the current real last seq as signaled >>> + * by the hw. >>> + */ >>> + break; >>> + } >>> + last_seq = seq; >>> + } while (atomic64_xchg(&rdev->fence_drv[ring].last_seq, seq)> >>> seq); >>> + >>> + /* reset wake to false */ >>> + wake = false; >>> rdev->fence_drv[ring].last_activity = jiffies; >>> >>> n = NULL; >>> @@ -112,7 +158,7 @@ static bool radeon_fence_poll_locked(struct >>> radeon_device *rdev, int ring) >>> n = i->prev; >>> list_move_tail(i,&rdev->fence_drv[ring].signaled); >>> >>> fence = list_entry(i, struct radeon_fence, list); >>> - fence->signaled = true; >>> + fence->seq = RADEON_FENCE_SIGNALED_SEQ; >>> i = n; >>> } while (i !=&rdev->fence_drv[ring].emitted); >>> >>> wake = true; >>> @@ -128,7 +174,7 @@ static void radeon_fence_destroy(struct kref *kref) >>> fence = container_of(kref, struct radeon_fence, kref); >>> write_lock_irqsave(&fence->rdev->fence_lock, irq_flags); >>> list_del(&fence->list); >>> - fence->emitted = false; >>> + fence->seq = RADEON_FENCE_NOTEMITED_SEQ; >>> write_unlock_irqrestore(&fence->rdev->fence_lock, irq_flags); >>> if (fence->semaphore) >>> radeon_semaphore_free(fence->rdev, fence->semaphore); >>> @@ -145,9 +191,7 @@ int radeon_fence_create(struct radeon_device *rdev, >>> } >>> kref_init(&((*fence)->kref)); >>> (*fence)->rdev = rdev; >>> - (*fence)->emitted = false; >>> - (*fence)->signaled = false; >>> - (*fence)->seq = 0; >>> + (*fence)->seq = RADEON_FENCE_NOTEMITED_SEQ; >>> (*fence)->ring = ring; >>> (*fence)->semaphore = NULL; >>> INIT_LIST_HEAD(&(*fence)->list); >>> @@ -163,18 +207,18 @@ bool radeon_fence_signaled(struct radeon_fence >>> *fence) >>> return true; >>> >>> write_lock_irqsave(&fence->rdev->fence_lock, irq_flags); >>> - signaled = fence->signaled; >>> + signaled = (fence->seq == RADEON_FENCE_SIGNALED_SEQ); >>> /* if we are shuting down report all fence as signaled */ >>> if (fence->rdev->shutdown) { >>> signaled = true; >>> } >>> - if (!fence->emitted) { >>> + if (fence->seq == RADEON_FENCE_NOTEMITED_SEQ) { >>> WARN(1, "Querying an unemitted fence : %p !\n", fence); >>> signaled = true; >>> } >>> if (!signaled) { >>> radeon_fence_poll_locked(fence->rdev, fence->ring); >>> - signaled = fence->signaled; >>> + signaled = (fence->seq == RADEON_FENCE_SIGNALED_SEQ); >>> } >>> write_unlock_irqrestore(&fence->rdev->fence_lock, irq_flags); >>> return signaled; >>> @@ -183,8 +227,8 @@ bool radeon_fence_signaled(struct radeon_fence >>> *fence) >>> int radeon_fence_wait(struct radeon_fence *fence, bool intr) >>> { >>> struct radeon_device *rdev; >>> - unsigned long irq_flags, timeout; >>> - u32 seq; >>> + unsigned long irq_flags, timeout, last_activity; >>> + uint64_t seq; >>> int i, r; >>> bool signaled; >>> >>> @@ -207,7 +251,9 @@ int radeon_fence_wait(struct radeon_fence *fence, >>> bool intr) >>> timeout = 1; >>> } >>> /* save current sequence value used to check for GPU >>> lockups */ >>> - seq = rdev->fence_drv[fence->ring].last_seq; >>> + seq = >>> atomic64_read(&rdev->fence_drv[fence->ring].last_seq); >>> + /* Save current last activity valuee, used to check for >>> GPU lockups */ >>> + last_activity = >>> rdev->fence_drv[fence->ring].last_activity; >>> read_unlock_irqrestore(&rdev->fence_lock, irq_flags); >>> >>> trace_radeon_fence_wait_begin(rdev->ddev, seq); >>> @@ -235,24 +281,23 @@ int radeon_fence_wait(struct radeon_fence *fence, >>> bool intr) >>> } >>> >>> write_lock_irqsave(&rdev->fence_lock, irq_flags); >>> - /* check if sequence value has changed since >>> last_activity */ >>> - if (seq != rdev->fence_drv[fence->ring].last_seq) >>> { >>> + /* test if somebody else has already decided that >>> this is a lockup */ >>> + if (last_activity != >>> rdev->fence_drv[fence->ring].last_activity) { >>> write_unlock_irqrestore(&rdev->fence_lock, >>> irq_flags); >>> continue; >>> } >>> >>> - /* change sequence value on all rings, so nobody >>> else things there is a lockup */ >>> - for (i = 0; i< RADEON_NUM_RINGS; ++i) >>> - rdev->fence_drv[i].last_seq -= 0x10000; >>> - >>> - rdev->fence_drv[fence->ring].last_activity = >>> jiffies; >>> write_unlock_irqrestore(&rdev->fence_lock, >>> irq_flags); >>> >>> if (radeon_ring_is_lockup(rdev, >>> fence->ring,&rdev->ring[fence->ring])) { >>> >>> - >>> /* good news we believe it's a lockup */ >>> - printk(KERN_WARNING "GPU lockup (waiting >>> for 0x%08X last fence id 0x%08X)\n", >>> - fence->seq, seq); >>> + dev_warn(rdev->dev, "GPU lockup (waiting >>> for 0x%016llx last fence id 0x%016llx)\n", >>> + fence->seq, seq); >>> + >>> + /* change last activity so nobody else >>> think there is a lockup */ >>> + for (i = 0; i< RADEON_NUM_RINGS; ++i) { >>> + rdev->fence_drv[i].last_activity >>> = jiffies; >>> + } >>> >>> /* mark the ring as not ready any more */ >>> rdev->ring[fence->ring].ready = false; >>> @@ -387,9 +432,9 @@ int radeon_fence_driver_start_ring(struct >>> radeon_device *rdev, int ring) >>> } >>> rdev->fence_drv[ring].cpu_addr =&rdev->wb.wb[index/4]; >>> >>> rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr + index; >>> - radeon_fence_write(rdev, >>> atomic_read(&rdev->fence_drv[ring]..seq), ring); >>> >>> + radeon_fence_write(rdev, rdev->fence_drv[ring].seq, ring); >>> rdev->fence_drv[ring].initialized = true; >>> - DRM_INFO("fence driver on ring %d use gpu addr 0x%08Lx and cpu >>> addr 0x%p\n", >>> + DRM_INFO("fence driver on ring %d use gpu addr 0x%016llx and cpu >>> addr 0x%p\n", >>> ring, rdev->fence_drv[ring].gpu_addr, >>> rdev->fence_drv[ring].cpu_addr); >>> write_unlock_irqrestore(&rdev->fence_lock, irq_flags); >>> return 0; >>> @@ -400,7 +445,8 @@ static void radeon_fence_driver_init_ring(struct >>> radeon_device *rdev, int ring) >>> rdev->fence_drv[ring].scratch_reg = -1; >>> rdev->fence_drv[ring].cpu_addr = NULL; >>> rdev->fence_drv[ring].gpu_addr = 0; >>> - atomic_set(&rdev->fence_drv[ring].seq, 0); >>> + rdev->fence_drv[ring].seq = 0; >>> + atomic64_set(&rdev->fence_drv[ring].last_seq, 0); >>> INIT_LIST_HEAD(&rdev->fence_drv[ring].emitted); >>> INIT_LIST_HEAD(&rdev->fence_drv[ring].signaled); >>> init_waitqueue_head(&rdev->fence_drv[ring].queue); >>> @@ -458,12 +504,12 @@ static int radeon_debugfs_fence_info(struct >>> seq_file *m, void *data) >>> continue; >>> >>> seq_printf(m, "--- ring %d ---\n", i); >>> - seq_printf(m, "Last signaled fence 0x%08X\n", >>> - radeon_fence_read(rdev, i)); >>> + seq_printf(m, "Last signaled fence 0x%016lx\n", >>> + atomic64_read(&rdev->fence_drv[i].last_seq)); >>> if (!list_empty(&rdev->fence_drv[i].emitted)) { >>> fence = >>> list_entry(rdev->fence_drv[i].emitted.prev, >>> struct radeon_fence, list); >>> - seq_printf(m, "Last emitted fence %p with >>> 0x%08X\n", >>> + seq_printf(m, "Last emitted fence %p with >>> 0x%016llx\n", >>> fence, fence->seq); >>> } >>> } >>> diff --git a/drivers/gpu/drm/radeon/radeon_ring.c >>> b/drivers/gpu/drm/radeon/radeon_ring.c >>> index a4d60ae..4ae222b 100644 >>> --- a/drivers/gpu/drm/radeon/radeon_ring.c >>> +++ b/drivers/gpu/drm/radeon/radeon_ring.c >>> @@ -82,7 +82,7 @@ bool radeon_ib_try_free(struct radeon_device *rdev, >>> struct radeon_ib *ib) >>> bool done = false; >>> >>> /* only free ib which have been emited */ >>> - if (ib->fence&& ib->fence->emitted) { >>> + if (ib->fence&& ib->fence->seq< RADEON_FENCE_NOTEMITED_SEQ) { >>> >>> if (radeon_fence_signaled(ib->fence)) { >>> radeon_fence_unref(&ib->fence); >>> radeon_sa_bo_free(rdev,&ib->sa_bo); >>> >>> @@ -149,8 +149,9 @@ retry: >>> /* this should be rare event, ie all ib scheduled none signaled >>> yet. >>> */ >>> for (i = 0; i< RADEON_IB_POOL_SIZE; i++) { >>> - if (rdev->ib_pool.ibs[idx].fence&& >>> rdev->ib_pool.ibs[idx].fence->emitted) { >>> >>> - r = >>> radeon_fence_wait(rdev->ib_pool.ibs[idx].fence, false); >>> + struct radeon_fence *fence = >>> rdev->ib_pool.ibs[idx].fence; >>> + if (fence&& fence->seq< RADEON_FENCE_NOTEMITED_SEQ) { >>> >>> + r = radeon_fence_wait(fence, false); >>> if (!r) { >>> goto retry; >>> } >>> @@ -173,7 +174,7 @@ void radeon_ib_free(struct radeon_device *rdev, >>> struct radeon_ib **ib) >>> return; >>> } >>> radeon_mutex_lock(&rdev->ib_pool.mutex); >>> - if (tmp->fence&& !tmp->fence->emitted) { >>> + if (tmp->fence&& tmp->fence->seq == RADEON_FENCE_NOTEMITED_SEQ) >>> { >>> radeon_sa_bo_free(rdev,&tmp->sa_bo); >>> radeon_fence_unref(&tmp->fence); >>> } >>> -- >>> 1.7.5.4 >>> > _______________________________________________ dri-devel mailing list dri-devel@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/dri-devel