On Mon, May 7, 2012 at 7:42 AM, Christian König <deathsimple@xxxxxxxxxxx> wrote: > A startover with a new idea for a multiple ring allocator. > Should perform as well as a normal ring allocator as long > as only one ring does somthing, but falls back to a more > complex algorithm if more complex things start to happen. > > We store the last allocated bo in last, we always try to allocate > after the last allocated bo. Principle is that in a linear GPU ring > progression was is after last is the oldest bo we allocated and thus > the first one that should no longer be in use by the GPU. > > If it's not the case we skip over the bo after last to the closest > done bo if such one exist. If none exist and we are not asked to > block we report failure to allocate. > > If we are asked to block we wait on all the oldest fence of all > rings. We just wait for any of those fence to complete. > > v2: We need to be able to let hole point to the list_head, otherwise > try free will never free the first allocation of the list. Also > stop calling radeon_fence_signalled more than necessary. > > Signed-off-by: Christian König <deathsimple@xxxxxxxxxxx> > Signed-off-by: Jerome Glisse <jglisse@xxxxxxxxxx> This one is a NAK; please use my patch. Yes, in my patch we never try to free anything if there is only one sa_bo in the list; if you really care about this, it's a one-line change: http://people.freedesktop.org/~glisse/reset5/0001-drm-radeon-multiple-ring-allocator-v2.patch Your patch here can enter an infinite loop and never return while holding the lock. See below. 
Cheers, Jerome > --- > drivers/gpu/drm/radeon/radeon.h | 7 +- > drivers/gpu/drm/radeon/radeon_ring.c | 19 +-- > drivers/gpu/drm/radeon/radeon_sa.c | 292 +++++++++++++++++++++++----------- > 3 files changed, 210 insertions(+), 108 deletions(-) > > diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h > index 37a7459..cc7f16a 100644 > --- a/drivers/gpu/drm/radeon/radeon.h > +++ b/drivers/gpu/drm/radeon/radeon.h > @@ -385,7 +385,9 @@ struct radeon_bo_list { > struct radeon_sa_manager { > spinlock_t lock; > struct radeon_bo *bo; > - struct list_head sa_bo; > + struct list_head *hole; > + struct list_head flist[RADEON_NUM_RINGS]; > + struct list_head olist; > unsigned size; > uint64_t gpu_addr; > void *cpu_ptr; > @@ -396,7 +398,8 @@ struct radeon_sa_bo; > > /* sub-allocation buffer */ > struct radeon_sa_bo { > - struct list_head list; > + struct list_head olist; > + struct list_head flist; > struct radeon_sa_manager *manager; > unsigned soffset; > unsigned eoffset; > diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c > index 1748d93..e074ff5 100644 > --- a/drivers/gpu/drm/radeon/radeon_ring.c > +++ b/drivers/gpu/drm/radeon/radeon_ring.c > @@ -204,25 +204,22 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib) > > int radeon_ib_pool_init(struct radeon_device *rdev) > { > - struct radeon_sa_manager tmp; > int i, r; > > - r = radeon_sa_bo_manager_init(rdev, &tmp, > - RADEON_IB_POOL_SIZE*64*1024, > - RADEON_GEM_DOMAIN_GTT); > - if (r) { > - return r; > - } > - > radeon_mutex_lock(&rdev->ib_pool.mutex); > if (rdev->ib_pool.ready) { > radeon_mutex_unlock(&rdev->ib_pool.mutex); > - radeon_sa_bo_manager_fini(rdev, &tmp); > return 0; > } > > - rdev->ib_pool.sa_manager = tmp; > - INIT_LIST_HEAD(&rdev->ib_pool.sa_manager.sa_bo); > + r = radeon_sa_bo_manager_init(rdev, &rdev->ib_pool.sa_manager, > + RADEON_IB_POOL_SIZE*64*1024, > + RADEON_GEM_DOMAIN_GTT); > + if (r) { > + 
radeon_mutex_unlock(&rdev->ib_pool.mutex); > + return r; > + } > + > for (i = 0; i < RADEON_IB_POOL_SIZE; i++) { > rdev->ib_pool.ibs[i].fence = NULL; > rdev->ib_pool.ibs[i].idx = i; > diff --git a/drivers/gpu/drm/radeon/radeon_sa.c b/drivers/gpu/drm/radeon/radeon_sa.c > index 90ee8ad..757a9d4 100644 > --- a/drivers/gpu/drm/radeon/radeon_sa.c > +++ b/drivers/gpu/drm/radeon/radeon_sa.c > @@ -27,21 +27,42 @@ > * Authors: > * Jerome Glisse <glisse@xxxxxxxxxxxxxxx> > */ > +/* Algorithm: > + * > + * We store the last allocated bo in "hole", we always try to allocate > + * after the last allocated bo. Principle is that in a linear GPU ring > + * progression was is after last is the oldest bo we allocated and thus > + * the first one that should no longer be in use by the GPU. > + * > + * If it's not the case we skip over the bo after last to the closest > + * done bo if such one exist. If none exist and we are not asked to > + * block we report failure to allocate. > + * > + * If we are asked to block we wait on all the oldest fence of all > + * rings. We just wait for any of those fence to complete. 
> + */ > #include "drmP.h" > #include "drm.h" > #include "radeon.h" > > +static void radeon_sa_bo_remove_locked(struct radeon_sa_bo *sa_bo); > +static void radeon_sa_bo_try_free(struct radeon_sa_manager *sa_manager); > + > int radeon_sa_bo_manager_init(struct radeon_device *rdev, > struct radeon_sa_manager *sa_manager, > unsigned size, u32 domain) > { > - int r; > + int i, r; > > spin_lock_init(&sa_manager->lock); > sa_manager->bo = NULL; > sa_manager->size = size; > sa_manager->domain = domain; > - INIT_LIST_HEAD(&sa_manager->sa_bo); > + sa_manager->hole = &sa_manager->olist; > + INIT_LIST_HEAD(&sa_manager->olist); > + for (i = 0; i < RADEON_NUM_RINGS; ++i) { > + INIT_LIST_HEAD(&sa_manager->flist[i]); > + } > > r = radeon_bo_create(rdev, size, RADEON_GPU_PAGE_SIZE, true, > RADEON_GEM_DOMAIN_CPU, &sa_manager->bo); > @@ -58,11 +79,15 @@ void radeon_sa_bo_manager_fini(struct radeon_device *rdev, > { > struct radeon_sa_bo *sa_bo, *tmp; > > - if (!list_empty(&sa_manager->sa_bo)) { > - dev_err(rdev->dev, "sa_manager is not empty, clearing anyway\n"); > + if (!list_empty(&sa_manager->olist)) { > + sa_manager->hole = &sa_manager->olist, > + radeon_sa_bo_try_free(sa_manager); > + if (!list_empty(&sa_manager->olist)) { > + dev_err(rdev->dev, "sa_manager is not empty, clearing anyway\n"); > + } > } > - list_for_each_entry_safe(sa_bo, tmp, &sa_manager->sa_bo, list) { > - list_del_init(&sa_bo->list); > + list_for_each_entry_safe(sa_bo, tmp, &sa_manager->olist, olist) { > + radeon_sa_bo_remove_locked(sa_bo); > } > radeon_bo_unref(&sa_manager->bo); > sa_manager->size = 0; > @@ -114,111 +139,181 @@ int radeon_sa_bo_manager_suspend(struct radeon_device *rdev, > return r; > } > > -/* > - * Principe is simple, we keep a list of sub allocation in offset > - * order (first entry has offset == 0, last entry has the highest > - * offset). 
> - * > - * When allocating new object we first check if there is room at > - * the end total_size - (last_object_offset + last_object_size) >= > - * alloc_size. If so we allocate new object there. > - * > - * When there is not enough room at the end, we start waiting for > - * each sub object until we reach object_offset+object_size >= > - * alloc_size, this object then become the sub object we return. > - * > - * Alignment can't be bigger than page size > - */ > - > static void radeon_sa_bo_remove_locked(struct radeon_sa_bo *sa_bo) > { > - list_del(&sa_bo->list); > + struct radeon_sa_manager *sa_manager = sa_bo->manager; > + if (sa_manager->hole == &sa_bo->olist) { > + sa_manager->hole = sa_bo->olist.prev; > + } > + list_del_init(&sa_bo->olist); > + list_del_init(&sa_bo->flist); > radeon_fence_unref(&sa_bo->fence); > kfree(sa_bo); > } > > +static void radeon_sa_bo_try_free(struct radeon_sa_manager *sa_manager) > +{ > + struct radeon_sa_bo *sa_bo, *tmp; > + > + if (sa_manager->hole->next == &sa_manager->olist) > + return; > + > + sa_bo = list_entry(sa_manager->hole->next, struct radeon_sa_bo, olist); > + list_for_each_entry_safe_from(sa_bo, tmp, &sa_manager->olist, olist) { > + if (sa_bo->fence == NULL || !radeon_fence_signaled(sa_bo->fence)) { > + return; > + } > + radeon_sa_bo_remove_locked(sa_bo); > + } > +} > + > +static inline unsigned radeon_sa_bo_hole_soffset(struct radeon_sa_manager *sa_manager) > +{ > + struct list_head *hole = sa_manager->hole; > + > + if (hole != &sa_manager->olist) { > + return list_entry(hole, struct radeon_sa_bo, olist)->eoffset; > + } > + return 0; > +} > + > +static inline unsigned radeon_sa_bo_hole_eoffset(struct radeon_sa_manager *sa_manager) > +{ > + struct list_head *hole = sa_manager->hole; > + > + if (hole->next != &sa_manager->olist) { > + return list_entry(hole->next, struct radeon_sa_bo, olist)->soffset; > + } > + return sa_manager->size; > +} > + > +static bool radeon_sa_bo_try_alloc(struct radeon_sa_manager *sa_manager, 
> + struct radeon_sa_bo *sa_bo, > + unsigned size, unsigned align) > +{ > + unsigned soffset, eoffset, wasted; > + > + soffset = radeon_sa_bo_hole_soffset(sa_manager); > + eoffset = radeon_sa_bo_hole_eoffset(sa_manager); > + wasted = (align - (soffset % align)) % align; > + > + if ((eoffset - soffset) >= (size + wasted)) { > + soffset += wasted; > + > + sa_bo->manager = sa_manager; > + sa_bo->soffset = soffset; > + sa_bo->eoffset = soffset + size; > + list_add(&sa_bo->olist, sa_manager->hole); > + INIT_LIST_HEAD(&sa_bo->flist); > + sa_manager->hole = &sa_bo->olist; > + return true; > + } > + return false; > +} > + > +static bool radeon_sa_bo_next_hole(struct radeon_sa_manager *sa_manager, > + struct radeon_fence **fences) > +{ > + unsigned i, soffset, best, tmp; > + > + /* if hole points to the end of the buffer */ > + if (sa_manager->hole->next == &sa_manager->olist) { > + /* try again with its beginning */ > + sa_manager->hole = &sa_manager->olist; > + return true; > + } > + > + soffset = radeon_sa_bo_hole_soffset(sa_manager); > + /* to handle wrap around we add sa_manager->size */ > + best = sa_manager->size * 2; > + /* go over all fence list and try to find the closest sa_bo > + * of the current last > + */ > + for (i = 0; i < RADEON_NUM_RINGS; ++i) { > + struct radeon_sa_bo *sa_bo; > + > + if (list_empty(&sa_manager->flist[i])) { > + fences[i] = NULL; > + continue; > + } > + > + sa_bo = list_first_entry(&sa_manager->flist[i], > + struct radeon_sa_bo, flist); > + > + if (!radeon_fence_signaled(sa_bo->fence)) { > + fences[i] = sa_bo->fence; > + continue; > + } > + > + tmp = sa_bo->soffset; > + if (tmp < soffset) { > + /* wrap around, pretend it's after */ > + tmp += sa_manager->size; > + } > + tmp -= soffset; > + if (tmp < best) { > + /* this sa bo is the closest one */ > + best = tmp; > + sa_manager->hole = sa_bo->olist.prev; > + } > + > + /* we knew that this one is signaled, > + so it's save to remote it */ > + radeon_sa_bo_remove_locked(sa_bo); > + } > + 
return best != sa_manager->size * 2; > +} > + > int radeon_sa_bo_new(struct radeon_device *rdev, > struct radeon_sa_manager *sa_manager, > struct radeon_sa_bo **sa_bo, > unsigned size, unsigned align, bool block) > { > - struct radeon_fence *fence = NULL; > - struct radeon_sa_bo *tmp, *next; > - struct list_head *head; > - unsigned offset = 0, wasted = 0; > - int r; > + struct radeon_fence *fences[RADEON_NUM_RINGS]; > + int r = -ENOMEM; > > BUG_ON(align > RADEON_GPU_PAGE_SIZE); > BUG_ON(size > sa_manager->size); > > *sa_bo = kmalloc(sizeof(struct radeon_sa_bo), GFP_KERNEL); > - > -retry: > + if ((*sa_bo) == NULL) { > + return -ENOMEM; > + } > + (*sa_bo)->manager = sa_manager; > + (*sa_bo)->fence = NULL; > + INIT_LIST_HEAD(&(*sa_bo)->olist); > + INIT_LIST_HEAD(&(*sa_bo)->flist); > > spin_lock(&sa_manager->lock); > + do { > + /* try to allocate couple time before going to wait */ > + do { > + radeon_sa_bo_try_free(sa_manager); > > - /* no one ? */ > - head = sa_manager->sa_bo.prev; > - if (list_empty(&sa_manager->sa_bo)) { > - goto out; > - } > - > - /* look for a hole big enough */ > - offset = 0; > - list_for_each_entry_safe(tmp, next, &sa_manager->sa_bo, list) { > - /* try to free this object */ > - if (tmp->fence) { > - if (radeon_fence_signaled(tmp->fence)) { > - radeon_sa_bo_remove_locked(tmp); > - continue; > - } else { > - fence = tmp->fence; > + if (radeon_sa_bo_try_alloc(sa_manager, *sa_bo, > + size, align)) { > + spin_unlock(&sa_manager->lock); > + return 0; > } > - } > > - /* room before this object ? */ > - if (offset < tmp->soffset && (tmp->soffset - offset) >= size) { > - head = tmp->list.prev; > - goto out; > - } > - offset = tmp->eoffset; > - wasted = offset % align; > - if (wasted) { > - wasted = align - wasted; > - } > - offset += wasted; > - } > - /* room at the end ? 
*/ > - head = sa_manager->sa_bo.prev; > - tmp = list_entry(head, struct radeon_sa_bo, list); > - offset = tmp->eoffset; > - wasted = offset % align; > - if (wasted) { > - wasted = align - wasted; > - } > - offset += wasted; > - if ((sa_manager->size - offset) < size) { > - /* failed to find somethings big enough */ > - spin_unlock(&sa_manager->lock); > - if (block && fence) { > - r = radeon_fence_wait(fence, false); > - if (r) > - return r; > - > - goto retry; > + /* see if we can skip over some allocations */ > + } while (radeon_sa_bo_next_hole(sa_manager, fences)); Here you can enter an infinite loop in the case where there is a bunch of holes in the allocator but none of them allows fulfilling the allocation. radeon_sa_bo_next_hole will keep returning true, looping over and over all the holes. That's why I restrict my patch to skipping at most 2 holes and then failing the allocation or trying to wait. I believe sadly we need a heuristic, and skipping at most 2 holes sounded like a good one. > + > + if (block) { > + spin_unlock(&sa_manager->lock); > + r = radeon_fence_wait_any(rdev, fences, false); > + spin_lock(&sa_manager->lock); > + if (r) { > + goto out_err; > + } > } > - kfree(*sa_bo); > - *sa_bo = NULL; > - return -ENOMEM; > - } > + } while (block); > > -out: > - (*sa_bo)->manager = sa_manager; > - (*sa_bo)->soffset = offset; > - (*sa_bo)->eoffset = offset + size; > - list_add(&(*sa_bo)->list, head); > +out_err: > spin_unlock(&sa_manager->lock); > - return 0; > + kfree(*sa_bo); > + *sa_bo = NULL; > + return r; > } > > void radeon_sa_bo_free(struct radeon_device *rdev, struct radeon_sa_bo **sa_bo, > @@ -226,13 +321,16 @@ void radeon_sa_bo_free(struct radeon_device *rdev, struct radeon_sa_bo **sa_bo, > { > struct radeon_sa_manager *sa_manager; > > - if (!sa_bo || !*sa_bo) > + if (sa_bo == NULL || *sa_bo == NULL) { > return; > + } > > sa_manager = (*sa_bo)->manager; > spin_lock(&sa_manager->lock); > if (fence && fence->seq && fence->seq < RADEON_FENCE_NOTEMITED_SEQ) { > 
(*sa_bo)->fence = radeon_fence_ref(fence); > + list_add_tail(&(*sa_bo)->flist, > + &sa_manager->flist[fence->ring]); > } else { > radeon_sa_bo_remove_locked(*sa_bo); > } > @@ -247,15 +345,19 @@ void radeon_sa_bo_dump_debug_info(struct radeon_sa_manager *sa_manager, > struct radeon_sa_bo *i; > > spin_lock(&sa_manager->lock); > - list_for_each_entry(i, &sa_manager->sa_bo, list) { > - seq_printf(m, "[%08x %08x] size %4d (%p)", > - i->soffset, i->eoffset, i->eoffset - i->soffset, i); > - if (i->fence) { > - seq_printf(m, " protected by %Ld (%p) on ring %d\n", > - i->fence->seq, i->fence, i->fence->ring); > + list_for_each_entry(i, &sa_manager->olist, olist) { > + if (&i->olist == sa_manager->hole) { > + seq_printf(m, ">"); > } else { > - seq_printf(m, "\n"); > + seq_printf(m, " "); > + } > + seq_printf(m, "[0x%08x 0x%08x] size %8d", > + i->soffset, i->eoffset, i->eoffset - i->soffset); > + if (i->fence) { > + seq_printf(m, " protected by 0x%016llx on ring %d", > + i->fence->seq, i->fence->ring); > } > + seq_printf(m, "\n"); > } > spin_unlock(&sa_manager->lock); > } > -- > 1.7.5.4 > _______________________________________________ dri-devel mailing list dri-devel@xxxxxxxxxxxxxxxxxxxxx http://lists.freedesktop.org/mailman/listinfo/dri-devel