On Mon, Apr 30, 2012 at 10:50 AM, Christian König <deathsimple@xxxxxxxxxxx> wrote:
> With that in place clients are automatically blocking
> until their memory request can be handled.
>
> v2: block only if the memory request can't be satisfied
>     in the first try, the first version actually lacked
>     a night of sleep.
>
> v3: make blocking optional, update comments and fix
>     another bug with biggest hole tracking.
>
> Signed-off-by: Christian König <deathsimple@xxxxxxxxxxx>

I would say NAK. The idea of the SA allocator is to behave like a ring
buffer, i.e. to do ring allocation. All allocations made through the SA
are for objects whose lifetime is tied to a ring: once allocated, the
object is scheduled on one of the rings, and once the ring is done
consuming it the object is freed.

So the idea behind the SA allocator is to keep it simple: we track the
highest offset in use and the lowest one free, we try to find room above
the highest offset, and if there is not enough room we wrap over and try
again at the beginning of the ring (a small sketch of this scheme follows
below). The point is that in the usual case we never have to wait because
the SA is big enough, and if we do have to wait it is because the GPU is
already full of work, so waiting a bit won't starve it.

That being said, the current code doesn't exactly reflect that; I can't
remember why. Anyway, I would rather make the current code match the
original intent, as I believe that keeps allocation easy and fast in the
usual case.

There is also a reason why I intended to have several SA managers. The
idea is to have one SA manager for the VM (because VM allocations might
live longer) and one for all the ring objects (semaphores, IBs, ...),
because in the usual case all rings sync with each other from time to
time while they all progress in parallel.
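To make the ring scheme described above concrete, here is a minimal
userspace sketch of the idea. Everything in it (ring_sa, ring_sa_alloc,
ring_sa_retire, the sizes) is invented for illustration and is not the
radeon code; it ignores alignment, frees strictly in ring order, and
keeps the wrap checks strict so that head == tail can only mean "empty":

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

#define RING_SA_SIZE   1024  /* managed bytes, arbitrary for the demo */
#define RING_SA_SLOTS  16    /* max in-flight allocations in the demo */

struct ring_sa {
        unsigned head;                /* next free offset */
        unsigned tail;                /* offset of the oldest live allocation */
        unsigned nallocs;             /* outstanding allocations, FIFO order */
        unsigned offs[RING_SA_SLOTS]; /* FIFO of outstanding offsets */
        unsigned lens[RING_SA_SLOTS]; /* ...and their sizes */
};

/* Try to place 'size' bytes above the highest offset in use, wrapping
 * over to offset 0 when the end of the buffer is too small.  Returns
 * false when no room is left; a kernel version would instead wait for
 * the GPU to retire old allocations. */
static bool ring_sa_alloc(struct ring_sa *sa, unsigned size, unsigned *offset)
{
        unsigned at;

        if (size == 0 || size > RING_SA_SIZE || sa->nallocs == RING_SA_SLOTS)
                return false;

        if (sa->nallocs == 0) {
                sa->head = sa->tail = 0;   /* empty: restart at the bottom */
                at = 0;
        } else if (sa->head >= sa->tail) {
                /* strict '>' below keeps head != tail while allocations
                 * are outstanding, so head == tail always means empty */
                if (RING_SA_SIZE - sa->head >= size)
                        at = sa->head;     /* room above the highest offset */
                else if (sa->tail > size)
                        at = 0;            /* wrap over, waste the tail end */
                else
                        return false;
        } else if (sa->tail - sa->head > size) {
                at = sa->head;             /* room between head and tail */
        } else {
                return false;              /* caller would block here */
        }

        sa->offs[sa->nallocs] = at;
        sa->lens[sa->nallocs] = size;
        sa->nallocs++;
        sa->head = at + size;
        *offset = at;
        return true;
}

/* The ring consumed the oldest allocation: drop it from the FIFO and
 * advance 'tail' to the next outstanding allocation (or catch up with
 * 'head' when the allocator is empty again). */
static void ring_sa_retire(struct ring_sa *sa)
{
        unsigned i;

        assert(sa->nallocs > 0);
        for (i = 1; i < sa->nallocs; i++) {
                sa->offs[i - 1] = sa->offs[i];
                sa->lens[i - 1] = sa->lens[i];
        }
        sa->nallocs--;
        sa->tail = sa->nallocs ? sa->offs[0] : sa->head;
}

int main(void)
{
        struct ring_sa sa = { 0 };
        unsigned off;

        if (ring_sa_alloc(&sa, 600, &off))
                printf("a at %u\n", off);        /* a at 0 */
        if (ring_sa_alloc(&sa, 400, &off))
                printf("b at %u\n", off);        /* b at 600 */
        if (!ring_sa_alloc(&sa, 100, &off))
                printf("full, would wait\n");    /* no room above or below */
        ring_sa_retire(&sa);                     /* ring retired 'a' */
        if (ring_sa_alloc(&sa, 100, &off))
                printf("c at %u\n", off);        /* c at 0: wrapped over */
        return 0;
}

The point of this shape is that allocation never scans a hole list: a
request either fits above the highest in-use offset, wraps over to
offset 0, or the caller waits for the ring to retire its oldest
allocation.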
Cheers,
Jerome

> ---
>  drivers/gpu/drm/radeon/radeon.h        |    5 +-
>  drivers/gpu/drm/radeon/radeon_gart.c   |    2 +-
>  drivers/gpu/drm/radeon/radeon_object.h |    2 +-
>  drivers/gpu/drm/radeon/radeon_ring.c   |   20 ++--
>  drivers/gpu/drm/radeon/radeon_sa.c     |  211 +++++++++++++++++++++++---------
>  5 files changed, 165 insertions(+), 75 deletions(-)
>
> diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
> index 99b188a..e71dc67 100644
> --- a/drivers/gpu/drm/radeon/radeon.h
> +++ b/drivers/gpu/drm/radeon/radeon.h
> @@ -381,17 +381,16 @@ struct radeon_bo_list {
>   * alignment).
>   */
>  struct radeon_sa_manager {
> -        spinlock_t              lock;
> +        wait_queue_head_t       queue;
>          struct radeon_bo        *bo;
>          struct list_head        sa_bo;
>          unsigned                size;
> +        struct list_head        *biggest_hole;
>          uint64_t                gpu_addr;
>          void                    *cpu_ptr;
>          uint32_t                domain;
>  };
>
> -struct radeon_sa_bo;
> -
>  /* sub-allocation buffer */
>  struct radeon_sa_bo {
>          struct list_head        list;
> diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c
> index c58a036..7af4ff9 100644
> --- a/drivers/gpu/drm/radeon/radeon_gart.c
> +++ b/drivers/gpu/drm/radeon/radeon_gart.c
> @@ -395,7 +395,7 @@ int radeon_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm)
>  retry:
>          r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager, &vm->sa_bo,
>                               RADEON_GPU_PAGE_ALIGN(vm->last_pfn * 8),
> -                             RADEON_GPU_PAGE_SIZE);
> +                             RADEON_GPU_PAGE_SIZE, false);
>          if (r) {
>                  if (list_empty(&rdev->vm_manager.lru_vm)) {
>                          return r;
> diff --git a/drivers/gpu/drm/radeon/radeon_object.h b/drivers/gpu/drm/radeon/radeon_object.h
> index d9b9333..85f33d9 100644
> --- a/drivers/gpu/drm/radeon/radeon_object.h
> +++ b/drivers/gpu/drm/radeon/radeon_object.h
> @@ -158,7 +158,7 @@ extern int radeon_sa_bo_manager_suspend(struct radeon_device *rdev,
>  extern int radeon_sa_bo_new(struct radeon_device *rdev,
>                              struct radeon_sa_manager *sa_manager,
>                              struct radeon_sa_bo *sa_bo,
> -                            unsigned size, unsigned align);
> +                            unsigned size, unsigned align, bool block);
>  extern void radeon_sa_bo_free(struct radeon_device *rdev,
>                                struct radeon_sa_bo *sa_bo);
>  #if defined(CONFIG_DEBUG_FS)
> diff --git a/drivers/gpu/drm/radeon/radeon_ring.c b/drivers/gpu/drm/radeon/radeon_ring.c
> index 1d9bce9..ccee74f 100644
> --- a/drivers/gpu/drm/radeon/radeon_ring.c
> +++ b/drivers/gpu/drm/radeon/radeon_ring.c
> @@ -124,7 +124,7 @@ retry:
>          if (rdev->ib_pool.ibs[idx].fence == NULL) {
>                  r = radeon_sa_bo_new(rdev, &rdev->ib_pool.sa_manager,
>                                       &rdev->ib_pool.ibs[idx].sa_bo,
> -                                     size, 256);
> +                                     size, 256, false);
>                  if (!r) {
>                          *ib = &rdev->ib_pool.ibs[idx];
>                          (*ib)->ptr = rdev->ib_pool.sa_manager.cpu_ptr;
> @@ -205,10 +205,17 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib)
>
>  int radeon_ib_pool_init(struct radeon_device *rdev)
>  {
> -        struct radeon_sa_manager tmp;
>          int i, r;
>
> -        r = radeon_sa_bo_manager_init(rdev, &tmp,
> +        radeon_mutex_lock(&rdev->ib_pool.mutex);
> +        if (rdev->ib_pool.ready) {
> +                radeon_mutex_unlock(&rdev->ib_pool.mutex);
> +                return 0;
> +        }
> +        rdev->ib_pool.ready = true;
> +        radeon_mutex_unlock(&rdev->ib_pool.mutex);
> +
> +        r = radeon_sa_bo_manager_init(rdev, &rdev->ib_pool.sa_manager,
>                                        RADEON_IB_POOL_SIZE*64*1024,
>                                        RADEON_GEM_DOMAIN_GTT);
>          if (r) {
> @@ -216,14 +223,6 @@ int radeon_ib_pool_init(struct radeon_device *rdev)
>          }
>
>          radeon_mutex_lock(&rdev->ib_pool.mutex);
> -        if (rdev->ib_pool.ready) {
> -                radeon_mutex_unlock(&rdev->ib_pool.mutex);
> -                radeon_sa_bo_manager_fini(rdev, &tmp);
> -                return 0;
> -        }
> -
> -        rdev->ib_pool.sa_manager = tmp;
> -        INIT_LIST_HEAD(&rdev->ib_pool.sa_manager.sa_bo);
>          for (i = 0; i < RADEON_IB_POOL_SIZE; i++) {
>                  rdev->ib_pool.ibs[i].fence = NULL;
>                  rdev->ib_pool.ibs[i].idx = i;
> diff --git a/drivers/gpu/drm/radeon/radeon_sa.c b/drivers/gpu/drm/radeon/radeon_sa.c
> index 013a787..92ab7b4 100644
> --- a/drivers/gpu/drm/radeon/radeon_sa.c
> +++ b/drivers/gpu/drm/radeon/radeon_sa.c
> @@ -26,6 +26,7 @@
>  /*
>   * Authors:
>   *    Jerome Glisse <glisse@xxxxxxxxxxxxxxx>
> + *    Christian König <christian.koenig@xxxxxxx>
>   */
>  #include "drmP.h"
>  #include "drm.h"
> @@ -37,9 +38,10 @@ int radeon_sa_bo_manager_init(struct radeon_device *rdev,
>  {
>          int r;
>
> -        spin_lock_init(&sa_manager->lock);
> +        init_waitqueue_head(&sa_manager->queue);
>          sa_manager->bo = NULL;
>          sa_manager->size = size;
> +        sa_manager->biggest_hole = &sa_manager->sa_bo;
>          sa_manager->domain = domain;
>          INIT_LIST_HEAD(&sa_manager->sa_bo);
>
> @@ -58,6 +60,7 @@ void radeon_sa_bo_manager_fini(struct radeon_device *rdev,
>  {
>          struct radeon_sa_bo *sa_bo, *tmp;
>
> +        wake_up_all(&sa_manager->queue);
>          if (!list_empty(&sa_manager->sa_bo)) {
>                  dev_err(rdev->dev, "sa_manager is not empty, clearing anyway\n");
>          }
> @@ -114,96 +117,186 @@ int radeon_sa_bo_manager_suspend(struct radeon_device *rdev,
>          return r;
>  }
>
> +static inline unsigned radeon_sa_bo_hole_start(struct radeon_sa_manager *m,
> +                                               struct list_head *entry)
> +{
> +        struct radeon_sa_bo *sa_bo;
> +
> +        if (entry == &m->sa_bo)
> +                return 0;
> +
> +        sa_bo = list_entry(entry, struct radeon_sa_bo, list);
> +        return sa_bo->offset + sa_bo->size;
> +}
> +
> +static inline unsigned radeon_sa_bo_hole_end(struct radeon_sa_manager *m,
> +                                             struct list_head *entry)
> +{
> +        if (entry->next == &m->sa_bo)
> +                return m->size;
> +
> +        return list_entry(entry->next, struct radeon_sa_bo, list)->offset;
> +}
> +
> +static inline unsigned radeon_sa_bo_min_free(struct radeon_sa_manager *m,
> +                                             unsigned align)
> +{
> +        unsigned start, end, wasted;
> +        start = radeon_sa_bo_hole_start(m, m->biggest_hole);
> +        wasted = start % align;
> +        if (wasted)
> +                start += align - wasted;
> +
> +        end = radeon_sa_bo_hole_end(m, m->biggest_hole);
> +        return start < end ? end - start : 0;
> +}
> +
>  /*
>   * Principe is simple, we keep a list of sub allocation in offset
>   * order (first entry has offset == 0, last entry has the highest
>   * offset).
>   *
> - * When allocating new object we first check if there is room at
> - * the end total_size - (last_object_offset + last_object_size) >=
> - * alloc_size. If so we allocate new object there.
> + * When allocating new objects we start checking at what's currently
> + * assumed to be the biggest hole; if that's not big enough we continue
> + * searching the list until we find something big enough or reach the
> + * biggest hole again. If the latter happens we optionally block for the
> + * biggest hole to increase in size.
>   *
> - * When there is not enough room at the end, we start waiting for
> - * each sub object until we reach object_offset+object_size >=
> - * alloc_size, this object then become the sub object we return.
> - *
> - * Alignment can't be bigger than page size
>   */
>  int radeon_sa_bo_new(struct radeon_device *rdev,
>                       struct radeon_sa_manager *sa_manager,
>                       struct radeon_sa_bo *sa_bo,
> -                     unsigned size, unsigned align)
> +                     unsigned size, unsigned align, bool block)
>  {
> -        struct radeon_sa_bo *tmp;
> -        struct list_head *head;
> -        unsigned offset = 0, wasted = 0;
> -        unsigned long flags;
> +        struct list_head *head, *curr, *hole;
> +        unsigned start, currsize, wasted, holesize;
> +        int r;
>
>          BUG_ON(align > RADEON_GPU_PAGE_SIZE);
>          BUG_ON(size > sa_manager->size);
> -        spin_lock_irqsave(&sa_manager->lock, flags);
>
> -        /* no one ? */
> -        if (list_empty(&sa_manager->sa_bo)) {
> -                head = &sa_manager->sa_bo;
> -                goto out;
> -        }
> +        spin_lock_irq(&sa_manager->queue.lock);
> +
> +        do {
> +                curr = head = hole = sa_manager->biggest_hole;
> +                holesize = radeon_sa_bo_min_free(sa_manager, 1);
> +                do {
> +                        start = radeon_sa_bo_hole_start(sa_manager, curr);
> +                        currsize = radeon_sa_bo_hole_end(sa_manager, curr) - start;
> +
> +                        wasted = start % align;
> +                        if (wasted) {
> +                                wasted = align - wasted;
> +                        }
>
> -        /* look for a hole big enough */
> -        list_for_each_entry(tmp, &sa_manager->sa_bo, list) {
> -                /* room before this object ? */
> -                if (offset < tmp->offset && (tmp->offset - offset) >= size) {
> -                        head = tmp->list.prev;
> -                        goto out;
> +                        /* room after current big enough ? */
> +                        if (currsize >= (size + wasted)) {
> +                                sa_bo->manager = sa_manager;
> +                                sa_bo->offset = start + wasted;
> +                                sa_bo->size = size;
> +                                list_add(&sa_bo->list, curr);
> +
> +                                /* consider the space left after the newly
> +                                   added sa_bo as the biggest hole */
> +                                currsize -= (size + wasted);
> +                                if (hole == sa_bo->list.prev || holesize < currsize) {
> +                                        hole = &sa_bo->list;
> +                                }
> +
> +                                if (sa_manager->biggest_hole != hole) {
> +                                        sa_manager->biggest_hole = hole;
> +                                        wake_up_locked(&sa_manager->queue);
> +                                }
> +                                spin_unlock_irq(&sa_manager->queue.lock);
> +                                return 0;
> +                        }
> +
> +                        if (holesize < currsize) {
> +                                hole = curr;
> +                                holesize = currsize;
> +                        }
> +
> +                        curr = curr->next;
> +                } while (curr != head);
> +
> +                if (sa_manager->biggest_hole != hole) {
> +                        sa_manager->biggest_hole = hole;
>                  }
> -                offset = tmp->offset + tmp->size;
> -                wasted = offset % align;
> -                if (wasted) {
> -                        offset += align - wasted;
> +
> +                if (block) {
> +                        /* failed to find something big enough, wait
> +                           for the biggest hole to increase in size */
> +                        r = wait_event_interruptible_locked_irq(sa_manager->queue,
> +                                radeon_sa_bo_min_free(sa_manager, align) >= size
> +                                );
> +                        if (r) {
> +                                spin_unlock_irq(&sa_manager->queue.lock);
> +                                return r;
> +                        }
>                  }
> -        }
> -        /* room at the end ? */
> -        head = sa_manager->sa_bo.prev;
> -        tmp = list_entry(head, struct radeon_sa_bo, list);
> -        offset = tmp->offset + tmp->size;
> -        wasted = offset % align;
> -        if (wasted) {
> -                offset += wasted = align - wasted;
> -        }
> -        if ((sa_manager->size - offset) < size) {
> -                /* failed to find somethings big enough */
> -                spin_unlock_irqrestore(&sa_manager->lock, flags);
> -                return -ENOMEM;
> -        }
> +        } while (block);
> +        spin_unlock_irq(&sa_manager->queue.lock);
>
> -out:
> -        sa_bo->manager = sa_manager;
> -        sa_bo->offset = offset;
> -        sa_bo->size = size;
> -        list_add(&sa_bo->list, head);
> -        spin_unlock_irqrestore(&sa_manager->lock, flags);
> -        return 0;
> +        return -ENOMEM;
>  }
>
>  void radeon_sa_bo_free(struct radeon_device *rdev, struct radeon_sa_bo *sa_bo)
>  {
> +        struct radeon_sa_manager *sa_manager = sa_bo->manager;
> +        unsigned bsize, fsize;
>          unsigned long flags;
> -        spin_lock_irqsave(&sa_bo->manager->lock, flags);
> +
> +        spin_lock_irqsave(&sa_manager->queue.lock, flags);
> +        if (&sa_bo->list == sa_manager->biggest_hole ||
> +            sa_bo->list.prev == sa_manager->biggest_hole) {
> +
> +                sa_manager->biggest_hole = sa_bo->list.prev;
> +                wake_up_locked(&sa_manager->queue);
> +        } else {
> +                bsize = radeon_sa_bo_min_free(sa_manager, 1);
> +                fsize = radeon_sa_bo_hole_start(sa_manager, sa_bo->list.prev);
> +                fsize = radeon_sa_bo_hole_end(sa_manager, &sa_bo->list) - fsize;
> +                if (fsize > bsize) {
> +                        sa_manager->biggest_hole = sa_bo->list.prev;
> +                        wake_up_locked(&sa_manager->queue);
> +                }
> +        }
>          list_del_init(&sa_bo->list);
> -        spin_unlock_irqrestore(&sa_bo->manager->lock, flags);
> +        spin_unlock_irqrestore(&sa_manager->queue.lock, flags);
>  }
>
>  #if defined(CONFIG_DEBUG_FS)
>  void radeon_sa_bo_dump_debug_info(struct radeon_sa_manager *sa_manager,
>                                    struct seq_file *m)
>  {
> -        struct radeon_sa_bo *i;
> +        struct list_head *head, *curr;
> +        struct radeon_sa_bo *sa_bo;
> +        unsigned start, end;
>          unsigned long flags;
>
> -        spin_lock_irqsave(&sa_manager->lock, flags);
> -        list_for_each_entry(i, &sa_manager->sa_bo, list) {
> -                seq_printf(m, "offset %08d: size %4d\n", i->offset, i->size);
> -        }
> -        spin_unlock_irqrestore(&sa_manager->lock, flags);
> +        spin_lock_irqsave(&sa_manager->queue.lock, flags);
> +        curr = head = &sa_manager->sa_bo;
> +        do {
> +                if (curr != &sa_manager->sa_bo) {
> +                        sa_bo = list_entry(curr, struct radeon_sa_bo, list);
> +                        seq_printf(m, "reservation %p %08d: size %7d\n",
> +                                   curr, sa_bo->offset, sa_bo->size);
> +                }
> +
> +                start = radeon_sa_bo_hole_start(sa_manager, curr);
> +                end = radeon_sa_bo_hole_end(sa_manager, curr);
> +                if (start < end) {
> +                        seq_printf(m, "hole %p %08d: size %7d\n",
> +                                   curr, start, end - start);
> +                }
> +                curr = curr->next;
> +        } while (curr != head);
> +
> +        start = radeon_sa_bo_hole_start(sa_manager, sa_manager->biggest_hole);
> +        end = radeon_sa_bo_hole_end(sa_manager, sa_manager->biggest_hole);
> +        seq_printf(m, "\nbiggest hole %p %08d: size %7d\n",
> +                   sa_manager->biggest_hole, start, end - start);
> +
> +        spin_unlock_irqrestore(&sa_manager->queue.lock, flags);
>  }
>  #endif
> --
> 1.7.5.4

_______________________________________________
dri-devel mailing list
dri-devel@xxxxxxxxxxxxxxxxxxxxx
http://lists.freedesktop.org/mailman/listinfo/dri-devel