On Tue, 28 May 2019 at 20:50, Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> wrote: > > Currently, we try to report to the shrinker the precise number of > objects (pages) that are available to be reaped at this moment. This > requires searching all objects with allocated pages to see if they > fulfill the search criteria, and this count is performed quite > frequently. (The shrinker tries to free ~128 pages on each invocation, > before which we count all the objects; counting takes longer than > unbinding the objects!) If we take the pragmatic view that with > sufficient desire, all objects are eventually reapable (they become > inactive, or no longer used as framebuffer etc), we can simply return > the count of pinned pages maintained during get_pages/put_pages rather > than walk the lists every time. > > The downside is that we may (slightly) over-report the number of > objects/pages we could shrink and so penalize ourselves by shrinking > more than required. This is mitigated by keeping the order in which we > shrink objects such that we avoid penalizing active and frequently used > objects, and if memory is so tight that we need to free them we would > need to anyway. > > v2: Only expose shrinkable objects to the shrinker; a small reduction in > not considering stolen and foreign objects. 
> > Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> > Cc: Joonas Lahtinen <joonas.lahtinen@xxxxxxxxxxxxxxx> > Cc: Matthew Auld <matthew.auld@xxxxxxxxx> > --- > drivers/gpu/drm/i915/gem/i915_gem_domain.c | 3 +- > drivers/gpu/drm/i915/gem/i915_gem_object.c | 22 -------- > drivers/gpu/drm/i915/gem/i915_gem_shrinker.c | 28 ++-------- > drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 3 +- > drivers/gpu/drm/i915/i915_debugfs.c | 58 ++------------------ > drivers/gpu/drm/i915/i915_drv.h | 7 +-- > drivers/gpu/drm/i915/i915_gem.c | 23 ++++---- > drivers/gpu/drm/i915/i915_vma.c | 16 ++++-- > 8 files changed, 41 insertions(+), 119 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c > index 52b73e90c9f4..e5deae62681f 100644 > --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c > +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c > @@ -475,7 +475,8 @@ static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj) > } > mutex_unlock(&i915->ggtt.vm.mutex); > > - if (obj->mm.madv == I915_MADV_WILLNEED) { > + if (i915_gem_object_is_shrinkable(obj) && > + obj->mm.madv == I915_MADV_WILLNEED) { > struct list_head *list; > > spin_lock(&i915->mm.obj_lock); > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c > index f064876f1214..1fccb1de5851 100644 > --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c > +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c > @@ -44,25 +44,6 @@ void i915_gem_object_free(struct drm_i915_gem_object *obj) > return kmem_cache_free(global.slab_objects, obj); > } > > -/* some bookkeeping */ > -static void i915_gem_info_add_obj(struct drm_i915_private *i915, > - u64 size) > -{ > - spin_lock(&i915->mm.object_stat_lock); > - i915->mm.object_count++; > - i915->mm.object_memory += size; > - spin_unlock(&i915->mm.object_stat_lock); > -} > - > -static void i915_gem_info_remove_obj(struct drm_i915_private *i915, > - u64 size) > -{ > - 
spin_lock(&i915->mm.object_stat_lock); > - i915->mm.object_count--; > - i915->mm.object_memory -= size; > - spin_unlock(&i915->mm.object_stat_lock); > -} > - > static void > frontbuffer_retire(struct i915_active_request *active, > struct i915_request *request) > @@ -98,8 +79,6 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj, > obj->mm.madv = I915_MADV_WILLNEED; > INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL | __GFP_NOWARN); > mutex_init(&obj->mm.get_page.lock); > - > - i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size); > } > > /** > @@ -240,7 +219,6 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915, > > reservation_object_fini(&obj->__builtin_resv); > drm_gem_object_release(&obj->base); > - i915_gem_info_remove_obj(i915, obj->base.size); > > bitmap_free(obj->bit_17); > i915_gem_object_free(obj); > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c > index 6a93e326abf3..d71e630c6fb8 100644 > --- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c > +++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c > @@ -309,30 +309,14 @@ i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc) > { > struct drm_i915_private *i915 = > container_of(shrinker, struct drm_i915_private, mm.shrinker); > - struct drm_i915_gem_object *obj; > - unsigned long num_objects = 0; > - unsigned long count = 0; > + unsigned long num_objects; > + unsigned long count; > > - spin_lock(&i915->mm.obj_lock); > - list_for_each_entry(obj, &i915->mm.unbound_list, mm.link) > - if (can_release_pages(obj)) { > - count += obj->base.size >> PAGE_SHIFT; > - num_objects++; > - } > + count = READ_ONCE(i915->mm.shrink_memory) >> PAGE_SHIFT; > + num_objects = READ_ONCE(i915->mm.shrink_count); > > - list_for_each_entry(obj, &i915->mm.bound_list, mm.link) > - if (!i915_gem_object_is_active(obj) && can_release_pages(obj)) { > - count += obj->base.size >> PAGE_SHIFT; > - num_objects++; > - } > - 
list_for_each_entry(obj, &i915->mm.purge_list, mm.link) > - if (!i915_gem_object_is_active(obj) && can_release_pages(obj)) { > - count += obj->base.size >> PAGE_SHIFT; > - num_objects++; > - } > - spin_unlock(&i915->mm.obj_lock); > - > - /* Update our preferred vmscan batch size for the next pass. > + /* > + * Update our preferred vmscan batch size for the next pass. > * Our rough guess for an effective batch size is roughly 2 > * available GEM objects worth of pages. That is we don't want > * the shrinker to fire, until it is worth the cost of freeing an > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c > index 9080a736663a..8b3a23bff7f6 100644 > --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c > +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c > @@ -690,7 +690,8 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv > mutex_unlock(&ggtt->vm.mutex); > > spin_lock(&dev_priv->mm.obj_lock); > - list_move_tail(&obj->mm.link, &dev_priv->mm.bound_list); > + if (i915_gem_object_is_shrinkable(obj)) > + list_move_tail(&obj->mm.link, &dev_priv->mm.bound_list); > obj->bind_count++; > spin_unlock(&dev_priv->mm.obj_lock); > > diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c > index e415d7ef90f2..214dbd698d8e 100644 > --- a/drivers/gpu/drm/i915/i915_debugfs.c > +++ b/drivers/gpu/drm/i915/i915_debugfs.c > @@ -271,7 +271,7 @@ static int i915_gem_stolen_list_info(struct seq_file *m, void *data) > unsigned long total, count, n; > int ret; > > - total = READ_ONCE(dev_priv->mm.object_count); > + total = READ_ONCE(dev_priv->mm.shrink_count); > objects = kvmalloc_array(total, sizeof(*objects), GFP_KERNEL); > if (!objects) > return -ENOMEM; > @@ -460,9 +460,9 @@ static int i915_gem_object_info(struct seq_file *m, void *data) > char buf[80]; > int ret; > > - seq_printf(m, "%u objects, %llu bytes\n", > - dev_priv->mm.object_count, > - dev_priv->mm.object_memory); > + 
seq_printf(m, "%u shrinkable objects, %llu bytes\n", > + dev_priv->mm.shrink_count, > + dev_priv->mm.shrink_memory); > > size = count = 0; > mapped_size = mapped_count = 0; > @@ -552,55 +552,6 @@ static int i915_gem_object_info(struct seq_file *m, void *data) > return 0; > } > > -static int i915_gem_gtt_info(struct seq_file *m, void *data) > -{ > - struct drm_info_node *node = m->private; > - struct drm_i915_private *dev_priv = node_to_i915(node); > - struct drm_device *dev = &dev_priv->drm; > - struct drm_i915_gem_object **objects; > - struct drm_i915_gem_object *obj; > - u64 total_obj_size, total_gtt_size; > - unsigned long nobject, n; > - int count, ret; > - > - nobject = READ_ONCE(dev_priv->mm.object_count); > - objects = kvmalloc_array(nobject, sizeof(*objects), GFP_KERNEL); > - if (!objects) > - return -ENOMEM; > - > - ret = mutex_lock_interruptible(&dev->struct_mutex); > - if (ret) > - return ret; > - > - count = 0; > - spin_lock(&dev_priv->mm.obj_lock); > - list_for_each_entry(obj, &dev_priv->mm.bound_list, mm.link) { > - objects[count++] = obj; > - if (count == nobject) > - break; > - } > - spin_unlock(&dev_priv->mm.obj_lock); > - > - total_obj_size = total_gtt_size = 0; > - for (n = 0; n < count; n++) { > - obj = objects[n]; > - > - seq_puts(m, " "); > - describe_obj(m, obj); > - seq_putc(m, '\n'); > - total_obj_size += obj->base.size; > - total_gtt_size += i915_gem_obj_total_ggtt_size(obj); > - } > - > - mutex_unlock(&dev->struct_mutex); > - > - seq_printf(m, "Total %d objects, %llu bytes, %llu GTT size\n", > - count, total_obj_size, total_gtt_size); > - kvfree(objects); > - > - return 0; > -} > - > static int i915_gem_batch_pool_info(struct seq_file *m, void *data) > { > struct drm_i915_private *dev_priv = node_to_i915(m->private); > @@ -4584,7 +4535,6 @@ static const struct file_operations i915_fifo_underrun_reset_ops = { > static const struct drm_info_list i915_debugfs_list[] = { > {"i915_capabilities", i915_capabilities, 0}, > {"i915_gem_objects", 
i915_gem_object_info, 0}, > - {"i915_gem_gtt", i915_gem_gtt_info, 0}, > {"i915_gem_stolen", i915_gem_stolen_list_info }, > {"i915_gem_fence_regs", i915_gem_fence_regs_info, 0}, > {"i915_gem_interrupt", i915_interrupt_info, 0}, > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index fb2e89133e78..770c54b87de6 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -926,10 +926,9 @@ struct i915_gem_mm { > /** Bit 6 swizzling required for Y tiling */ > u32 bit_6_swizzle_y; > > - /* accounting, useful for userland debugging */ > - spinlock_t object_stat_lock; > - u64 object_memory; > - u32 object_count; > + /* shrinker accounting, also useful for userland debugging */ > + u64 shrink_memory; > + u32 shrink_count; Colour me confused. I can't see where shrink_memory and shrink_count are ever updated in this patch — which path increments/decrements them? Or is my brain fried? > }; > > #define I915_IDLE_ENGINES_TIMEOUT (200) /* in ms */ > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index d98ccbbde53c..0a5049aec144 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -1137,15 +1137,17 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data, > if (i915_gem_object_has_pages(obj)) { > struct list_head *list; > > - spin_lock(&i915->mm.obj_lock); > - if (obj->mm.madv != I915_MADV_WILLNEED) > - list = &i915->mm.purge_list; > - else if (obj->bind_count) > - list = &i915->mm.bound_list; > - else > - list = &i915->mm.unbound_list; > - list_move_tail(&obj->mm.link, list); > - spin_unlock(&i915->mm.obj_lock); > + if (i915_gem_object_is_shrinkable(obj)) { > + spin_lock(&i915->mm.obj_lock); > + if (obj->mm.madv != I915_MADV_WILLNEED) > + list = &i915->mm.purge_list; > + else if (obj->bind_count) > + list = &i915->mm.bound_list; > + else > + list = &i915->mm.unbound_list; > + list_move_tail(&obj->mm.link, list); > + spin_unlock(&i915->mm.obj_lock); > + } > } > > /* if the object is no longer attached, discard its backing storage 
*/ > @@ -1750,7 +1752,6 @@ i915_gem_load_init_fences(struct drm_i915_private *dev_priv) > > static void i915_gem_init__mm(struct drm_i915_private *i915) > { > - spin_lock_init(&i915->mm.object_stat_lock); > spin_lock_init(&i915->mm.obj_lock); > spin_lock_init(&i915->mm.free_lock); > > @@ -1800,7 +1801,7 @@ void i915_gem_cleanup_early(struct drm_i915_private *dev_priv) > i915_gem_drain_freed_objects(dev_priv); > GEM_BUG_ON(!llist_empty(&dev_priv->mm.free_list)); > GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count)); > - WARN_ON(dev_priv->mm.object_count); > + WARN_ON(dev_priv->mm.shrink_count); > > cleanup_srcu_struct(&dev_priv->gpu_error.reset_backoff_srcu); > > diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c > index f640caec4bae..b7fb7d216f77 100644 > --- a/drivers/gpu/drm/i915/i915_vma.c > +++ b/drivers/gpu/drm/i915/i915_vma.c > @@ -110,7 +110,8 @@ static void __i915_vma_retire(struct i915_active *ref) > * so that we don't steal from recently used but inactive objects > * (unless we are forced to ofc!) 
> */ > - obj_bump_mru(obj); > + if (i915_gem_object_is_shrinkable(obj)) > + obj_bump_mru(obj); > > i915_gem_object_put(obj); /* and drop the active reference */ > } > @@ -677,11 +678,14 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) > struct drm_i915_gem_object *obj = vma->obj; > > spin_lock(&dev_priv->mm.obj_lock); > - list_move_tail(&obj->mm.link, &dev_priv->mm.bound_list); > - obj->bind_count++; > - spin_unlock(&dev_priv->mm.obj_lock); > > + if (i915_gem_object_is_shrinkable(obj)) > + list_move_tail(&obj->mm.link, &dev_priv->mm.bound_list); > + > + obj->bind_count++; > assert_bind_count(obj); > + > + spin_unlock(&dev_priv->mm.obj_lock); > } > > return 0; > @@ -717,9 +721,13 @@ i915_vma_remove(struct i915_vma *vma) > struct drm_i915_gem_object *obj = vma->obj; > > spin_lock(&i915->mm.obj_lock); > + > + GEM_BUG_ON(obj->bind_count == 0); > if (--obj->bind_count == 0 && > + i915_gem_object_is_shrinkable(obj) && > obj->mm.madv == I915_MADV_WILLNEED) > list_move_tail(&obj->mm.link, &i915->mm.unbound_list); > + > spin_unlock(&i915->mm.obj_lock); > > /* > -- > 2.20.1 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@xxxxxxxxxxxxxxxxxxxxx > https://lists.freedesktop.org/mailman/listinfo/intel-gfx _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx