On Tue, 28 May 2019 at 20:50, Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> wrote: > > Currently, we try to report to the shrinker the precise number of > objects (pages) that are available to be reaped at this moment. This > requires searching all objects with allocated pages to see if they > fulfill the search criteria, and this count is performed quite > frequently. (The shrinker tries to free ~128 pages on each invocation, > before which we count all the objects; counting takes longer than > unbinding the objects!) If we take the pragmatic view that with > sufficient desire, all objects are eventually reapable (they become > inactive, or no longer used as framebuffer etc), we can simply return > the count of pinned pages maintained during get_pages/put_pages rather > than walk the lists every time. > > The downside is that we may (slightly) over-report the number of > objects/pages we could shrink and so penalize ourselves by shrinking > more than required. This is mitigated by keeping the order in which we > shrink objects such that we avoid penalizing active and frequently used > objects, and if memory is so tight that we need to free them we would > need to anyway. > > v2: Only expose shrinkable objects to the shrinker; a small reduction in > not considering stolen and foreign objects. 
> > Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> > Cc: Joonas Lahtinen <joonas.lahtinen@xxxxxxxxxxxxxxx> > Cc: Matthew Auld <matthew.auld@xxxxxxxxx> > --- > drivers/gpu/drm/i915/gem/i915_gem_domain.c | 3 +- > drivers/gpu/drm/i915/gem/i915_gem_object.c | 22 -------- > drivers/gpu/drm/i915/gem/i915_gem_shrinker.c | 28 ++-------- > drivers/gpu/drm/i915/gem/i915_gem_stolen.c | 3 +- > drivers/gpu/drm/i915/i915_debugfs.c | 58 ++------------------ > drivers/gpu/drm/i915/i915_drv.h | 7 +-- > drivers/gpu/drm/i915/i915_gem.c | 23 ++++---- > drivers/gpu/drm/i915/i915_vma.c | 16 ++++-- > 8 files changed, 41 insertions(+), 119 deletions(-) > > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_domain.c b/drivers/gpu/drm/i915/gem/i915_gem_domain.c > index 52b73e90c9f4..e5deae62681f 100644 > --- a/drivers/gpu/drm/i915/gem/i915_gem_domain.c > +++ b/drivers/gpu/drm/i915/gem/i915_gem_domain.c > @@ -475,7 +475,8 @@ static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj) > } > mutex_unlock(&i915->ggtt.vm.mutex); > > - if (obj->mm.madv == I915_MADV_WILLNEED) { > + if (i915_gem_object_is_shrinkable(obj) && > + obj->mm.madv == I915_MADV_WILLNEED) { > struct list_head *list; > > spin_lock(&i915->mm.obj_lock); > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c > index f064876f1214..1fccb1de5851 100644 > --- a/drivers/gpu/drm/i915/gem/i915_gem_object.c > +++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c > @@ -44,25 +44,6 @@ void i915_gem_object_free(struct drm_i915_gem_object *obj) > return kmem_cache_free(global.slab_objects, obj); > } > > -/* some bookkeeping */ > -static void i915_gem_info_add_obj(struct drm_i915_private *i915, > - u64 size) > -{ > - spin_lock(&i915->mm.object_stat_lock); > - i915->mm.object_count++; > - i915->mm.object_memory += size; > - spin_unlock(&i915->mm.object_stat_lock); > -} > - > -static void i915_gem_info_remove_obj(struct drm_i915_private *i915, > - u64 size) > -{ > - 
spin_lock(&i915->mm.object_stat_lock); > - i915->mm.object_count--; > - i915->mm.object_memory -= size; > - spin_unlock(&i915->mm.object_stat_lock); > -} > - > static void > frontbuffer_retire(struct i915_active_request *active, > struct i915_request *request) > @@ -98,8 +79,6 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj, > obj->mm.madv = I915_MADV_WILLNEED; > INIT_RADIX_TREE(&obj->mm.get_page.radix, GFP_KERNEL | __GFP_NOWARN); > mutex_init(&obj->mm.get_page.lock); > - > - i915_gem_info_add_obj(to_i915(obj->base.dev), obj->base.size); > } > > /** > @@ -240,7 +219,6 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915, > > reservation_object_fini(&obj->__builtin_resv); > drm_gem_object_release(&obj->base); > - i915_gem_info_remove_obj(i915, obj->base.size); > > bitmap_free(obj->bit_17); > i915_gem_object_free(obj); > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c > index 6a93e326abf3..d71e630c6fb8 100644 > --- a/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c > +++ b/drivers/gpu/drm/i915/gem/i915_gem_shrinker.c > @@ -309,30 +309,14 @@ i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc) > { > struct drm_i915_private *i915 = > container_of(shrinker, struct drm_i915_private, mm.shrinker); > - struct drm_i915_gem_object *obj; > - unsigned long num_objects = 0; > - unsigned long count = 0; > + unsigned long num_objects; > + unsigned long count; > > - spin_lock(&i915->mm.obj_lock); > - list_for_each_entry(obj, &i915->mm.unbound_list, mm.link) > - if (can_release_pages(obj)) { > - count += obj->base.size >> PAGE_SHIFT; > - num_objects++; > - } > + count = READ_ONCE(i915->mm.shrink_memory) >> PAGE_SHIFT; > + num_objects = READ_ONCE(i915->mm.shrink_count); > > - list_for_each_entry(obj, &i915->mm.bound_list, mm.link) > - if (!i915_gem_object_is_active(obj) && can_release_pages(obj)) { > - count += obj->base.size >> PAGE_SHIFT; > - num_objects++; > - } > - 
list_for_each_entry(obj, &i915->mm.purge_list, mm.link) > - if (!i915_gem_object_is_active(obj) && can_release_pages(obj)) { > - count += obj->base.size >> PAGE_SHIFT; > - num_objects++; > - } > - spin_unlock(&i915->mm.obj_lock); > - > - /* Update our preferred vmscan batch size for the next pass. > + /* > + * Update our preferred vmscan batch size for the next pass. > * Our rough guess for an effective batch size is roughly 2 > * available GEM objects worth of pages. That is we don't want > * the shrinker to fire, until it is worth the cost of freeing an > diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c > index 9080a736663a..8b3a23bff7f6 100644 > --- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c > +++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c > @@ -690,7 +690,8 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv > mutex_unlock(&ggtt->vm.mutex); > > spin_lock(&dev_priv->mm.obj_lock); > - list_move_tail(&obj->mm.link, &dev_priv->mm.bound_list); > + if (i915_gem_object_is_shrinkable(obj)) > + list_move_tail(&obj->mm.link, &dev_priv->mm.bound_list); > obj->bind_count++; > spin_unlock(&dev_priv->mm.obj_lock); > > diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c > index e415d7ef90f2..214dbd698d8e 100644 > --- a/drivers/gpu/drm/i915/i915_debugfs.c > +++ b/drivers/gpu/drm/i915/i915_debugfs.c > @@ -271,7 +271,7 @@ static int i915_gem_stolen_list_info(struct seq_file *m, void *data) > unsigned long total, count, n; > int ret; > > - total = READ_ONCE(dev_priv->mm.object_count); > + total = READ_ONCE(dev_priv->mm.shrink_count); > objects = kvmalloc_array(total, sizeof(*objects), GFP_KERNEL); > if (!objects) > return -ENOMEM; > @@ -460,9 +460,9 @@ static int i915_gem_object_info(struct seq_file *m, void *data) > char buf[80]; > int ret; > > - seq_printf(m, "%u objects, %llu bytes\n", > - dev_priv->mm.object_count, > - dev_priv->mm.object_memory); > + 
seq_printf(m, "%u shrinkable objects, %llu bytes\n", > + dev_priv->mm.shrink_count, > + dev_priv->mm.shrink_memory); > > size = count = 0; > mapped_size = mapped_count = 0; > @@ -552,55 +552,6 @@ static int i915_gem_object_info(struct seq_file *m, void *data) > return 0; > } > > -static int i915_gem_gtt_info(struct seq_file *m, void *data) > -{ > - struct drm_info_node *node = m->private; > - struct drm_i915_private *dev_priv = node_to_i915(node); > - struct drm_device *dev = &dev_priv->drm; > - struct drm_i915_gem_object **objects; > - struct drm_i915_gem_object *obj; > - u64 total_obj_size, total_gtt_size; > - unsigned long nobject, n; > - int count, ret; > - > - nobject = READ_ONCE(dev_priv->mm.object_count); > - objects = kvmalloc_array(nobject, sizeof(*objects), GFP_KERNEL); > - if (!objects) > - return -ENOMEM; > - > - ret = mutex_lock_interruptible(&dev->struct_mutex); > - if (ret) > - return ret; > - > - count = 0; > - spin_lock(&dev_priv->mm.obj_lock); > - list_for_each_entry(obj, &dev_priv->mm.bound_list, mm.link) { > - objects[count++] = obj; > - if (count == nobject) > - break; > - } > - spin_unlock(&dev_priv->mm.obj_lock); > - > - total_obj_size = total_gtt_size = 0; > - for (n = 0; n < count; n++) { > - obj = objects[n]; > - > - seq_puts(m, " "); > - describe_obj(m, obj); > - seq_putc(m, '\n'); > - total_obj_size += obj->base.size; > - total_gtt_size += i915_gem_obj_total_ggtt_size(obj); > - } > - > - mutex_unlock(&dev->struct_mutex); > - > - seq_printf(m, "Total %d objects, %llu bytes, %llu GTT size\n", > - count, total_obj_size, total_gtt_size); > - kvfree(objects); > - > - return 0; > -} > - > static int i915_gem_batch_pool_info(struct seq_file *m, void *data) > { > struct drm_i915_private *dev_priv = node_to_i915(m->private); > @@ -4584,7 +4535,6 @@ static const struct file_operations i915_fifo_underrun_reset_ops = { > static const struct drm_info_list i915_debugfs_list[] = { > {"i915_capabilities", i915_capabilities, 0}, > {"i915_gem_objects", 
i915_gem_object_info, 0}, > - {"i915_gem_gtt", i915_gem_gtt_info, 0}, > {"i915_gem_stolen", i915_gem_stolen_list_info }, > {"i915_gem_fence_regs", i915_gem_fence_regs_info, 0}, > {"i915_gem_interrupt", i915_interrupt_info, 0}, > diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h > index fb2e89133e78..770c54b87de6 100644 > --- a/drivers/gpu/drm/i915/i915_drv.h > +++ b/drivers/gpu/drm/i915/i915_drv.h > @@ -926,10 +926,9 @@ struct i915_gem_mm { > /** Bit 6 swizzling required for Y tiling */ > u32 bit_6_swizzle_y; > > - /* accounting, useful for userland debugging */ > - spinlock_t object_stat_lock; > - u64 object_memory; > - u32 object_count; > + /* shrinker accounting, also useful for userland debugging */ > + u64 shrink_memory; > + u32 shrink_count; Colour me confused. I can't see where shrink_memory and shrink_count are ever updated in this patch — which path increments/decrements them? Or is my brain fried? > }; > > #define I915_IDLE_ENGINES_TIMEOUT (200) /* in ms */ > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index d98ccbbde53c..0a5049aec144 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -1137,15 +1137,17 @@ i915_gem_madvise_ioctl(struct drm_device *dev, void *data, > if (i915_gem_object_has_pages(obj)) { > struct list_head *list; > > - spin_lock(&i915->mm.obj_lock); > - if (obj->mm.madv != I915_MADV_WILLNEED) > - list = &i915->mm.purge_list; > - else if (obj->bind_count) > - list = &i915->mm.bound_list; > - else > - list = &i915->mm.unbound_list; > - list_move_tail(&obj->mm.link, list); > - spin_unlock(&i915->mm.obj_lock); > + if (i915_gem_object_is_shrinkable(obj)) { > + spin_lock(&i915->mm.obj_lock); > + if (obj->mm.madv != I915_MADV_WILLNEED) > + list = &i915->mm.purge_list; > + else if (obj->bind_count) > + list = &i915->mm.bound_list; > + else > + list = &i915->mm.unbound_list; > + list_move_tail(&obj->mm.link, list); > + spin_unlock(&i915->mm.obj_lock); > + } > } > > /* if the object is no longer attached, discard its backing storage 
*/ > @@ -1750,7 +1752,6 @@ i915_gem_load_init_fences(struct drm_i915_private *dev_priv) > > static void i915_gem_init__mm(struct drm_i915_private *i915) > { > - spin_lock_init(&i915->mm.object_stat_lock); > spin_lock_init(&i915->mm.obj_lock); > spin_lock_init(&i915->mm.free_lock); > > @@ -1800,7 +1801,7 @@ void i915_gem_cleanup_early(struct drm_i915_private *dev_priv) > i915_gem_drain_freed_objects(dev_priv); > GEM_BUG_ON(!llist_empty(&dev_priv->mm.free_list)); > GEM_BUG_ON(atomic_read(&dev_priv->mm.free_count)); > - WARN_ON(dev_priv->mm.object_count); > + WARN_ON(dev_priv->mm.shrink_count); > > cleanup_srcu_struct(&dev_priv->gpu_error.reset_backoff_srcu); > > diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c > index f640caec4bae..b7fb7d216f77 100644 > --- a/drivers/gpu/drm/i915/i915_vma.c > +++ b/drivers/gpu/drm/i915/i915_vma.c > @@ -110,7 +110,8 @@ static void __i915_vma_retire(struct i915_active *ref) > * so that we don't steal from recently used but inactive objects > * (unless we are forced to ofc!) 
> */ > - obj_bump_mru(obj); > + if (i915_gem_object_is_shrinkable(obj)) > + obj_bump_mru(obj); > > i915_gem_object_put(obj); /* and drop the active reference */ > } > @@ -677,11 +678,14 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) > struct drm_i915_gem_object *obj = vma->obj; > > spin_lock(&dev_priv->mm.obj_lock); > - list_move_tail(&obj->mm.link, &dev_priv->mm.bound_list); > - obj->bind_count++; > - spin_unlock(&dev_priv->mm.obj_lock); > > + if (i915_gem_object_is_shrinkable(obj)) > + list_move_tail(&obj->mm.link, &dev_priv->mm.bound_list); > + > + obj->bind_count++; > assert_bind_count(obj); > + > + spin_unlock(&dev_priv->mm.obj_lock); > } > > return 0; > @@ -717,9 +721,13 @@ i915_vma_remove(struct i915_vma *vma) > struct drm_i915_gem_object *obj = vma->obj; > > spin_lock(&i915->mm.obj_lock); > + > + GEM_BUG_ON(obj->bind_count == 0); > if (--obj->bind_count == 0 && > + i915_gem_object_is_shrinkable(obj) && > obj->mm.madv == I915_MADV_WILLNEED) > list_move_tail(&obj->mm.link, &i915->mm.unbound_list); > + > spin_unlock(&i915->mm.obj_lock); > > /* > -- > 2.20.1 > > _______________________________________________ > Intel-gfx mailing list > Intel-gfx@xxxxxxxxxxxxxxxxxxxxx > https://lists.freedesktop.org/mailman/listinfo/intel-gfx _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx