Remove the struct_mutex requirement around dev_priv->mm.bound_list and dev_priv->mm.unbound_list by giving it its own spinlock. This reduces one more requirement for struct_mutex and in the process gives us slightly more accurate unbound_list tracking, which should improve the shrinker. Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> --- drivers/gpu/drm/i915/i915_debugfs.c | 39 ++++++++++++--- drivers/gpu/drm/i915/i915_drv.h | 2 + drivers/gpu/drm/i915/i915_gem.c | 41 ++++++++++++--- drivers/gpu/drm/i915/i915_gem_gtt.c | 3 +- drivers/gpu/drm/i915/i915_gem_object.h | 7 ++- drivers/gpu/drm/i915/i915_gem_shrinker.c | 61 ++++++++++------------- drivers/gpu/drm/i915/i915_gem_stolen.c | 5 +- drivers/gpu/drm/i915/i915_vma.c | 16 ++++-- drivers/gpu/drm/i915/selftests/i915_gem_context.c | 2 +- drivers/gpu/drm/i915/selftests/i915_gem_evict.c | 8 ++- 10 files changed, 120 insertions(+), 64 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 77c0c255c477..73e0a325614d 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -238,7 +238,9 @@ static int i915_gem_stolen_list_info(struct seq_file *m, void *data) goto out; total_obj_size = total_gtt_size = count = 0; - list_for_each_entry(obj, &dev_priv->mm.bound_list, global_link) { + + spin_lock(&dev_priv->mm.obj_lock); + list_for_each_entry(obj, &dev_priv->mm.bound_list, mm.link) { if (count == total) break; @@ -250,7 +252,7 @@ static int i915_gem_stolen_list_info(struct seq_file *m, void *data) total_gtt_size += i915_gem_obj_total_ggtt_size(obj); } - list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_link) { + list_for_each_entry(obj, &dev_priv->mm.unbound_list, mm.link) { if (count == total) break; @@ -260,6 +262,7 @@ static int i915_gem_stolen_list_info(struct seq_file *m, void *data) objects[count++] = obj; total_obj_size += obj->base.size; } + spin_unlock(&dev_priv->mm.obj_lock); sort(objects, count, sizeof(*objects), obj_rank_by_stolen, NULL); @@ -418,7 +421,9 @@ static int i915_gem_object_info(struct seq_file *m, void *data) size = count = 0; mapped_size = mapped_count = 0; purgeable_size = purgeable_count = 0; - list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_link) { + + spin_lock(&dev_priv->mm.obj_lock); + list_for_each_entry(obj, &dev_priv->mm.unbound_list, mm.link) { size += obj->base.size; ++count; @@ -435,7 +440,7 @@ static int i915_gem_object_info(struct seq_file *m, void *data) seq_printf(m, "%u unbound objects, %llu bytes\n", count, size); size = count = dpy_size = dpy_count = 0; - list_for_each_entry(obj, &dev_priv->mm.bound_list, global_link) { + list_for_each_entry(obj, &dev_priv->mm.bound_list, mm.link) { size += obj->base.size; ++count; @@ -454,6 +459,8 @@ static int i915_gem_object_info(struct seq_file *m, void *data) mapped_size += obj->base.size; } } + spin_unlock(&dev_priv->mm.obj_lock); + seq_printf(m, "%u bound objects, %llu bytes\n", count, size); seq_printf(m, "%u purgeable objects, %llu bytes\n", @@ -514,31 +521,49 @@ static int i915_gem_gtt_info(struct seq_file *m, void *data) struct drm_i915_private *dev_priv = node_to_i915(node); struct drm_device *dev = &dev_priv->drm; bool show_pin_display_only = !!node->info_ent->data; + struct drm_i915_gem_object **objects; struct drm_i915_gem_object *obj; u64 total_obj_size, total_gtt_size; + unsigned long nobject, n; int count, ret; + nobject = READ_ONCE(dev_priv->mm.object_count); + objects = kvmalloc_array(nobject, sizeof(*objects), GFP_KERNEL); + if (!objects) + return -ENOMEM; + ret = mutex_lock_interruptible(&dev->struct_mutex); if (ret) return ret; - total_obj_size = total_gtt_size = count = 0; - list_for_each_entry(obj, &dev_priv->mm.bound_list, global_link) { + count = 0; + spin_lock(&dev_priv->mm.obj_lock); + list_for_each_entry(obj, &dev_priv->mm.bound_list, mm.link) { if (show_pin_display_only && !obj->pin_display) continue; + objects[count++] = obj; + if (count == nobject) + break; + } + spin_unlock(&dev_priv->mm.obj_lock); + + total_obj_size = total_gtt_size = 0; + for (n = 0; n < count; n++) { + obj = objects[n]; + seq_puts(m, " "); describe_obj(m, obj); seq_putc(m, '\n'); total_obj_size += obj->base.size; total_gtt_size += i915_gem_obj_total_ggtt_size(obj); - count++; } mutex_unlock(&dev->struct_mutex); seq_printf(m, "Total %d objects, %llu bytes, %llu GTT size\n", count, total_obj_size, total_gtt_size); + kvfree(objects); return 0; } diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index a908d1ba05d5..856214fe28b2 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -1443,6 +1443,8 @@ struct i915_gem_mm { * always the inner lock when overlapping with struct_mutex. */ struct mutex stolen_lock; + spinlock_t obj_lock; + /** List of all objects in gtt_space. Used to restore gtt * mappings on resume */ struct list_head bound_list; diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 858b3bb6846e..bcfaa86451bf 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -1576,9 +1576,12 @@ static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj) list_move_tail(&vma->vm_link, &vma->vm->inactive_list); } + GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); i915 = to_i915(obj->base.dev); + spin_lock(&i915->mm.obj_lock); list = obj->bind_count ? &i915->mm.bound_list : &i915->mm.unbound_list; - list_move_tail(&obj->global_link, list); + list_move_tail(&obj->mm.link, list); + spin_unlock(&i915->mm.obj_lock); } /** @@ -2261,6 +2264,7 @@ static void __i915_gem_object_reset_page_iter(struct drm_i915_gem_object *obj) void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj, enum i915_mm_subclass subclass) { + struct drm_i915_private *i915 = to_i915(obj->base.dev); struct sg_table *pages; if (i915_gem_object_has_pinned_pages(obj)) @@ -2281,6 +2285,10 @@ void __i915_gem_object_put_pages(struct drm_i915_gem_object *obj, pages = fetch_and_zero(&obj->mm.pages); GEM_BUG_ON(!pages); + spin_lock(&i915->mm.obj_lock); + list_del(&obj->mm.link); + spin_unlock(&i915->mm.obj_lock); + if (obj->mm.mapping) { void *ptr; @@ -2497,6 +2505,8 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj) void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, struct sg_table *pages) { + struct drm_i915_private *i915 = to_i915(obj->base.dev); + lockdep_assert_held(&obj->mm.lock); obj->mm.get_page.sg_pos = pages->sgl; @@ -2505,11 +2515,15 @@ void __i915_gem_object_set_pages(struct drm_i915_gem_object *obj, obj->mm.pages = pages; if (i915_gem_object_is_tiled(obj) && - to_i915(obj->base.dev)->quirks & QUIRK_PIN_SWIZZLED_PAGES) { + i915->quirks & QUIRK_PIN_SWIZZLED_PAGES) { GEM_BUG_ON(obj->mm.quirked); __i915_gem_object_pin_pages(obj); obj->mm.quirked = true; } + + spin_lock(&i915->mm.obj_lock); + list_add(&obj->mm.link, &i915->mm.unbound_list); + spin_unlock(&i915->mm.obj_lock); } static int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj) @@ -4286,7 +4300,6 @@ void i915_gem_object_init(struct drm_i915_gem_object *obj, { mutex_init(&obj->mm.lock); - INIT_LIST_HEAD(&obj->global_link); INIT_LIST_HEAD(&obj->userfault_link); INIT_LIST_HEAD(&obj->vma_list); INIT_LIST_HEAD(&obj->batch_pool_link); @@ -4434,7 +4447,18 @@ static void __i915_gem_free_objects(struct drm_i915_private *i915, GEM_BUG_ON(!list_empty(&obj->vma_list)); GEM_BUG_ON(!RB_EMPTY_ROOT(&obj->vma_tree)); - list_del(&obj->global_link); + /* This serializes freeing with the shrinker. Since the free + * is delayed, first by RCU then by the workqueue, we want the + * shrinker to be able to free pages of unreferenced objects, + * or else we may oom whilst there are plenty of deferred + * freed objects. + */ + if (i915_gem_object_has_pages(obj)) { + spin_lock(&i915->mm.obj_lock); + list_del_init(&obj->mm.link); + spin_unlock(&i915->mm.obj_lock); + } + } intel_runtime_pm_put(i915); mutex_unlock(&i915->drm.struct_mutex); @@ -4951,11 +4975,14 @@ i915_gem_load_init(struct drm_i915_private *dev_priv) goto err_priorities; INIT_WORK(&dev_priv->mm.free_work, __i915_gem_free_work); + + spin_lock_init(&dev_priv->mm.obj_lock); init_llist_head(&dev_priv->mm.free_list); INIT_LIST_HEAD(&dev_priv->mm.unbound_list); INIT_LIST_HEAD(&dev_priv->mm.bound_list); INIT_LIST_HEAD(&dev_priv->mm.fence_list); INIT_LIST_HEAD(&dev_priv->mm.userfault_list); + INIT_DELAYED_WORK(&dev_priv->gt.retire_work, i915_gem_retire_work_handler); INIT_DELAYED_WORK(&dev_priv->gt.idle_work, @@ -5040,12 +5067,12 @@ int i915_gem_freeze_late(struct drm_i915_private *dev_priv) i915_gem_shrink(dev_priv, -1UL, I915_SHRINK_UNBOUND); i915_gem_drain_freed_objects(dev_priv); - mutex_lock(&dev_priv->drm.struct_mutex); + spin_lock(&dev_priv->mm.obj_lock); for (p = phases; *p; p++) { - list_for_each_entry(obj, *p, global_link) + list_for_each_entry(obj, *p, mm.link) __start_cpu_write(obj); } - mutex_unlock(&dev_priv->drm.struct_mutex); + spin_unlock(&dev_priv->mm.obj_lock); return 0; } diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index ad4e48435853..1158559d1ab2 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -3182,8 +3182,7 @@ void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv) ggtt->base.closed = true; /* skip rewriting PTE on VMA unbind */ /* clflush objects bound into the GGTT and rebind them. */ - list_for_each_entry_safe(obj, on, - &dev_priv->mm.bound_list, global_link) { + list_for_each_entry_safe(obj, on, &dev_priv->mm.bound_list, mm.link) { bool ggtt_bound = false; struct i915_vma *vma; diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h index 5b19a4916a4d..b6425f163c8e 100644 --- a/drivers/gpu/drm/i915/i915_gem_object.h +++ b/drivers/gpu/drm/i915/i915_gem_object.h @@ -90,7 +90,6 @@ struct drm_i915_gem_object { /** Stolen memory for this object, instead of being backed by shmem. */ struct drm_mm_node *stolen; - struct list_head global_link; union { struct rcu_head rcu; struct llist_node freed; @@ -152,6 +151,12 @@ struct drm_i915_gem_object { } get_page; /** + * Element within i915->mm.unbound_list or i915->mm.bound_list, + * locked by i915->mm.obj_lock. + */ + struct list_head link; + + /** * Advice: are the backing pages purgeable? */ unsigned int madv:2; diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index 1668a62619fb..ad129783d124 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -92,9 +92,6 @@ static bool swap_available(void) static bool can_release_pages(struct drm_i915_gem_object *obj) { - if (!i915_gem_object_has_pages(obj)) - return false; - /* Consider only shrinkable ojects. */ if (!i915_gem_object_is_shrinkable(obj)) return false; @@ -208,15 +205,20 @@ i915_gem_shrink(struct drm_i915_private *dev_priv, continue; INIT_LIST_HEAD(&still_in_list); + + /* + * We serialize our access to unreferenced objects through + * the use of the struct_mutex. While the objects are not + * yet freed (due to RCU then a workqueue) we still want + * to be able to shrink their pages, so they remain on + * the unbound/bound list until actually freed. + */ + spin_lock(&dev_priv->mm.obj_lock); while (count < target && (obj = list_first_entry_or_null(phase->list, typeof(*obj), - global_link))) { - list_move_tail(&obj->global_link, &still_in_list); - if (!obj->mm.pages) { - list_del_init(&obj->global_link); - continue; - } + mm.link))) { + list_move_tail(&obj->mm.link, &still_in_list); if (flags & I915_SHRINK_PURGEABLE && obj->mm.madv != I915_MADV_DONTNEED) @@ -234,19 +236,23 @@ i915_gem_shrink(struct drm_i915_private *dev_priv, if (!can_release_pages(obj)) continue; + spin_unlock(&dev_priv->mm.obj_lock); + if (unsafe_drop_pages(obj)) { /* May arrive from get_pages on another bo */ mutex_lock_nested(&obj->mm.lock, I915_MM_SHRINKER); if (!i915_gem_object_has_pages(obj)) { __i915_gem_object_invalidate(obj); - list_del_init(&obj->global_link); count += obj->base.size >> PAGE_SHIFT; } mutex_unlock(&obj->mm.lock); } + + spin_lock(&dev_priv->mm.obj_lock); } list_splice_tail(&still_in_list, phase->list); + spin_unlock(&dev_priv->mm.obj_lock); } if (flags & I915_SHRINK_BOUND) @@ -293,25 +299,18 @@ i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc) struct drm_i915_private *dev_priv = container_of(shrinker, struct drm_i915_private, mm.shrinker); struct drm_i915_gem_object *obj; - unsigned long count; - bool unlock; - - if (!shrinker_lock(dev_priv, &unlock)) - return 0; - - i915_gem_retire_requests(dev_priv); + unsigned long count = 0; - count = 0; - list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_link) + spin_lock(&dev_priv->mm.obj_lock); + list_for_each_entry(obj, &dev_priv->mm.unbound_list, mm.link) if (can_release_pages(obj)) count += obj->base.size >> PAGE_SHIFT; - list_for_each_entry(obj, &dev_priv->mm.bound_list, global_link) { + list_for_each_entry(obj, &dev_priv->mm.bound_list, mm.link) { if (!i915_gem_object_is_active(obj) && can_release_pages(obj)) count += obj->base.size >> PAGE_SHIFT; } - - shrinker_unlock(dev_priv, unlock); + spin_unlock(&dev_priv->mm.obj_lock); return count; } @@ -383,10 +382,6 @@ i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr) container_of(nb, struct drm_i915_private, mm.oom_notifier); struct drm_i915_gem_object *obj; unsigned long unevictable, bound, unbound, freed_pages; - bool unlock; - - if (!shrinker_lock_uninterruptible(dev_priv, &unlock, 5000)) - return NOTIFY_DONE; freed_pages = i915_gem_shrink_all(dev_priv); @@ -395,26 +390,20 @@ i915_gem_shrinker_oom(struct notifier_block *nb, unsigned long event, void *ptr) * being pointed to by hardware. */ unbound = bound = unevictable = 0; - list_for_each_entry(obj, &dev_priv->mm.unbound_list, global_link) { - if (!i915_gem_object_has_pages(obj)) - continue; - + spin_lock(&dev_priv->mm.obj_lock); + list_for_each_entry(obj, &dev_priv->mm.unbound_list, mm.link) { if (!can_release_pages(obj)) unevictable += obj->base.size >> PAGE_SHIFT; else unbound += obj->base.size >> PAGE_SHIFT; } - list_for_each_entry(obj, &dev_priv->mm.bound_list, global_link) { - if (!i915_gem_object_has_pages(obj)) - continue; - + list_for_each_entry(obj, &dev_priv->mm.bound_list, mm.link) { if (!can_release_pages(obj)) unevictable += obj->base.size >> PAGE_SHIFT; else bound += obj->base.size >> PAGE_SHIFT; } - - shrinker_unlock(dev_priv, unlock); + spin_unlock(&dev_priv->mm.obj_lock); if (freed_pages || unbound || bound) pr_info("Purging GPU memory, %lu pages freed, " diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index c11c915382e7..a6e49d1bd389 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -717,8 +717,11 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv vma->flags |= I915_VMA_GLOBAL_BIND; __i915_vma_set_map_and_fenceable(vma); list_move_tail(&vma->vm_link, &ggtt->base.inactive_list); - list_move_tail(&obj->global_link, &dev_priv->mm.bound_list); + + spin_lock(&dev_priv->mm.obj_lock); + list_move_tail(&obj->mm.link, &dev_priv->mm.bound_list); obj->bind_count++; + spin_unlock(&dev_priv->mm.obj_lock); return obj; diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index 0c489090d4ab..fbb12a39ffc1 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -58,8 +58,10 @@ i915_vma_retire(struct i915_gem_active *active, * so that we don't steal from recently used but inactive objects * (unless we are forced to ofc!) */ + spin_lock(&rq->i915->mm.obj_lock); if (obj->bind_count) - list_move_tail(&obj->global_link, &rq->i915->mm.bound_list); + list_move_tail(&obj->mm.link, &rq->i915->mm.bound_list); + spin_unlock(&rq->i915->mm.obj_lock); obj->mm.dirty = true; /* be paranoid */ @@ -515,9 +517,13 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level)); - list_move_tail(&obj->global_link, &dev_priv->mm.bound_list); list_move_tail(&vma->vm_link, &vma->vm->inactive_list); + + spin_lock(&dev_priv->mm.obj_lock); + list_move_tail(&obj->mm.link, &dev_priv->mm.bound_list); obj->bind_count++; + spin_unlock(&dev_priv->mm.obj_lock); + GEM_BUG_ON(atomic_read(&obj->mm.pages_pin_count) < obj->bind_count); return 0; @@ -530,6 +536,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) static void i915_vma_remove(struct i915_vma *vma) { + struct drm_i915_private *i915 = vma->vm->i915; struct drm_i915_gem_object *obj = vma->obj; GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); @@ -541,9 +548,10 @@ i915_vma_remove(struct i915_vma *vma) /* Since the unbound list is global, only move to that list if * no more VMAs exist. */ + spin_lock(&i915->mm.obj_lock); if (--obj->bind_count == 0) - list_move_tail(&obj->global_link, - &to_i915(obj->base.dev)->mm.unbound_list); + list_move_tail(&obj->mm.link, &i915->mm.unbound_list); + spin_unlock(&i915->mm.obj_lock); /* And finally now the object is completely decoupled from this vma, * we can drop its hold on the backing storage and allow it to be diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/selftests/i915_gem_context.c index 12b85b3278cd..6a98a5e86d49 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_context.c @@ -415,7 +415,7 @@ static int fake_aliasing_ppgtt_enable(struct drm_i915_private *i915) if (err) return err; - list_for_each_entry(obj, &i915->mm.bound_list, global_link) { + list_for_each_entry(obj, &i915->mm.bound_list, mm.link) { struct i915_vma *vma; vma = i915_vma_instance(obj, &i915->ggtt.base, NULL); diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c index 5ea373221f49..d423a46bf019 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c @@ -47,7 +47,7 @@ static int populate_ggtt(struct drm_i915_private *i915) if (!list_empty(&i915->mm.unbound_list)) { size = 0; - list_for_each_entry(obj, &i915->mm.unbound_list, global_link) + list_for_each_entry(obj, &i915->mm.unbound_list, mm.link) size++; pr_err("Found %lld objects unbound!\n", size); @@ -74,10 +74,10 @@ static void cleanup_objects(struct drm_i915_private *i915) { struct drm_i915_gem_object *obj, *on; - list_for_each_entry_safe(obj, on, &i915->mm.unbound_list, global_link) + list_for_each_entry_safe(obj, on, &i915->mm.unbound_list, mm.link) i915_gem_object_put(obj); - list_for_each_entry_safe(obj, on, &i915->mm.bound_list, global_link) + list_for_each_entry_safe(obj, on, &i915->mm.bound_list, mm.link) i915_gem_object_put(obj); mutex_unlock(&i915->drm.struct_mutex); @@ -149,8 +149,6 @@ static int igt_overcommit(void *arg) goto cleanup; } - list_move(&obj->global_link, &i915->mm.unbound_list); - vma = i915_gem_object_ggtt_pin(obj, NULL, 0, 0, 0); if (!IS_ERR(vma) || PTR_ERR(vma) != -ENOSPC) { pr_err("Failed to evict+insert, i915_gem_object_ggtt_pin returned err=%d\n", (int)PTR_ERR(vma)); -- 2.13.3 _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx