On 18/01/2019 14:00, Chris Wilson wrote: > Our goal is to remove struct_mutex and replace it with fine grained > locking. One of the thorny issues is our eviction logic for reclaiming > space for an execbuffer (or GTT mmaping, among a few other examples). > While eviction itself is easy to move under a per-VM mutex, performing > the activity tracking is less agreeable. One solution is not to do any > MRU tracking and do a simple coarse evaluation during eviction of > active/inactive, with a loose temporal ordering of last > insertion/evaluation. That keeps all the locking constrained to when we > are manipulating the VM itself, neatly avoiding the tricky handling of > possible recursive locking during execbuf and elsewhere. > > Note that discarding the MRU is unlikely to impact upon our efficiency > to reclaim VM space (where we think a LRU model is best) as our > current strategy is to use random idle replacement first before doing > a search, and over time the use of softpinned 48b per-ppGTT is growing > (thereby eliminating any need to perform any eviction searches, in > theory at least). There is still no mention of list consolidation. 
> Signed-off-by: Chris Wilson <chris@xxxxxxxxxxxxxxxxxx> > --- > drivers/gpu/drm/i915/i915_gem.c | 10 +-- > drivers/gpu/drm/i915/i915_gem_evict.c | 71 ++++++++++++------- > drivers/gpu/drm/i915/i915_gem_gtt.c | 15 ++-- > drivers/gpu/drm/i915/i915_gem_gtt.h | 26 +------ > drivers/gpu/drm/i915/i915_gem_shrinker.c | 8 ++- > drivers/gpu/drm/i915/i915_gem_stolen.c | 3 +- > drivers/gpu/drm/i915/i915_gpu_error.c | 37 +++++----- > drivers/gpu/drm/i915/i915_vma.c | 9 +-- > .../gpu/drm/i915/selftests/i915_gem_evict.c | 4 +- > drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 2 +- > 10 files changed, 84 insertions(+), 101 deletions(-) > > diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c > index d20b42386c3c..f45186ddb236 100644 > --- a/drivers/gpu/drm/i915/i915_gem.c > +++ b/drivers/gpu/drm/i915/i915_gem.c > @@ -253,10 +253,7 @@ i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data, > > pinned = ggtt->vm.reserved; > mutex_lock(&dev->struct_mutex); > - list_for_each_entry(vma, &ggtt->vm.active_list, vm_link) > - if (i915_vma_is_pinned(vma)) > - pinned += vma->node.size; > - list_for_each_entry(vma, &ggtt->vm.inactive_list, vm_link) > + list_for_each_entry(vma, &ggtt->vm.bound_list, vm_link) > if (i915_vma_is_pinned(vma)) > pinned += vma->node.size; > mutex_unlock(&dev->struct_mutex); > @@ -1539,13 +1536,10 @@ static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj) > GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); > > for_each_ggtt_vma(vma, obj) { > - if (i915_vma_is_active(vma)) > - continue; > - > if (!drm_mm_node_allocated(&vma->node)) > continue; > > - list_move_tail(&vma->vm_link, &vma->vm->inactive_list); > + list_move_tail(&vma->vm_link, &vma->vm->bound_list); > } > > i915 = to_i915(obj->base.dev); > diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c > index f6855401f247..5cfe4b75e7d6 100644 > --- a/drivers/gpu/drm/i915/i915_gem_evict.c > +++ 
b/drivers/gpu/drm/i915/i915_gem_evict.c > @@ -126,14 +126,10 @@ i915_gem_evict_something(struct i915_address_space *vm, > struct drm_i915_private *dev_priv = vm->i915; > struct drm_mm_scan scan; > struct list_head eviction_list; > - struct list_head *phases[] = { > - &vm->inactive_list, > - &vm->active_list, > - NULL, > - }, **phase; > struct i915_vma *vma, *next; > struct drm_mm_node *node; > enum drm_mm_insert_mode mode; > + struct i915_vma *active; > int ret; > > lockdep_assert_held(&vm->i915->drm.struct_mutex); This is the existing comment, which is not included in the diff context here: /* * The goal is to evict objects and amalgamate space in LRU order. * The oldest idle objects reside on the inactive list, which is in * retirement order. The next objects to retire are those in flight, * on the active list, again in retirement order. * * The retirement sequence is thus: * 1. Inactive objects (already retired) * 2. Active objects (will stall on unbinding) * * On each list, the oldest objects lie at the HEAD with the freshest * object on the TAIL. */ I appeal once more that you reword its first and last paragraphs to accurately reflect the fact that there is only one list now. > @@ -169,17 +165,46 @@ i915_gem_evict_something(struct i915_address_space *vm, > */ > if (!(flags & PIN_NONBLOCK)) > i915_retire_requests(dev_priv); > - else > - phases[1] = NULL; > > search_again: > + active = NULL; > INIT_LIST_HEAD(&eviction_list); > - phase = phases; > - do { > - list_for_each_entry(vma, *phase, vm_link) > - if (mark_free(&scan, vma, flags, &eviction_list)) > - goto found; > - } while (*++phase); > + list_for_each_entry_safe(vma, next, &vm->bound_list, vm_link) { > + /* > + * We keep this list in a rough least-recently scanned order > + * of active elements (inactive elements are cheap to reap). > + * New entries are added to the end, and we move anything we > + * scan to the end. 
The assumption is that the working set > + * of applications is either steady state (and thanks to the > + * userspace bo cache it almost always is) or volatile and > + * frequently replaced after a frame, which are self-evicting! > + * Given that assumption, the MRU order of the scan list is > + * fairly static, and keeping it in least-recently scan order > + * is suitable. > + * > + * To notice when we complete one full cycle, we record the > + * first active element seen, before moving it to the tail. > + */ > + if (i915_vma_is_active(vma)) { > + if (vma == active) { > + if (flags & PIN_NONBLOCK) > + break; > + > + active = ERR_PTR(-EAGAIN); > + } > + > + if (active != ERR_PTR(-EAGAIN)) { > + if (!active) > + active = vma; > + > + list_move_tail(&vma->vm_link, &vm->bound_list); > + continue; > + } > + } > + > + if (mark_free(&scan, vma, flags, &eviction_list)) > + goto found; > + } > > /* Nothing found, clean up and bail out! */ > list_for_each_entry_safe(vma, next, &eviction_list, evict_link) { > @@ -388,11 +413,6 @@ int i915_gem_evict_for_node(struct i915_address_space *vm, > */ > int i915_gem_evict_vm(struct i915_address_space *vm) > { > - struct list_head *phases[] = { > - &vm->inactive_list, > - &vm->active_list, > - NULL > - }, **phase; > struct list_head eviction_list; > struct i915_vma *vma, *next; > int ret; > @@ -412,16 +432,13 @@ int i915_gem_evict_vm(struct i915_address_space *vm) > } > > INIT_LIST_HEAD(&eviction_list); > - phase = phases; > - do { > - list_for_each_entry(vma, *phase, vm_link) { > - if (i915_vma_is_pinned(vma)) > - continue; > + list_for_each_entry(vma, &vm->bound_list, vm_link) { > + if (i915_vma_is_pinned(vma)) > + continue; > > - __i915_vma_pin(vma); > - list_add(&vma->evict_link, &eviction_list); > - } > - } while (*++phase); > + __i915_vma_pin(vma); > + list_add(&vma->evict_link, &eviction_list); > + } > > ret = 0; > list_for_each_entry_safe(vma, next, &eviction_list, evict_link) { > diff --git 
a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c > index 9081e3bc5a59..2ad9070a54c1 100644 > --- a/drivers/gpu/drm/i915/i915_gem_gtt.c > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c > @@ -491,9 +491,8 @@ static void i915_address_space_init(struct i915_address_space *vm, int subclass) > > stash_init(&vm->free_pages); > > - INIT_LIST_HEAD(&vm->active_list); > - INIT_LIST_HEAD(&vm->inactive_list); > INIT_LIST_HEAD(&vm->unbound_list); > + INIT_LIST_HEAD(&vm->bound_list); > } > > static void i915_address_space_fini(struct i915_address_space *vm) > @@ -2111,8 +2110,7 @@ void i915_ppgtt_close(struct i915_address_space *vm) > static void ppgtt_destroy_vma(struct i915_address_space *vm) > { > struct list_head *phases[] = { > - &vm->active_list, > - &vm->inactive_list, > + &vm->bound_list, > &vm->unbound_list, > NULL, > }, **phase; > @@ -2135,8 +2133,7 @@ void i915_ppgtt_release(struct kref *kref) > > ppgtt_destroy_vma(&ppgtt->vm); > > - GEM_BUG_ON(!list_empty(&ppgtt->vm.active_list)); > - GEM_BUG_ON(!list_empty(&ppgtt->vm.inactive_list)); > + GEM_BUG_ON(!list_empty(&ppgtt->vm.bound_list)); > GEM_BUG_ON(!list_empty(&ppgtt->vm.unbound_list)); > > ppgtt->vm.cleanup(&ppgtt->vm); > @@ -2801,8 +2798,7 @@ void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv) > mutex_lock(&dev_priv->drm.struct_mutex); > i915_gem_fini_aliasing_ppgtt(dev_priv); > > - GEM_BUG_ON(!list_empty(&ggtt->vm.active_list)); > - list_for_each_entry_safe(vma, vn, &ggtt->vm.inactive_list, vm_link) > + list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) > WARN_ON(i915_vma_unbind(vma)); > > if (drm_mm_node_allocated(&ggtt->error_capture)) > @@ -3514,8 +3510,7 @@ void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv) > ggtt->vm.closed = true; /* skip rewriting PTE on VMA unbind */ > > /* clflush objects bound into the GGTT and rebind them. 
*/ > - GEM_BUG_ON(!list_empty(&ggtt->vm.active_list)); > - list_for_each_entry_safe(vma, vn, &ggtt->vm.inactive_list, vm_link) { > + list_for_each_entry_safe(vma, vn, &ggtt->vm.bound_list, vm_link) { > struct drm_i915_gem_object *obj = vma->obj; > > if (!(vma->flags & I915_VMA_GLOBAL_BIND)) > diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h > index a0039ea97cdc..bd679c8c56dd 100644 > --- a/drivers/gpu/drm/i915/i915_gem_gtt.h > +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h > @@ -299,32 +299,12 @@ struct i915_address_space { > struct i915_page_directory_pointer *scratch_pdp; /* GEN8+ & 48b PPGTT */ > > /** > - * List of objects currently involved in rendering. > - * > - * Includes buffers having the contents of their GPU caches > - * flushed, not necessarily primitives. last_read_req > - * represents when the rendering involved will be completed. > - * > - * A reference is held on the buffer while on this list. > + * List of vma currently bound. > */ > - struct list_head active_list; > + struct list_head bound_list; > > /** > - * LRU list of objects which are not in the ringbuffer and > - * are ready to unbind, but are still in the GTT. > - * > - * last_read_req is NULL while an object is in this list. > - * > - * A reference is not held on the buffer while on this list, > - * as merely being GTT-bound shouldn't prevent its being > - * freed, and we'll pull it off the list in the free path. > - */ > - struct list_head inactive_list; > - > - /** > - * List of vma that have been unbound. > - * > - * A reference is not held on the buffer while on this list. > + * List of vma that are not unbound. 
> */ > struct list_head unbound_list; > > diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c > index 8ceecb026910..a76d6c95c824 100644 > --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c > +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c > @@ -462,9 +462,13 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr > > /* We also want to clear any cached iomaps as they wrap vmap */ > list_for_each_entry_safe(vma, next, > - &i915->ggtt.vm.inactive_list, vm_link) { > + &i915->ggtt.vm.bound_list, vm_link) { > unsigned long count = vma->node.size >> PAGE_SHIFT; > - if (vma->iomap && i915_vma_unbind(vma) == 0) > + > + if (!vma->iomap || i915_vma_is_active(vma)) > + continue; > + > + if (i915_vma_unbind(vma) == 0) > freed_pages += count; > } > > diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c > index 9df615eea2d8..a9e365789686 100644 > --- a/drivers/gpu/drm/i915/i915_gem_stolen.c > +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c > @@ -701,7 +701,8 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv > vma->pages = obj->mm.pages; > vma->flags |= I915_VMA_GLOBAL_BIND; > __i915_vma_set_map_and_fenceable(vma); > - list_move_tail(&vma->vm_link, &ggtt->vm.inactive_list); > + > + list_move_tail(&vma->vm_link, &ggtt->vm.bound_list); > > spin_lock(&dev_priv->mm.obj_lock); > list_move_tail(&obj->mm.link, &dev_priv->mm.bound_list); > diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c > index afccffa9f3f9..6f2fcad0a6ee 100644 > --- a/drivers/gpu/drm/i915/i915_gpu_error.c > +++ b/drivers/gpu/drm/i915/i915_gpu_error.c > @@ -1121,7 +1121,7 @@ static void capture_bo(struct drm_i915_error_buffer *err, > > static u32 capture_error_bo(struct drm_i915_error_buffer *err, > int count, struct list_head *head, > - bool pinned_only) > + bool active_only, bool pinned_only) I still dislike the two booleans since it is 
unreadable at call sites, and the only callers are either "true, false" or "false, true" — so the two flags are basically mutually exclusive. It is only a local function, but you still did not even bother replying any of the three or four times I have mentioned it. It is okay to stay like this, but the stale comments and the commit message have to be improved IMO. Regards, Tvrtko > { > struct i915_vma *vma; > int i = 0; > @@ -1130,6 +1130,9 @@ static u32 capture_error_bo(struct drm_i915_error_buffer *err, > if (!vma->obj) > continue; > > + if (active_only && !i915_vma_is_active(vma)) > + continue; > + > if (pinned_only && !i915_vma_is_pinned(vma)) > continue; > > @@ -1599,14 +1602,16 @@ static void gem_capture_vm(struct i915_gpu_state *error, > int count; > > count = 0; > - list_for_each_entry(vma, &vm->active_list, vm_link) > - count++; > + list_for_each_entry(vma, &vm->bound_list, vm_link) > + if (i915_vma_is_active(vma)) > + count++; > > active_bo = NULL; > if (count) > active_bo = kcalloc(count, sizeof(*active_bo), GFP_ATOMIC); > if (active_bo) > - count = capture_error_bo(active_bo, count, &vm->active_list, false); > + count = capture_error_bo(active_bo, count, &vm->bound_list, > + true, false); > else > count = 0; > > @@ -1644,28 +1649,20 @@ static void capture_pinned_buffers(struct i915_gpu_state *error) > struct i915_address_space *vm = &error->i915->ggtt.vm; > struct drm_i915_error_buffer *bo; > struct i915_vma *vma; > - int count_inactive, count_active; > - > - count_inactive = 0; > - list_for_each_entry(vma, &vm->inactive_list, vm_link) > - count_inactive++; > + int count; > > - count_active = 0; > - list_for_each_entry(vma, &vm->active_list, vm_link) > - count_active++; > + count = 0; > + list_for_each_entry(vma, &vm->bound_list, vm_link) > + count++; > > bo = NULL; > - if (count_inactive + count_active) > - bo = kcalloc(count_inactive + count_active, > - sizeof(*bo), GFP_ATOMIC); > + if (count) > + bo = kcalloc(count, sizeof(*bo), GFP_ATOMIC); > if (!bo) > return; > > - 
count_inactive = capture_error_bo(bo, count_inactive, > - &vm->active_list, true); > - count_active = capture_error_bo(bo + count_inactive, count_active, > - &vm->inactive_list, true); > - error->pinned_bo_count = count_inactive + count_active; > + error->pinned_bo_count = > + capture_error_bo(bo, count, &vm->bound_list, false, true); > error->pinned_bo = bo; > } > > diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c > index 5b4d78cdb4ca..7de28baffb8f 100644 > --- a/drivers/gpu/drm/i915/i915_vma.c > +++ b/drivers/gpu/drm/i915/i915_vma.c > @@ -79,9 +79,6 @@ __i915_vma_retire(struct i915_vma *vma, struct i915_request *rq) > if (--vma->active_count) > return; > > - GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); > - list_move_tail(&vma->vm_link, &vma->vm->inactive_list); > - > GEM_BUG_ON(!i915_gem_object_is_active(obj)); > if (--obj->active_count) > return; > @@ -659,7 +656,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) > GEM_BUG_ON(!drm_mm_node_allocated(&vma->node)); > GEM_BUG_ON(!i915_gem_valid_gtt_space(vma, cache_level)); > > - list_move_tail(&vma->vm_link, &vma->vm->inactive_list); > + list_move_tail(&vma->vm_link, &vma->vm->bound_list); > > if (vma->obj) { > struct drm_i915_gem_object *obj = vma->obj; > @@ -1003,10 +1000,8 @@ int i915_vma_move_to_active(struct i915_vma *vma, > * add the active reference first and queue for it to be dropped > * *last*. 
> */ > - if (!i915_gem_active_isset(active) && !vma->active_count++) { > - list_move_tail(&vma->vm_link, &vma->vm->active_list); > + if (!i915_gem_active_isset(active) && !vma->active_count++) > obj->active_count++; > - } > i915_gem_active_set(active, rq); > GEM_BUG_ON(!i915_vma_is_active(vma)); > GEM_BUG_ON(!obj->active_count); > diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c > index 543d618c152b..9e6b9304f2bd 100644 > --- a/drivers/gpu/drm/i915/selftests/i915_gem_evict.c > +++ b/drivers/gpu/drm/i915/selftests/i915_gem_evict.c > @@ -90,7 +90,7 @@ static int populate_ggtt(struct drm_i915_private *i915) > goto cleanup; > } > > - if (list_empty(&i915->ggtt.vm.inactive_list)) { > + if (list_empty(&i915->ggtt.vm.bound_list)) { > pr_err("No objects on the GGTT inactive list!\n"); > return -EINVAL; > } > @@ -111,7 +111,7 @@ static void unpin_ggtt(struct drm_i915_private *i915) > { > struct i915_vma *vma; > > - list_for_each_entry(vma, &i915->ggtt.vm.inactive_list, vm_link) > + list_for_each_entry(vma, &i915->ggtt.vm.bound_list, vm_link) > i915_vma_unpin(vma); > } > > diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c > index fea8ab14e79d..852b06cb50a0 100644 > --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c > +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c > @@ -1237,7 +1237,7 @@ static void track_vma_bind(struct i915_vma *vma) > __i915_gem_object_pin_pages(obj); > > vma->pages = obj->mm.pages; > - list_move_tail(&vma->vm_link, &vma->vm->inactive_list); > + list_move_tail(&vma->vm_link, &vma->vm->bound_list); > } > > static int exercise_mock(struct drm_i915_private *i915, > _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx