On Fri, 2024-04-05 at 14:41 +0200, Christian König wrote: > Am 29.03.24 um 15:57 schrieb Thomas Hellström: > > Have iterators insert themselves into the list they are iterating > > over using hitch list nodes. Since only the iterator owner > > can remove these list nodes from the list, it's safe to unlock > > the list and when continuing, use them as a starting point. Due to > > the way LRU bumping works in TTM, newly added items will not be > > missed, and bumped items will be iterated over a second time before > > reaching the end of the list. > > > > The exception is list with bulk move sublists. When bumping a > > sublist, a hitch that is part of that sublist will also be moved > > and we might miss items if restarting from it. This will be > > addressed in a later patch. > > > > v2: > > - Updated ttm_resource_cursor_fini() documentation. > > > > Cc: Christian König <christian.koenig@xxxxxxx> > > Cc: Somalapuram Amaranath <Amaranath.Somalapuram@xxxxxxx> > > Cc: <dri-devel@xxxxxxxxxxxxxxxxxxxxx> > > Signed-off-by: Thomas Hellström <thomas.hellstrom@xxxxxxxxxxxxxxx> > > --- > > drivers/gpu/drm/ttm/ttm_bo.c | 1 + > > drivers/gpu/drm/ttm/ttm_device.c | 9 ++- > > drivers/gpu/drm/ttm/ttm_resource.c | 94 ++++++++++++++++++++----- > > ----- > > include/drm/ttm/ttm_resource.h | 16 +++-- > > 4 files changed, 82 insertions(+), 38 deletions(-) > > > > diff --git a/drivers/gpu/drm/ttm/ttm_bo.c > > b/drivers/gpu/drm/ttm/ttm_bo.c > > index e059b1e1b13b..b6f75a0ff2e5 100644 > > --- a/drivers/gpu/drm/ttm/ttm_bo.c > > +++ b/drivers/gpu/drm/ttm/ttm_bo.c > > @@ -622,6 +622,7 @@ int ttm_mem_evict_first(struct ttm_device > > *bdev, > > if (locked) > > dma_resv_unlock(res->bo->base.resv); > > } > > + ttm_resource_cursor_fini_locked(&cursor); > > > > if (!bo) { > > if (busy_bo && !ttm_bo_get_unless_zero(busy_bo)) > > diff --git a/drivers/gpu/drm/ttm/ttm_device.c > > b/drivers/gpu/drm/ttm/ttm_device.c > > index f27406e851e5..e8a6a1dab669 100644 > > --- a/drivers/gpu/drm/ttm/ttm_device.c > > +++ b/drivers/gpu/drm/ttm/ttm_device.c > > @@ -169,12 +169,17 @@ int ttm_device_swapout(struct ttm_device > > *bdev, struct ttm_operation_ctx *ctx, > > num_pages = PFN_UP(bo->base.size); > > ret = ttm_bo_swapout(bo, ctx, gfp_flags); > > /* ttm_bo_swapout has dropped the lru_lock > > */ > > - if (!ret) > > + if (!ret) { > > + ttm_resource_cursor_fini(&cursor); > > return num_pages; > > - if (ret != -EBUSY) > > + } > > + if (ret != -EBUSY) { > > + ttm_resource_cursor_fini(&cursor); > > return ret; > > + } > > } > > } > > + ttm_resource_cursor_fini_locked(&cursor); > > spin_unlock(&bdev->lru_lock); > > return 0; > > } > > diff --git a/drivers/gpu/drm/ttm/ttm_resource.c > > b/drivers/gpu/drm/ttm/ttm_resource.c > > index 7aa5ca5c0e33..ccc31ad85e3b 100644 > > --- a/drivers/gpu/drm/ttm/ttm_resource.c > > +++ b/drivers/gpu/drm/ttm/ttm_resource.c > > @@ -32,6 +32,37 @@ > > > > #include <drm/drm_util.h> > > > > +/** > > + * ttm_resource_cursor_fini_locked() - Finalize the LRU list > > cursor usage > > + * @cursor: The struct ttm_resource_cursor to finalize. > > + * > > + * The function pulls the LRU list cursor off any lists it was > > previusly > > + * attached to. Needs to be called with the LRU lock held. The > > function > > + * can be called multiple times after eachother. > > + */ > > +void ttm_resource_cursor_fini_locked(struct ttm_resource_cursor > > *cursor) > > +{ > > + lockdep_assert_held(&cursor->man->bdev->lru_lock); > > + list_del_init(&cursor->hitch.link); > > +} > > + > > +/** > > + * ttm_resource_cursor_fini() - Finalize the LRU list cursor usage > > + * @cursor: The struct ttm_resource_cursor to finalize. > > + * > > + * The function pulls the LRU list cursor off any lists it was > > previusly > > + * attached to. Needs to be called without the LRU list lock held. > > The > > + * function can be called multiple times after eachother. > > + */ > > +void ttm_resource_cursor_fini(struct ttm_resource_cursor *cursor) > > +{ > > + spinlock_t *lru_lock = &cursor->man->bdev->lru_lock; > > + > > + spin_lock(lru_lock); > > + ttm_resource_cursor_fini_locked(cursor); > > + spin_unlock(lru_lock); > > +} > > + > > /** > > * ttm_lru_bulk_move_init - initialize a bulk move structure > > * @bulk: the structure to init > > @@ -484,62 +515,63 @@ void ttm_resource_manager_debug(struct > > ttm_resource_manager *man, > > EXPORT_SYMBOL(ttm_resource_manager_debug); > > > > /** > > - * ttm_resource_manager_first > > - * > > - * @man: resource manager to iterate over > > + * ttm_resource_manager_next() - Continue iterating over the > > resource manager > > + * resources > > * @cursor: cursor to record the position > > * > > - * Returns the first resource from the resource manager. > > + * Return: The next resource from the resource manager. > > */ > > struct ttm_resource * > > -ttm_resource_manager_first(struct ttm_resource_manager *man, > > - struct ttm_resource_cursor *cursor) > > +ttm_resource_manager_next(struct ttm_resource_cursor *cursor) > > Why are you switching first/next here? It took me a moment to realize > that the two functions switched places. Hmm. That was probably when _first() started using _next(), before realizing they already had declarations. I'll change that back for clarity. > > > { > > + struct ttm_resource_manager *man = cursor->man; > > struct ttm_lru_item *lru; > > > > lockdep_assert_held(&man->bdev->lru_lock); > > > > - for (cursor->priority = 0; cursor->priority < > > TTM_MAX_BO_PRIORITY; > > - ++cursor->priority) > > - list_for_each_entry(lru, &man->lru[cursor- > > >priority], link) { > > - if (ttm_lru_item_is_res(lru)) > > + do { > > + lru = &cursor->hitch; > > + list_for_each_entry_continue(lru, &man- > > >lru[cursor->priority], link) { > > + if (ttm_lru_item_is_res(lru)) { > > + list_move(&cursor->hitch.link, > > &lru->link); > > return ttm_lru_item_to_res(lru); > > + } > > } > > > > + if (++cursor->priority >= TTM_MAX_BO_PRIORITY) > > + break; > > + > > + list_move(&cursor->hitch.link, &man->lru[cursor- > > >priority]); > > + } while (true); > > + > > + list_del_init(&cursor->hitch.link); > > + > > return NULL; > > } > > > > /** > > - * ttm_resource_manager_next > > - * > > + * ttm_resource_manager_first() - Start iterating over the > > resources > > + * of a resource manager > > * @man: resource manager to iterate over > > * @cursor: cursor to record the position > > - * @res: the current resource pointer > > * > > - * Returns the next resource from the resource manager. > > + * Initializes the cursor and starts iterating. When done > > iterating, > > + * the caller must explicitly call ttm_resource_cursor_fini(). > > + * > > + * Return: The first resource from the resource manager. > > */ > > struct ttm_resource * > > -ttm_resource_manager_next(struct ttm_resource_manager *man, > > - struct ttm_resource_cursor *cursor, > > - struct ttm_resource *res) > > +ttm_resource_manager_first(struct ttm_resource_manager *man, > > + struct ttm_resource_cursor *cursor) > > { > > - struct ttm_lru_item *lru = &res->lru; > > - > > lockdep_assert_held(&man->bdev->lru_lock); > > > > - list_for_each_entry_continue(lru, &man->lru[cursor- > > >priority], link) { > > - if (ttm_lru_item_is_res(lru)) > > - return ttm_lru_item_to_res(lru); > > - } > > + cursor->priority = 0; > > + cursor->man = man; > > + ttm_lru_item_init(&cursor->hitch, TTM_LRU_HITCH); > > + list_move(&cursor->hitch.link, &man->lru[cursor- > > >priority]); > > That should probably be a list_add() instead. Yes I can change that. The list head is initialized in ttm_lru_item_init() so it's not really a bug, just slightly inefficient. /Thomas > > Need to take a closer look what actually changed when the functions > keep > their original order. > > Christian. > > > > > - for (++cursor->priority; cursor->priority < > > TTM_MAX_BO_PRIORITY; > > - ++cursor->priority) > > - list_for_each_entry(lru, &man->lru[cursor- > > >priority], link) { > > - if (ttm_lru_item_is_res(lru)) > > - ttm_lru_item_to_res(lru); > > - } > > - > > - return NULL; > > + return ttm_resource_manager_next(cursor); > > } > > > > static void ttm_kmap_iter_iomap_map_local(struct ttm_kmap_iter > > *iter, > > diff --git a/include/drm/ttm/ttm_resource.h > > b/include/drm/ttm/ttm_resource.h > > index 4babc4ff10b0..dfc01258c981 100644 > > --- a/include/drm/ttm/ttm_resource.h > > +++ b/include/drm/ttm/ttm_resource.h > > @@ -271,15 +271,23 @@ ttm_lru_item_to_res(struct ttm_lru_item > > *item) > > > > /** > > * struct ttm_resource_cursor > > - * > > + * @man: The resource manager currently being iterated over > > + * @hitch: A hitch list node inserted before the next resource > > + * to iterate over. > > * @priority: the current priority > > * > > * Cursor to iterate over the resources in a manager. > > */ > > struct ttm_resource_cursor { > > + struct ttm_resource_manager *man; > > + struct ttm_lru_item hitch; > > unsigned int priority; > > }; > > > > +void ttm_resource_cursor_fini_locked(struct ttm_resource_cursor > > *cursor); > > + > > +void ttm_resource_cursor_fini(struct ttm_resource_cursor *cursor); > > + > > /** > > * struct ttm_lru_bulk_move_pos > > * > > @@ -438,9 +446,7 @@ struct ttm_resource * > > ttm_resource_manager_first(struct ttm_resource_manager *man, > > struct ttm_resource_cursor *cursor); > > struct ttm_resource * > > -ttm_resource_manager_next(struct ttm_resource_manager *man, > > - struct ttm_resource_cursor *cursor, > > - struct ttm_resource *res); > > +ttm_resource_manager_next(struct ttm_resource_cursor *cursor); > > > > /** > > * ttm_resource_manager_for_each_res - iterate over all resources > > @@ -452,7 +458,7 @@ ttm_resource_manager_next(struct > > ttm_resource_manager *man, > > */ > > #define ttm_resource_manager_for_each_res(man, cursor, > > res) \ > > for (res = ttm_resource_manager_first(man, cursor); > > res; \ > > - res = ttm_resource_manager_next(man, cursor, res)) > > + res = ttm_resource_manager_next(cursor)) > > > > struct ttm_kmap_iter * > > ttm_kmap_iter_iomap_init(struct ttm_kmap_iter_iomap *iter_io, >