From: Rob Clark <robdclark@xxxxxxxxxxxx> lock_stat + mmm_donut[1] say that this reduces contention on mm_lock significantly (~350x lower waittime-max, and ~100x lower waittime-avg) [1] https://chromium.googlesource.com/chromiumos/platform/microbenchmarks/+/refs/heads/main/mmm_donut.py Signed-off-by: Rob Clark <robdclark@xxxxxxxxxxxx> --- drivers/gpu/drm/msm/msm_drv.h | 3 +- drivers/gpu/drm/msm/msm_gem.c | 2 +- drivers/gpu/drm/msm/msm_gem_shrinker.c | 48 ++++++++++++++++++++++---- 3 files changed, 45 insertions(+), 8 deletions(-) diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h index c84e6f84cb6d..d8d64d34e6e3 100644 --- a/drivers/gpu/drm/msm/msm_drv.h +++ b/drivers/gpu/drm/msm/msm_drv.h @@ -184,7 +184,8 @@ struct msm_drm_private { /** * Lists of inactive GEM objects. Every bo is either in one of the * inactive lists (depending on whether or not it is shrinkable) or - * gpu->active_list (for the gpu it is active on[1]) + * gpu->active_list (for the gpu it is active on[1]), or transiently + * on a temporary list as the shrinker is running. * * These lists are protected by mm_lock (which should be acquired * before per GEM object lock). 
One should *not* hold mm_lock in diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c index 2ecf7f1cef25..75cea5b801da 100644 --- a/drivers/gpu/drm/msm/msm_gem.c +++ b/drivers/gpu/drm/msm/msm_gem.c @@ -719,7 +719,7 @@ void msm_gem_purge(struct drm_gem_object *obj) put_iova_vmas(obj); msm_obj->madv = __MSM_MADV_PURGED; - mark_unpurgable(msm_obj); + update_inactive(msm_obj); drm_vma_node_unmap(&obj->vma_node, dev->anon_inode->i_mapping); drm_gem_free_mmap_offset(obj); diff --git a/drivers/gpu/drm/msm/msm_gem_shrinker.c b/drivers/gpu/drm/msm/msm_gem_shrinker.c index f3e948af01c5..33a49641ef30 100644 --- a/drivers/gpu/drm/msm/msm_gem_shrinker.c +++ b/drivers/gpu/drm/msm/msm_gem_shrinker.c @@ -22,26 +22,62 @@ msm_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) { struct msm_drm_private *priv = container_of(shrinker, struct msm_drm_private, shrinker); - struct msm_gem_object *msm_obj; + struct list_head still_in_list; unsigned long freed = 0; + INIT_LIST_HEAD(&still_in_list); + mutex_lock(&priv->mm_lock); - list_for_each_entry(msm_obj, &priv->inactive_dontneed, mm_list) { - if (freed >= sc->nr_to_scan) + while (freed < sc->nr_to_scan) { + struct msm_gem_object *msm_obj = list_first_entry_or_null( + &priv->inactive_dontneed, typeof(*msm_obj), mm_list); + + if (!msm_obj) break; - /* Use trylock, because we cannot block on a obj that - * might be trying to acquire mm_lock + + list_move_tail(&msm_obj->mm_list, &still_in_list); + + /* + * If it is in the process of being freed, msm_gem_free_object + * can be blocked on mm_lock waiting to remove it. So just + * skip it. 
*/ - if (!msm_gem_trylock(&msm_obj->base)) + if (!kref_get_unless_zero(&msm_obj->base.refcount)) continue; + + /* + * Now that we own a reference, we can drop mm_lock for the + * rest of the loop body, to reduce contention with the + * retire_submit path (which could make more objects purgeable) + */ + + mutex_unlock(&priv->mm_lock); + + /* + * Note that this still needs to be trylock, since we can + * hit shrinker in response to trying to get backing pages + * for this obj (i.e. while its lock is already held) + */ + if (!msm_gem_trylock(&msm_obj->base)) + goto tail; + if (is_purgeable(msm_obj)) { + /* + * This will move the obj out of still_in_list to + * the purged list + */ msm_gem_purge(&msm_obj->base); freed += msm_obj->base.size >> PAGE_SHIFT; } msm_gem_unlock(&msm_obj->base); + +tail: + drm_gem_object_put(&msm_obj->base); + mutex_lock(&priv->mm_lock); } + list_splice_tail(&still_in_list, &priv->inactive_dontneed); mutex_unlock(&priv->mm_lock); if (freed > 0) { -- 2.30.2