On Mon 27-09-21 00:48:23, Sultan Alsawaf wrote: > From: Sultan Alsawaf <sultan@xxxxxxxxxxxxxxx> > > The shrinker rwsem is problematic because the actual shrinking path must > back off when contention appears, causing some or all shrinkers to be > skipped. This can be especially bad when shrinkers are frequently > registered and unregistered. A high frequency of shrinker registrations/ > unregistrations can effectively DoS the shrinker mechanism, rendering it > useless. Can you be more specific about those scenarios please? > Using SRCU to protect iteration through the shrinker list and idr > eliminates this issue entirely. Now, shrinking can happen concurrently > with shrinker registrations/unregistrations. I have a vague recollection that this approach has been proposed in the past. Have you checked previous attempts? > Signed-off-by: Sultan Alsawaf <sultan@xxxxxxxxxxxxxxx> > --- > mm/vmscan.c | 68 ++++++++++++++++++++++++++++------------------------- > 1 file changed, 36 insertions(+), 32 deletions(-) > > diff --git a/mm/vmscan.c b/mm/vmscan.c > index 74296c2d1fed..3dea927be5ad 100644 > --- a/mm/vmscan.c > +++ b/mm/vmscan.c > @@ -190,6 +190,7 @@ static void set_task_reclaim_state(struct task_struct *task, > > static LIST_HEAD(shrinker_list); > static DECLARE_RWSEM(shrinker_rwsem); > +DEFINE_STATIC_SRCU(shrinker_srcu); > > #ifdef CONFIG_MEMCG > static int shrinker_nr_max; > @@ -212,6 +213,20 @@ static struct shrinker_info *shrinker_info_protected(struct mem_cgroup *memcg, > lockdep_is_held(&shrinker_rwsem)); > } > > +static struct shrinker_info *shrinker_info_srcu(struct mem_cgroup *memcg, > + int nid) > +{ > + return srcu_dereference(memcg->nodeinfo[nid]->shrinker_info, > + &shrinker_srcu); > +} > + > +static void free_shrinker_info_srcu(struct rcu_head *head) > +{ > + struct shrinker_info *info = container_of(head, typeof(*info), rcu); > + > + kvfree_rcu(info, rcu); > +} > + > static int expand_one_shrinker_info(struct mem_cgroup *memcg, > int map_size, int defer_size, > int old_map_size, int old_defer_size) > @@ -244,7 +259,12 @@ static int expand_one_shrinker_info(struct mem_cgroup *memcg, > defer_size - old_defer_size); > > rcu_assign_pointer(pn->shrinker_info, new); > - kvfree_rcu(old, rcu); > + > + /* > + * Shrinker info is used under both SRCU and regular RCU, so > + * synchronize the free against both of them. > + */ > + call_srcu(&shrinker_srcu, &old->rcu, free_shrinker_info_srcu); > } > > return 0; > @@ -357,7 +377,6 @@ static int prealloc_memcg_shrinker(struct shrinker *shrinker) > return -ENOSYS; > > down_write(&shrinker_rwsem); > - /* This may call shrinker, so it must use down_read_trylock() */ > id = idr_alloc(&shrinker_idr, shrinker, 0, 0, GFP_KERNEL); > if (id < 0) > goto unlock; > @@ -391,7 +410,7 @@ static long xchg_nr_deferred_memcg(int nid, struct shrinker *shrinker, > { > struct shrinker_info *info; > > - info = shrinker_info_protected(memcg, nid); > + info = shrinker_info_srcu(memcg, nid); > return atomic_long_xchg(&info->nr_deferred[shrinker->id], 0); > } > > @@ -400,7 +419,7 @@ static long add_nr_deferred_memcg(long nr, int nid, struct shrinker *shrinker, > { > struct shrinker_info *info; > > - info = shrinker_info_protected(memcg, nid); > + info = shrinker_info_srcu(memcg, nid); > return atomic_long_add_return(nr, &info->nr_deferred[shrinker->id]); > } > > @@ -641,6 +660,7 @@ void free_prealloced_shrinker(struct shrinker *shrinker) > down_write(&shrinker_rwsem); > unregister_memcg_shrinker(shrinker); > up_write(&shrinker_rwsem); > + synchronize_srcu(&shrinker_srcu); > return; > } > > @@ -651,7 +671,7 @@ void free_prealloced_shrinker(struct shrinker *shrinker) > void register_shrinker_prepared(struct shrinker *shrinker) > { > down_write(&shrinker_rwsem); > - list_add_tail(&shrinker->list, &shrinker_list); > + list_add_tail_rcu(&shrinker->list, &shrinker_list); > shrinker->flags |= SHRINKER_REGISTERED; > up_write(&shrinker_rwsem); > } > @@ -676,11 +696,12 @@ void unregister_shrinker(struct shrinker *shrinker) > return; > > down_write(&shrinker_rwsem); > - list_del(&shrinker->list); > + list_del_rcu(&shrinker->list); > shrinker->flags &= ~SHRINKER_REGISTERED; > if (shrinker->flags & SHRINKER_MEMCG_AWARE) > unregister_memcg_shrinker(shrinker); > up_write(&shrinker_rwsem); > + synchronize_srcu(&shrinker_srcu); > > kfree(shrinker->nr_deferred); > shrinker->nr_deferred = NULL; > @@ -792,15 +813,13 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid, > { > struct shrinker_info *info; > unsigned long ret, freed = 0; > - int i; > + int i, srcu_idx; > > if (!mem_cgroup_online(memcg)) > return 0; > > - if (!down_read_trylock(&shrinker_rwsem)) > - return 0; > - > - info = shrinker_info_protected(memcg, nid); > + srcu_idx = srcu_read_lock(&shrinker_srcu); > + info = shrinker_info_srcu(memcg, nid); > if (unlikely(!info)) > goto unlock; > > @@ -850,14 +869,9 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid, > set_shrinker_bit(memcg, nid, i); > } > freed += ret; > - > - if (rwsem_is_contended(&shrinker_rwsem)) { > - freed = freed ? : 1; > - break; > - } > } > unlock: > - up_read(&shrinker_rwsem); > + srcu_read_unlock(&shrinker_srcu, srcu_idx); > return freed; > } > #else /* CONFIG_MEMCG */ > @@ -894,6 +908,7 @@ static unsigned long shrink_slab(gfp_t gfp_mask, int nid, > { > unsigned long ret, freed = 0; > struct shrinker *shrinker; > + int srcu_idx; > > /* > * The root memcg might be allocated even though memcg is disabled > @@ -905,10 +920,9 @@ static unsigned long shrink_slab(gfp_t gfp_mask, int nid, > if (!mem_cgroup_disabled() && !mem_cgroup_is_root(memcg)) > return shrink_slab_memcg(gfp_mask, nid, memcg, priority); > > - if (!down_read_trylock(&shrinker_rwsem)) > - goto out; > - > - list_for_each_entry(shrinker, &shrinker_list, list) { > + srcu_idx = srcu_read_lock(&shrinker_srcu); > + list_for_each_entry_srcu(shrinker, &shrinker_list, list, > + srcu_read_lock_held(&shrinker_srcu)) { > struct shrink_control sc = { > .gfp_mask = gfp_mask, > .nid = nid, > @@ -919,19 +933,9 @@ static unsigned long shrink_slab(gfp_t gfp_mask, int nid, > if (ret == SHRINK_EMPTY) > ret = 0; > freed += ret; > - /* > - * Bail out if someone want to register a new shrinker to > - * prevent the registration from being stalled for long periods > - * by parallel ongoing shrinking. > - */ > - if (rwsem_is_contended(&shrinker_rwsem)) { > - freed = freed ? : 1; > - break; > - } > } > + srcu_read_unlock(&shrinker_srcu, srcu_idx); > > - up_read(&shrinker_rwsem); > -out: > cond_resched(); > return freed; > } > -- > 2.33.0 -- Michal Hocko SUSE Labs