On 2023/2/23 04:05, Kirill Tkhai wrote:
On 22.02.2023 11:21, Qi Zheng wrote:On 2023/2/22 16:16, Qi Zheng wrote:Hi Kirill, On 2023/2/22 05:43, Kirill Tkhai wrote:On 20.02.2023 12:16, Qi Zheng wrote:Like global slab shrink, since commit 1cd0bd06093c<...> static bool cgroup_reclaim(struct scan_control *sc) @@ -891,15 +905,14 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid, { struct shrinker_info *info; unsigned long ret, freed = 0; + int srcu_idx; int i; if (!mem_cgroup_online(memcg)) return 0; - if (!down_read_trylock(&shrinker_rwsem)) - return 0; - - info = shrinker_info_protected(memcg, nid); + srcu_idx = srcu_read_lock(&shrinker_srcu); + info = shrinker_info_srcu(memcg, nid); if (unlikely(!info)) goto unlock;There is shrinker_nr_max dereference under this hunk. It's not in the patch: for_each_set_bit(i, info->map, shrinker_nr_max) { Since shrinker_nr_max may grow in parallel, this leads to access beyond allocated memory :(Oh, indeed.It looks like we should save size of info->map as a new member of struct shrinker_info.Agree, then we only traverse info->map_size each time. Like below: diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index b6eda2ab205d..f1b5d2803007 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -97,6 +97,7 @@ struct shrinker_info { struct rcu_head rcu; atomic_long_t *nr_deferred; unsigned long *map; + int map_size;Sure, like this. The only thing (after rethinking) I want to say is that although "size" was my suggestion, now it makes me think that the name "size" is about size in bytes. Possibly, something like map_nr_max would be better here.
Agree. Will change it to that.
}; struct lruvec_stats_percpu { diff --git a/mm/vmscan.c b/mm/vmscan.c index f94bfe540675..dd07eb107915 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -239,14 +239,20 @@ static void free_shrinker_info_rcu(struct rcu_head *head) kvfree(container_of(head, struct shrinker_info, rcu)); } -static int expand_one_shrinker_info(struct mem_cgroup *memcg, - int map_size, int defer_size, - int old_map_size, int old_defer_size) +static int expand_one_shrinker_info(struct mem_cgroup *memcg, int new_nr_max, + int old_nr_max) { + int map_size, defer_size, old_map_size, old_defer_size; struct shrinker_info *new, *old; struct mem_cgroup_per_node *pn; int nid; - int size = map_size + defer_size; + int size; + + map_size = shrinker_map_size(new_nr_max); + defer_size = shrinker_defer_size(new_nr_max); + old_map_size = shrinker_map_size(shrinker_nr_max); + old_defer_size = shrinker_defer_size(shrinker_nr_max);Perhaps these should still be calculated outside the loop as before.Yeah, for me it's also better.+ size = map_size + defer_size; for_each_node(nid) { pn = memcg->nodeinfo[nid]; @@ -261,6 +267,7 @@ static int expand_one_shrinker_info(struct mem_cgroup *memcg, new->nr_deferred = (atomic_long_t *)(new + 1); new->map = (void *)new->nr_deferred + defer_size; + new->map_size = new_nr_max; /* map: set all old bits, clear all new bits */ memset(new->map, (int)0xff, old_map_size); @@ -310,6 +317,7 @@ int alloc_shrinker_info(struct mem_cgroup *memcg) } info->nr_deferred = (atomic_long_t *)(info + 1); info->map = (void *)info->nr_deferred + defer_size; + info->map_size = shrinker_nr_max; rcu_assign_pointer(memcg->nodeinfo[nid]->shrinker_info, info); } mutex_unlock(&shrinker_mutex); @@ -327,8 +335,6 @@ static int expand_shrinker_info(int new_id) { int ret = 0; int new_nr_max = new_id + 1; - int map_size, defer_size = 0; - int old_map_size, old_defer_size = 0; struct mem_cgroup *memcg; if (!need_expand(new_nr_max)) @@ -339,15 +345,9 @@ static int expand_shrinker_info(int new_id) 
lockdep_assert_held(&shrinker_mutex); - map_size = shrinker_map_size(new_nr_max); - defer_size = shrinker_defer_size(new_nr_max); - old_map_size = shrinker_map_size(shrinker_nr_max); - old_defer_size = shrinker_defer_size(shrinker_nr_max); - memcg = mem_cgroup_iter(NULL, NULL, NULL); do { - ret = expand_one_shrinker_info(memcg, map_size, defer_size, - old_map_size, old_defer_size); + ret = expand_one_shrinker_info(memcg, new_nr_max, shrinker_nr_max); if (ret) { mem_cgroup_iter_break(NULL, memcg); goto out; @@ -912,7 +912,7 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid, if (unlikely(!info)) goto unlock; - for_each_set_bit(i, info->map, shrinker_nr_max) { + for_each_set_bit(i, info->map, info->map_size) { struct shrink_control sc = { .gfp_mask = gfp_mask, .nid = nid, I will send the v2. Thanks, Qi@@ -949,14 +962,9 @@ static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid, set_shrinker_bit(memcg, nid, i); } freed += ret; - - if (rwsem_is_contended(&shrinker_rwsem)) { - freed = freed ? : 1; - break; - } } unlock: - up_read(&shrinker_rwsem); + srcu_read_unlock(&shrinker_srcu, srcu_idx); return freed; } #else /* CONFIG_MEMCG */
-- Thanks, Qi