On 2023/2/22 16:16, Qi Zheng wrote:
Hi Kirill,
On 2023/2/22 05:43, Kirill Tkhai wrote:
On 20.02.2023 12:16, Qi Zheng wrote:
Like global slab shrink, since commit 1cd0bd06093c<...>
static bool cgroup_reclaim(struct scan_control *sc)
@@ -891,15 +905,14 @@ static unsigned long shrink_slab_memcg(gfp_t
gfp_mask, int nid,
{
struct shrinker_info *info;
unsigned long ret, freed = 0;
+ int srcu_idx;
int i;
if (!mem_cgroup_online(memcg))
return 0;
- if (!down_read_trylock(&shrinker_rwsem))
- return 0;
-
- info = shrinker_info_protected(memcg, nid);
+ srcu_idx = srcu_read_lock(&shrinker_srcu);
+ info = shrinker_info_srcu(memcg, nid);
if (unlikely(!info))
goto unlock;
There is shrinker_nr_max dereference under this hunk. It's not in the
patch:
for_each_set_bit(i, info->map, shrinker_nr_max) {
Since shrinker_nr_max may grow in parallel, this leads to access
beyond allocated memory :(
Oh, indeed.
It looks like we should save size of info->map as a new member of
struct shrinker_info.
Agree, then we only traverse info->map_size each time. Like below:
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index b6eda2ab205d..f1b5d2803007 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -97,6 +97,7 @@ struct shrinker_info {
struct rcu_head rcu;
atomic_long_t *nr_deferred;
unsigned long *map;
+ int map_size;
};
struct lruvec_stats_percpu {
diff --git a/mm/vmscan.c b/mm/vmscan.c
index f94bfe540675..dd07eb107915 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -239,14 +239,20 @@ static void free_shrinker_info_rcu(struct rcu_head
*head)
kvfree(container_of(head, struct shrinker_info, rcu));
}
-static int expand_one_shrinker_info(struct mem_cgroup *memcg,
- int map_size, int defer_size,
- int old_map_size, int old_defer_size)
+static int expand_one_shrinker_info(struct mem_cgroup *memcg, int
new_nr_max,
+ int old_nr_max)
{
+ int map_size, defer_size, old_map_size, old_defer_size;
struct shrinker_info *new, *old;
struct mem_cgroup_per_node *pn;
int nid;
- int size = map_size + defer_size;
+ int size;
+
+ map_size = shrinker_map_size(new_nr_max);
+ defer_size = shrinker_defer_size(new_nr_max);
+ old_map_size = shrinker_map_size(shrinker_nr_max);
+ old_defer_size = shrinker_defer_size(shrinker_nr_max);
Perhaps these should still be calculated outside the loop as before.
+ size = map_size + defer_size;
for_each_node(nid) {
pn = memcg->nodeinfo[nid];
@@ -261,6 +267,7 @@ static int expand_one_shrinker_info(struct
mem_cgroup *memcg,
new->nr_deferred = (atomic_long_t *)(new + 1);
new->map = (void *)new->nr_deferred + defer_size;
+ new->map_size = new_nr_max;
/* map: set all old bits, clear all new bits */
memset(new->map, (int)0xff, old_map_size);
@@ -310,6 +317,7 @@ int alloc_shrinker_info(struct mem_cgroup *memcg)
}
info->nr_deferred = (atomic_long_t *)(info + 1);
info->map = (void *)info->nr_deferred + defer_size;
+ info->map_size = shrinker_nr_max;
rcu_assign_pointer(memcg->nodeinfo[nid]->shrinker_info,
info);
}
mutex_unlock(&shrinker_mutex);
@@ -327,8 +335,6 @@ static int expand_shrinker_info(int new_id)
{
int ret = 0;
int new_nr_max = new_id + 1;
- int map_size, defer_size = 0;
- int old_map_size, old_defer_size = 0;
struct mem_cgroup *memcg;
if (!need_expand(new_nr_max))
@@ -339,15 +345,9 @@ static int expand_shrinker_info(int new_id)
lockdep_assert_held(&shrinker_mutex);
- map_size = shrinker_map_size(new_nr_max);
- defer_size = shrinker_defer_size(new_nr_max);
- old_map_size = shrinker_map_size(shrinker_nr_max);
- old_defer_size = shrinker_defer_size(shrinker_nr_max);
-
memcg = mem_cgroup_iter(NULL, NULL, NULL);
do {
- ret = expand_one_shrinker_info(memcg, map_size, defer_size,
- old_map_size,
old_defer_size);
+ ret = expand_one_shrinker_info(memcg, new_nr_max,
shrinker_nr_max);
if (ret) {
mem_cgroup_iter_break(NULL, memcg);
goto out;
@@ -912,7 +912,7 @@ static unsigned long shrink_slab_memcg(gfp_t
gfp_mask, int nid,
if (unlikely(!info))
goto unlock;
- for_each_set_bit(i, info->map, shrinker_nr_max) {
+ for_each_set_bit(i, info->map, info->map_size) {
struct shrink_control sc = {
.gfp_mask = gfp_mask,
.nid = nid,
I will send the v2.
Thanks,
Qi
@@ -949,14 +962,9 @@ static unsigned long shrink_slab_memcg(gfp_t
gfp_mask, int nid,
set_shrinker_bit(memcg, nid, i);
}
freed += ret;
-
- if (rwsem_is_contended(&shrinker_rwsem)) {
- freed = freed ? : 1;
- break;
- }
}
unlock:
- up_read(&shrinker_rwsem);
+ srcu_read_unlock(&shrinker_srcu, srcu_idx);
return freed;
}
#else /* CONFIG_MEMCG */
--
Thanks,
Qi