On Fri, 22 Apr 2011 14:39:24 +0900 (JST) KOSAKI Motohiro <kosaki.motohiro@xxxxxxxxxxxxxx> wrote: > > +bool mem_cgroup_kswapd_can_sleep(void) > > +{ > > + return list_empty(&memcg_kswapd_control.list); > > +} > > and, > > > @@ -2583,40 +2585,46 @@ static void kswapd_try_to_sleep(struct kswapd *kswapd_p, int order, > > } else { > > + /* For now, we just check the remaining works.*/ > > + if (mem_cgroup_kswapd_can_sleep()) > > + schedule(); > > has bad assumption. If freeable memory is very little and kswapds are > contended, memcg-kswap also have to give up and go into sleep as global > kswapd. > > Otherwise, We are going to see kswapd cpu 100% consumption issue again. > Hmm, ok. We need to add more logic. Is it ok to have an add-on patch like this? I'll consider something smarter and fairer.... == Because memcg-kswapd pushes the memcg back onto the list when there is remaining work, it may consume too much cpu when it finds a hard-to-reclaim memcg. This patch adds a penalty to a hard-to-reclaim memcg and reduces its chance of being scheduled again. 
--- include/linux/memcontrol.h | 2 +- mm/memcontrol.c | 14 +++++++++++--- mm/vmscan.c | 4 ++-- 3 files changed, 14 insertions(+), 6 deletions(-) Index: mmotm-Apr14/include/linux/memcontrol.h =================================================================== --- mmotm-Apr14.orig/include/linux/memcontrol.h +++ mmotm-Apr14/include/linux/memcontrol.h @@ -96,7 +96,7 @@ extern int mem_cgroup_select_victim_node extern bool mem_cgroup_kswapd_can_sleep(void); extern struct mem_cgroup *mem_cgroup_get_shrink_target(void); -extern void mem_cgroup_put_shrink_target(struct mem_cgroup *mem); +extern void mem_cgroup_put_shrink_target(struct mem_cgroup *mem, int pages); extern wait_queue_head_t *mem_cgroup_kswapd_waitq(void); extern int mem_cgroup_kswapd_bonus(struct mem_cgroup *mem); Index: mmotm-Apr14/mm/memcontrol.c =================================================================== --- mmotm-Apr14.orig/mm/memcontrol.c +++ mmotm-Apr14/mm/memcontrol.c @@ -4739,6 +4739,10 @@ struct mem_cgroup *mem_cgroup_get_shrink memcg_kswapd_wait_list); list_del_init(&mem->memcg_kswapd_wait_list); } + if (mem && mem->stalled) { + mem->stalled--; /* This memcg was cpu hog */ + continue; + } } while (mem && !css_tryget(&mem->css)); if (mem) atomic_inc(&mem->kswapd_running); @@ -4747,7 +4751,7 @@ struct mem_cgroup *mem_cgroup_get_shrink return mem; } -void mem_cgroup_put_shrink_target(struct mem_cgroup *mem) +void mem_cgroup_put_shrink_target(struct mem_cgroup *mem, int nr_pages) { if (!mem) return; @@ -4755,8 +4759,12 @@ void mem_cgroup_put_shrink_target(struct if (!mem_cgroup_watermark_ok(mem, CHARGE_WMARK_HIGH)) { spin_lock(&memcg_kswapd_control.lock); if (list_empty(&mem->memcg_kswapd_wait_list)) { - list_add_tail(&mem->memcg_kswapd_wait_list, - &memcg_kswapd_control.list); + /* If memory reclaim was smooth, resched it */ + if (nr_pages >= SWAP_CLUSTER_MAX/2) + list_add_tail(&mem->memcg_kswapd_wait_list, + &memcg_kswapd_control.list); + else + mem->stalled += 1; /* ignore this memcg for a while */ } spin_unlock(&memcg_kswapd_control.lock); } Index: mmotm-Apr14/mm/vmscan.c =================================================================== --- mmotm-Apr14.orig/mm/vmscan.c +++ mmotm-Apr14/mm/vmscan.c @@ -2892,8 +2892,8 @@ int kswapd(void *p) } else { mem = mem_cgroup_get_shrink_target(); if (mem) - shrink_mem_cgroup(mem, order); - mem_cgroup_put_shrink_target(mem); + ret = shrink_mem_cgroup(mem, order); + mem_cgroup_put_shrink_target(mem, ret); } } return 0; == -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@xxxxxxxxxx For more info on Linux MM, see: http://www.linux-mm.org/ . Fight unfair telecom internet charges in Canada: sign http://stopthemeter.ca/ Don't email: <a href=mailto:"dont@xxxxxxxxx"> email@xxxxxxxxx </a>