The patch titled memcg: add inactive_anon_is_low() has been added to the -mm tree. Its filename is memcg-add-inactive_anon_is_low.patch Before you just go and hit "reply", please: a) Consider who else should be cc'ed b) Prefer to cc a suitable mailing list as well c) Ideally: find the original patch on the mailing list and do a reply-to-all to that, adding suitable additional cc's *** Remember to use Documentation/SubmitChecklist when testing your code *** See http://userweb.kernel.org/~akpm/stuff/added-to-mm.txt to find out what to do about this The current -mm tree may be found at http://userweb.kernel.org/~akpm/mmotm/ ------------------------------------------------------ Subject: memcg: add inactive_anon_is_low() From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@xxxxxxxxxxxxxx> inactive_anon_is_low() is a key component of active/inactive anon balancing during reclaim. However, the current inactive_anon_is_low() function only considers global reclaim. Therefore, we need the following ugly scan_global_lru() condition: if (lru == LRU_ACTIVE_ANON && (!scan_global_lru(sc) || inactive_anon_is_low(zone))) { shrink_active_list(nr_to_scan, zone, sc, priority, file); return 0; This causes memcg reclaim to always deactivate pages whenever shrink_list() is called. Add mem_cgroup_inactive_anon_is_low() to improve active/inactive anon balancing for memory cgroups. 
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@xxxxxxxxxxxxxx> Acked-by: Rik van Riel <riel@xxxxxxxxxx> Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@xxxxxxxxxxxxxx> Cc: Cyrill Gorcunov <gorcunov@xxxxxxxxx> Cc: "Pekka Enberg" <penberg@xxxxxxxxxxxxxx> Cc: Balbir Singh <balbir@xxxxxxxxxx> Cc: Daisuke Nishimura <nishimura@xxxxxxxxxxxxxxxxx> Cc: Hugh Dickins <hugh@xxxxxxxxxxx> Cc: KOSAKI Motohiro <kosaki.motohiro@xxxxxxxxxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> --- include/linux/memcontrol.h | 9 ++++++ mm/memcontrol.c | 46 ++++++++++++++++++++++++++++++++++- mm/vmscan.c | 36 +++++++++++++++++---------- 3 files changed, 77 insertions(+), 14 deletions(-) diff -puN include/linux/memcontrol.h~memcg-add-inactive_anon_is_low include/linux/memcontrol.h --- a/include/linux/memcontrol.h~memcg-add-inactive_anon_is_low +++ a/include/linux/memcontrol.h @@ -100,6 +100,8 @@ extern void mem_cgroup_record_reclaim_pr extern long mem_cgroup_calc_reclaim(struct mem_cgroup *mem, struct zone *zone, int priority, enum lru_list lru); +int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, + struct zone *zone); #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP extern int do_swap_account; @@ -251,6 +253,13 @@ static inline bool mem_cgroup_oom_called { return false; } + +static inline int +mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, struct zone *zone) +{ + return 1; +} + #endif /* CONFIG_CGROUP_MEM_CONT */ #endif /* _LINUX_MEMCONTROL_H */ diff -puN mm/memcontrol.c~memcg-add-inactive_anon_is_low mm/memcontrol.c --- a/mm/memcontrol.c~memcg-add-inactive_anon_is_low +++ a/mm/memcontrol.c @@ -156,6 +156,9 @@ struct mem_cgroup { unsigned long last_oom_jiffies; int obsolete; atomic_t refcnt; + + unsigned int inactive_ratio; + /* * statistics. This must be placed at the end of memcg. 
*/ @@ -431,6 +434,20 @@ long mem_cgroup_calc_reclaim(struct mem_ return (nr_pages >> priority); } +int mem_cgroup_inactive_anon_is_low(struct mem_cgroup *memcg, struct zone *zone) +{ + unsigned long active; + unsigned long inactive; + + inactive = mem_cgroup_get_all_zonestat(memcg, LRU_INACTIVE_ANON); + active = mem_cgroup_get_all_zonestat(memcg, LRU_ACTIVE_ANON); + + if (inactive * memcg->inactive_ratio < active) + return 1; + + return 0; +} + unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, struct list_head *dst, unsigned long *scanned, int order, @@ -1360,6 +1377,29 @@ int mem_cgroup_shrink_usage(struct mm_st return 0; } +/* + * The inactive anon list should be small enough that the VM never has to + * do too much work, but large enough that each inactive page has a chance + * to be referenced again before it is swapped out. + * + * this calculation is straightforward porting from + * page_alloc.c::setup_per_zone_inactive_ratio(). + * it describe more detail. + */ +static void mem_cgroup_set_inactive_ratio(struct mem_cgroup *memcg) +{ + unsigned int gb, ratio; + + gb = res_counter_read_u64(&memcg->res, RES_LIMIT) >> 30; + if (gb) + ratio = int_sqrt(10 * gb); + else + ratio = 1; + + memcg->inactive_ratio = ratio; + +} + static DEFINE_MUTEX(set_limit_mutex); static int mem_cgroup_resize_limit(struct mem_cgroup *memcg, @@ -1398,6 +1438,10 @@ static int mem_cgroup_resize_limit(struc GFP_KERNEL, false); if (!progress) retry_count--; } + + if (!ret) + mem_cgroup_set_inactive_ratio(memcg); + return ret; } @@ -1982,7 +2026,7 @@ mem_cgroup_create(struct cgroup_subsys * res_counter_init(&mem->res, NULL); res_counter_init(&mem->memsw, NULL); } - + mem_cgroup_set_inactive_ratio(mem); mem->last_scanned_child = NULL; return &mem->css; diff -puN mm/vmscan.c~memcg-add-inactive_anon_is_low mm/vmscan.c --- a/mm/vmscan.c~memcg-add-inactive_anon_is_low +++ a/mm/vmscan.c @@ -1357,14 +1357,7 @@ static void shrink_active_list(unsigned pagevec_release(&pvec); } -/** - * 
inactive_anon_is_low - check if anonymous pages need to be deactivated - * @zone: zone to check - * - * Returns true if the zone does not have enough inactive anon pages, - * meaning some active anon pages need to be deactivated. - */ -static int inactive_anon_is_low(struct zone *zone) +static int inactive_anon_is_low_global(struct zone *zone) { unsigned long active, inactive; @@ -1377,6 +1370,25 @@ static int inactive_anon_is_low(struct z return 0; } +/** + * inactive_anon_is_low - check if anonymous pages need to be deactivated + * @zone: zone to check + * @sc: scan control of this context + * + * Returns true if the zone does not have enough inactive anon pages, + * meaning some active anon pages need to be deactivated. + */ +static int inactive_anon_is_low(struct zone *zone, struct scan_control *sc) +{ + int low; + + if (scan_global_lru(sc)) + low = inactive_anon_is_low_global(zone); + else + low = mem_cgroup_inactive_anon_is_low(sc->mem_cgroup, zone); + return low; +} + static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan, struct zone *zone, struct scan_control *sc, int priority) { @@ -1388,7 +1400,7 @@ static unsigned long shrink_list(enum lr } if (lru == LRU_ACTIVE_ANON && - (!scan_global_lru(sc) || inactive_anon_is_low(zone))) { + inactive_anon_is_low(zone, sc)) { shrink_active_list(nr_to_scan, zone, sc, priority, file); return 0; } @@ -1553,9 +1565,7 @@ static void shrink_zone(int priority, st * Even if we did not try to evict anon pages at all, we want to * rebalance the anon lru active/inactive ratio. */ - if (!scan_global_lru(sc) || inactive_anon_is_low(zone)) - shrink_active_list(SWAP_CLUSTER_MAX, zone, sc, priority, 0); - else if (!scan_global_lru(sc)) + if (inactive_anon_is_low(zone, sc)) shrink_active_list(SWAP_CLUSTER_MAX, zone, sc, priority, 0); throttle_vm_writeout(sc->gfp_mask); @@ -1851,7 +1861,7 @@ loop_again: * Do some background aging of the anon list, to give * pages a chance to be referenced before reclaiming. 
*/ - if (inactive_anon_is_low(zone)) + if (inactive_anon_is_low(zone, &sc)) shrink_active_list(SWAP_CLUSTER_MAX, zone, &sc, priority, 0); _ Patches currently in -mm which might be from kamezawa.hiroyu@xxxxxxxxxxxxxx are vmscan-evict-streaming-io-first.patch mm-gup-persist-for-write-permission.patch mm-wp-lock-page-before-deciding-cow.patch mm-reuse_swap_page-replaces-can_share_swap_page.patch mm-try_to_free_swap-replaces-remove_exclusive_swap_page.patch mm-try_to_unuse-check-removing-right-swap.patch mm-remove-try_to_munlock-from-vmscan.patch mm-remove-gfp_mask-from-add_to_swap.patch mm-add-add_to_swap-stub.patch mm-optimize-get_scan_ratio-for-no-swap.patch memcg-reclaim-shouldnt-change-zone-recent_rotated-statistics.patch swapfile-swapon-needs-larger-size-type.patch swapfile-remove-swp_active-mask.patch swapfile-remove-surplus-whitespace.patch swapfile-remove-v0-swap-space-message.patch swapfile-rearrange-scan-and-swap_info.patch swapfile-swapon-use-discard-trim.patch swapfile-swap-allocation-use-discard.patch swapfile-swapon-randomize-if-nonrot.patch swapfile-swap-allocation-cycle-if-nonrot.patch swapfile-change-discard-pgoff_t-to-sector_t.patch swapfile-change-discard-pgoff_t-to-sector_t-fix.patch swapfile-let-others-seed-random.patch cgroups-make-cgroup-config-a-submenu.patch cgroups-documentation-updates.patch cgroups-remove-some-redundant-null-checks.patch ns_cgroup-remove-unused-spinlock.patch memcg-fix-a-typo-in-kconfig.patch cgroups-add-lock-for-child-cgroups-in-cgroup_post_fork.patch cgroups-fix-cgroup_iter_next-bug.patch cgroups-dont-put-struct-cgroupfs_root-protected-by-rcu.patch cgroups-use-task_lock-for-access-tsk-cgroups-safe-in-cgroup_clone.patch cgroups-call-find_css_set-safely-in-cgroup_attach_task.patch cgroups-remove-rcu_read_lock-in-cgroupstats_build.patch cgroups-make-root_list-contains-active-hierarchies-only.patch cgroups-add-inactive-subsystems-to-rootnodesubsys_list.patch cgroups-introduce-link_css_set-to-remove-duplicate-code.patch 
devcgroup-use-list_for_each_entry_rcu.patch memcg-introduce-charge-commit-cancel-style-of-functions.patch memcg-introduce-charge-commit-cancel-style-of-functions-fix.patch memcg-fix-gfp_mask-of-callers-of-charge.patch memcg-simple-migration-handling.patch memcg-do-not-recalculate-section-unnecessarily-in-init_section_page_cgroup.patch memcg-move-all-acccounts-to-parent-at-rmdir.patch memcg-reduce-size-of-mem_cgroup-by-using-nr_cpu_ids.patch memcg-new-force_empty-to-free-pages-under-group.patch memcg-new-force_empty-to-free-pages-under-group-fix.patch memcg-new-force_empty-to-free-pages-under-group-fix-fix.patch memcg-handle-swap-caches.patch memcg-handle-swap-caches-build-fix.patch memcg-memswap-controller-kconfig.patch memcg-swap-cgroup-for-remembering-usage.patch memcg-memswap-controller-core.patch memcg-memswap-controller-core-make-resize-limit-hold-mutex.patch memcg-memswap-controller-core-swapcache-fixes.patch memcg-synchronized-lru.patch memcg-add-mem_cgroup_disabled.patch memcg-add-mem_cgroup_disabled-fix.patch memory-cgroup-hierarchy-documentation-v4.patch memory-cgroup-resource-counters-for-hierarchy-v4.patch memory-cgroup-resource-counters-for-hierarchy-v4-checkpatch-fixes.patch memory-cgroup-hierarchical-reclaim-v4.patch memory-cgroup-hierarchical-reclaim-v4-checkpatch-fixes.patch memory-cgroup-hierarchical-reclaim-v4-fix-for-hierarchical-reclaim.patch memory-cgroup-hierarchy-feature-selector-v4.patch memory-cgroup-hierarchy-feature-selector-v4-fix.patch memcontrol-rcu_read_lock-to-protect-mm_match_cgroup.patch memcg-avoid-unnecessary-system-wide-oom-killer.patch memcg-avoid-unnecessary-system-wide-oom-killer-fix.patch memcg-fix-reclaim-result-checks.patch memcg-revert-gfp-mask-fix.patch memcg-check-group-leader-fix.patch memcg-memoryswap-controller-fix-limit-check.patch memcg-swapout-refcnt-fix.patch memcg-hierarchy-avoid-unnecessary-reclaim.patch inactive_anon_is_low-move-to-vmscan.patch mm-introduce-zone_reclaim-struct.patch 
mm-add-zone-nr_pages-helper-function.patch mm-make-get_scan_ratio-safe-for-memcg.patch memcg-add-null-check-to-page_cgroup_zoneinfo.patch memcg-add-inactive_anon_is_low.patch memcg-add-mem_cgroup_zone_nr_pages.patch memcg-add-zone_reclaim_stat.patch memcg-remove-mem_cgroup_cal_reclaim.patch memcg-show-reclaim-stat.patch memcg-rename-scan-global-lru.patch memcg-protect-prev_priority.patch memcg-swappiness.patch memcg-explain-details-and-test-document.patch cpuset-rcu_read_lock-to-protect-task_cs.patch -- To unsubscribe from this list: send the line "unsubscribe mm-commits" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html