From: Zhaoyang Huang <zhaoyang.huang@xxxxxxxxxx>

Some memcgs want to keep their memory usage protected for a certain
period of time and to have it become reclaimable once that time has
expired. So introduce an expanding method which inflates the usage,
based on the elapsed time, when calculating the memcg's protection.

Signed-off-by: Zhaoyang Huang <zhaoyang.huang@xxxxxxxxxx>
---
 include/linux/memcontrol.h | 60 ++++++++++++++++++++++++++++++++++++++++++++
 mm/memcontrol.c            |  6 ++++++
 mm/vmscan.c                |  4 ++++
 3 files changed, 70 insertions(+)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 0c5c403..3c7a2e4 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -21,6 +21,8 @@
 #include <linux/vmstat.h>
 #include <linux/writeback.h>
 #include <linux/page-flags.h>
+#include <linux/sched/loadavg.h>
+#include <linux/sched/clock.h>
 
 struct mem_cgroup;
 struct obj_cgroup;
@@ -28,6 +30,11 @@
 struct mm_struct;
 struct kmem_cache;
 
+#define MEMCG_INTERVAL	(2*HZ+1)	/* 2 sec intervals */
+#define EXP_10s		1677		/* 1/exp(2s/10s) as fixed-point (2048 == 1) */
+#define EXP_60s		1981		/* 1/exp(2s/60s) */
+#define EXP_300s	2034		/* 1/exp(2s/300s) */
+
 /* Cgroup-specific page state, on top of universal node page state */
 enum memcg_stat_item {
 	MEMCG_SWAP = NR_VM_NODE_STAT_ITEMS,
@@ -340,6 +347,12 @@ struct mem_cgroup {
 	struct deferred_split deferred_split_queue;
 #endif
 
+	u64 avg_next_update;			/* next time the expansion may run, ns */
+	u64 avg_last_update;			/* time of the last expansion, ns */
+	u64 prot_period;			/* period before the first expansion, ns */
+	struct page_counter memory_latest;	/* usage snapshot from the last update */
+	bool allow_expand;			/* enable usage expansion */
+
 	struct mem_cgroup_per_node *nodeinfo[];
 };
 
@@ -608,6 +621,53 @@ static inline bool mem_cgroup_disabled(void)
 	return !cgroup_subsys_enabled(memory_cgrp_subsys);
 }
 
+/*
+ * Expand the usage via a linear proportion method.
+ */
+static inline unsigned long calc_expanded_usage(struct mem_cgroup *group)
+{
+	u64 now, decay_factor;
+	u64 usage_expanded;
+	s64 growth, usage, last_usage;
+	u64 delta_time;
+
+	usage = page_counter_read(&group->memory);
+	last_usage = page_counter_read(&group->memory_latest);
+	growth = usage - last_usage;
+	usage_expanded = (unsigned long)usage;
+	now = sched_clock();
+
+	if (!usage || !group->avg_next_update) {
+		group->avg_next_update = now + group->prot_period;
+		return 0;
+	}
+
+	if (time_before((unsigned long)now, (unsigned long)group->avg_next_update))
+		return 0;
+
+	/*
+	 * Skip the expansion while the usage is growing; expand the usage
+	 * when it remains stable or is shrinking:
+	 * usage_exp = usage * (1 + delta_time / 34s), which is designed as
+	 * an effective way of linear calculation.
+	 */
+	if (growth <= 0) {
+		delta_time = group->avg_last_update ? now - group->avg_last_update : 0;
+		/*
+		 * Take 2048 as "1" and let the expansion reach 1/2 after
+		 * 17s (17s ~= 0x400000000 ns, i.e. 2^34 ns), which gives:
+		 * decay_factor = 1024 * delta_time / 2^34 = delta_time >> 24
+		 */
+		decay_factor = delta_time >> (34 - 10);
+		usage_expanded += usage * decay_factor / 2048;
+		/* avg_next_update: expected expire time according to current status */
+		group->avg_last_update = now;
+		group->avg_next_update = now + jiffies_to_nsecs(2*HZ);
+	}
+	atomic_long_set(&group->memory_latest.usage, usage);
+	return usage_expanded;
+}
+
 static inline void mem_cgroup_protection(struct mem_cgroup *root,
 					 struct mem_cgroup *memcg,
 					 unsigned long *min,
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 508bcea..0e7b5b0 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -6637,6 +6637,12 @@ void mem_cgroup_calculate_protection(struct mem_cgroup *root,
 	if (!usage)
 		return;
 
+	/*
+	 * Expand the usage by the elapsed time if the memcg allows it.
+	 */
+	if (memcg->allow_expand)
+		usage = calc_expanded_usage(memcg);
+
 	parent = parent_mem_cgroup(memcg);
 	/* No parent means a non-hierarchical mode on v1 memcg */
 	if (!parent)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index ef4a6dc..ea56b5d 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3101,8 +3101,12 @@ static void shrink_node_memcgs(pg_data_t *pgdat, struct scan_control *sc)
 			 * Hard protection.
 			 * If there is no reclaimable memory, OOM.
 			 */
+			atomic_long_set(&memcg->memory_latest.usage,
+					page_counter_read(&memcg->memory));
 			continue;
 		} else if (mem_cgroup_below_low(memcg)) {
+			atomic_long_set(&memcg->memory_latest.usage,
+					page_counter_read(&memcg->memory));
 			/*
 			 * Soft protection.
 			 * Respect the protection only as long as
-- 
1.9.1
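
For reference, below is a minimal standalone sketch (not part of the
patch) of the fixed-point arithmetic in calc_expanded_usage(). It
assumes, as the patch does, that 2048 acts as fixed-point "1", that
delta_time comes from sched_clock() in nanoseconds, and that 2^34 ns is
roughly 17s; the page count is an arbitrary example value. It builds
and runs as a normal userspace program:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* hypothetical stable usage: 1GiB in 4KiB pages */
	uint64_t usage = 262144;
	uint64_t secs[] = { 2, 17, 34, 68 };
	size_t i;

	for (i = 0; i < sizeof(secs) / sizeof(secs[0]); i++) {
		uint64_t delta_time = secs[i] * 1000000000ULL;	/* ns */

		/* decay_factor = 1024 * delta_time / 2^34 = delta_time >> 24 */
		uint64_t decay_factor = delta_time >> (34 - 10);

		/* usage_exp ~= usage * (1 + delta_time / 34s) */
		uint64_t expanded = usage + usage * decay_factor / 2048;

		printf("after %3lus: decay_factor=%4lu expanded=%lu pages (x%.2f)\n",
		       (unsigned long)secs[i], (unsigned long)decay_factor,
		       (unsigned long)expanded,
		       (double)expanded / (double)usage);
	}
	return 0;
}

This prints an expansion of roughly x1.06 after 2s, x1.49 after 17s,
x1.99 after 34s and x2.98 after 68s: once a memcg stops growing, the
usage seen by the protection code inflates by about half of itself
every 17s, so a memcg sitting below its memory.low/min threshold
eventually crosses it and becomes reclaimable, while the
avg_next_update logic limits recomputation to roughly once every 2
seconds.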