Add a swappiness argument to the proactive reclaim interface. Users can write "bytes swappiness" to memory.reclaim to control the reclaim tendency. In particular, the following reclaims only anon folios: `echo "100M" 200 > memory.reclaim` and the following reclaims only file folios: `echo "100M" 1 > memory.reclaim` Writing only the byte count to memory.reclaim keeps the original behavior. Signed-off-by: Huan Yang <link@xxxxxxxx> --- Documentation/admin-guide/cgroup-v2.rst | 16 ++++--- include/linux/swap.h | 2 + mm/memcontrol.c | 55 +++++++++++++++++++++---- mm/vmscan.c | 2 + 4 files changed, 63 insertions(+), 12 deletions(-) diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst index c153d0c75f34..5a471ac7f0c3 100644 --- a/Documentation/admin-guide/cgroup-v2.rst +++ b/Documentation/admin-guide/cgroup-v2.rst @@ -1275,16 +1275,22 @@ PAGE_SIZE multiple when read back. This is a simple interface to trigger memory reclaim in the target cgroup. - This file accepts a single key, the number of bytes to reclaim. - No nested keys are currently supported. + This file accepts a few key, the number of bytes to reclaim. + Few nested keys are currently supported. Example:: echo "1G" > memory.reclaim - The interface can be later extended with nested keys to - configure the reclaim behavior. For example, specify the - type of memory to reclaim from (anon, file, ..). + The interface extended with nested keys to configure the + reclaim behavior. For example, specify the swappiness of + memory to reclaim from (anon, file, ..). + + Example:: + + echo "1G" 200 > memory.reclaim (only reclaim anon) + echo "1G" 0 > memory.reclaim (only reclaim file) + echo "1G" 1 > memory.reclaim (only reclaim file) Please note that the kernel can over or under reclaim from the target cgroup. 
If less bytes are reclaimed than the diff --git a/include/linux/swap.h b/include/linux/swap.h index 3ba146ae7cf5..a024194301d4 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -407,6 +407,8 @@ extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order, #define MEMCG_RECLAIM_MAY_SWAP (1 << 1) #define MEMCG_RECLAIM_PROACTIVE (1 << 2) +#define MEMCG_RECLAIM_ANON (1 << 3) +#define MEMCG_RECLAIM_FILE (1 << 4) extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg, unsigned long nr_pages, gfp_t gfp_mask, diff --git a/mm/memcontrol.c b/mm/memcontrol.c index be2ad117515e..a0e460abd41c 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -6953,19 +6953,47 @@ static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf, unsigned int nr_retries = MAX_RECLAIM_RETRIES; unsigned long nr_to_reclaim, nr_reclaimed = 0; unsigned int reclaim_options; + int swappiness = -1, org_swappiness, n; + char *tmpbuf; int err; - buf = strstrip(buf); - err = page_counter_memparse(buf, "", &nr_to_reclaim); + tmpbuf = kvzalloc(nbytes, GFP_KERNEL); + if (unlikely(!tmpbuf)) + return -ENOMEM; + + buf = skip_spaces(buf); + n = sscanf(buf, "%s %d", tmpbuf, &swappiness); + if (n < 1) { + err = -EINVAL; + goto out_free; + } + + if (n == 2 && (swappiness > 200 || swappiness < 0)) { + err = -EINVAL; + goto out_free; + } + + err = page_counter_memparse(tmpbuf, "", &nr_to_reclaim); if (err) - return err; + goto out_free; reclaim_options = MEMCG_RECLAIM_MAY_SWAP | MEMCG_RECLAIM_PROACTIVE; + if (swappiness != -1) { + org_swappiness = memcg->swappiness; + memcg->swappiness = swappiness; + if (swappiness == 200) + reclaim_options |= MEMCG_RECLAIM_ANON; + else if (swappiness == 0 || swappiness == 1) + reclaim_options |= MEMCG_RECLAIM_FILE; + } + while (nr_reclaimed < nr_to_reclaim) { unsigned long reclaimed; - if (signal_pending(current)) - return -EINTR; + if (signal_pending(current)) { + err = -EINTR; + goto out; + } /* * This is the final attempt, 
drain percpu lru caches in the @@ -6979,13 +7007,26 @@ static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf, min(nr_to_reclaim - nr_reclaimed, SWAP_CLUSTER_MAX), GFP_KERNEL, reclaim_options); - if (!reclaimed && !nr_retries--) - return -EAGAIN; + if (!reclaimed && !nr_retries--) { + err = -EAGAIN; + goto out; + } nr_reclaimed += reclaimed; } + if (swappiness != -1) + memcg->swappiness = org_swappiness; + return nbytes; + +out: + if (swappiness != -1) + memcg->swappiness = org_swappiness; + +out_free: + kvfree(tmpbuf); + return err; } static struct cftype memory_files[] = { diff --git a/mm/vmscan.c b/mm/vmscan.c index 9243a1f0d606..f4221ec833db 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -6505,6 +6505,8 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg, .may_unmap = 1, .may_swap = !!(reclaim_options & MEMCG_RECLAIM_MAY_SWAP), .proactive = !!(reclaim_options & MEMCG_RECLAIM_PROACTIVE), + .unbalance_anon = !!(reclaim_options & MEMCG_RECLAIM_ANON), + .unbalance_file = !!(reclaim_options & MEMCG_RECLAIM_FILE), }; /* * Traverse the ZONELIST_FALLBACK zonelist of the current node to put -- 2.34.1