Re: [PATCH] mm: Do not deactivate when the cgroup has plenty inactive page

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




On 2020/10/14 10:47 下午, Michal Hocko wrote:
On Wed 14-10-20 22:21:58, zhong jiang wrote:
On 2020/9/30 12:02 上午, Michal Hocko wrote:
On Sun 27-09-20 10:39:22, zhong jiang wrote:
On 2020/9/25 8:07 下午, Michal Hocko wrote:
On Fri 25-09-20 19:49:12, zhongjiang-ali wrote:
After appling the series patches(mm: fix page aging across multiple cgroups),
cgroup memory reclaim strategy is based on reclaim root's inactive:active
ratio. if the target lruvec need to deactivate, its children cgroup also will
deactivate. That will result in hot page to be reclaimed and other cgroup's
cold page will be left, which is not expected.

The patch will not force deactivate when inactive_is_low is not true unless
we has scanned the inactive list and memory is unable to reclaim.
Do you have any data to present?
I write an testcase that cgroup B has a lot of hot pagecache and cgroup C
is full of cold pagecache.  and

their parent cgroup A will trigger the reclaim due of it's limit has been
breached.


The testcase should assume that we should not reclaim the  hot pagecache in
cgroup B because C has

plenty cold pagecache.   Unfortunately,  I can see cgroup B hot pagecache
has been decreased when

cgroup A trigger the reclaim.
Thank you, this is more or less what've expected from your initial
description. An extended form would be preferable for the changelog to
make the setup more clear. But you still haven't quantified the effect.
It would be also good to describe what is the effect on the workload
described by 53138cea7f39 ("mm: vmscan: move file exhaustion detection
to the node level"). A more extended description on the implementation
would be nice as well.
Hi,  Michal

I'm sorry for lately reply due of a long vacation.  But that indeed breach
the initial purpose.

Do you think the patch make sense, or any benchmark can be recommended to
get some data.
To be honest I have really hard time to grasp what is the effect of this
patch. Also memory reclaim tuning without any data doesn't really sound
convincing. Do you see any real workload that is benefiting from this
change or this is mostly a based on reading the code and a theoretical
concern?

I write an testcase based on LTP to test the condition  and see active page will be deactived but

child cgroup still has a lot of cold memory, which is not unexpected.

The testcase is attached.

Please understand that the existing balancing is quite complex and any
changes should be carfully analyzed and described.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/sysinfo.h>
#include <sys/wait.h>

#include "tst_test.h"

#define HOT_CACHE	(1024 * 1024 * 500UL)
#define COLD_CACHE	(1024 * 1024 * 1024 * 2UL)
#define MEMORY_MAX	(COLD_CACHE / 2)
#define HOT_MAX		(1024 * 1024 * 400UL)
#define MEMCG_ROOT 	"/sys/fs/cgroup/memory"
#define MEMCG_CHILD_HOT		"memcg_test_0"
#define MEMCG_CHILD_COLD	"memcg_test_1"
#define tempfile_hot	"memcg_test.hot"
#define tempfile_cold	"memcg_test.cold"
#define TEST_PASS	0
#define TEST_FAIL	-1

static inline int values_close(long a, long b, int err)
{
    return abs(a - b) <= (a + b) / 100 * err;
}

static int cg_create(char *path, char *parent, char *memcg)
{
	sprintf(path, "%s/%s", parent, memcg);

	return mkdir(path, 0644);
}

static int cg_write(char *memcg, char *control, char *buf, ssize_t len)
{
	char path[PATH_MAX];
	int fd;

	sprintf(path, "%s/%s", memcg, control);
	fd = open(path, O_WRONLY);
	if (fd < 0)
		return fd;
	len = write(fd, buf, len);
	close(fd);

	return len;
}

static int cg_move(char *cgroup)
{
	char pid_name[10] = {0,};
	char buf[PATH_MAX];

	strcpy(buf, cgroup);
	sprintf(pid_name, "%d", getpid());

	return cg_write(dirname(buf), "cgroup.procs", pid_name, strlen(pid_name));
}

static int cg_destroy(char *parent, char *cgroup)
{
	char path[PATH_MAX];
	int ret;
	int loops = 1;

	if (!cgroup)
		sprintf(path, "%s", parent);
	else
		sprintf(path, "%s/%s", parent, cgroup);
retry:
	ret = rmdir(path);
	if (ret && errno == EBUSY && loops) {
		ret = cg_move(path);
		if (ret < 0)
			goto out;
		else {
			loops--;
			goto retry;
		}
	} else if (ret && errno == ENOENT)
		ret = 0;
out:
	return ret;
}

static int open_temp_file(char *name)
{
    return open(name, O_CREAT | O_RDWR | O_TRUNC, S_IRWXU);
}

static void remove_temp_file(char *name)
{
    unlink(name);
}

static void cleanup(void)
{
	char parent[PATH_MAX];

	if (!access(tempfile_hot, F_OK))
		remove_temp_file(tempfile_hot);
	if (!access(tempfile_cold, F_OK))
		remove_temp_file(tempfile_cold);

	sprintf(parent, "%s/%s", MEMCG_ROOT, "memcg_test");
	if (!access(parent, F_OK)) {
		cg_destroy(parent, MEMCG_CHILD_HOT);
		cg_destroy(parent, MEMCG_CHILD_COLD);
		cg_destroy(parent, NULL);
	}
}

static int cg_write_max(char *memcg, unsigned long max)
{
	char buf[100];

	sprintf(buf, "%ld", max);
	return cg_write(memcg, "memory.limit_in_bytes", buf, 100);
}

static void remove_temp_files(void)
{
	remove_temp_file(tempfile_hot);
	remove_temp_file(tempfile_cold);
}

static int read_file(int fd, unsigned long size, char *buf, int pagesize)
{
	unsigned long i;
	int len = 0;

	lseek(fd, 0, SEEK_SET);
	for (i = 0; i < size; i += pagesize) {
		len = read(fd, buf, pagesize);
		if (len < 0)
			break;
	}

	return len;
}

static void memcg_active_file(char *path, int fd, unsigned long size)
{
	char *buf;
	char *value;
	int pagesize = getpagesize();

	value = malloc(pagesize);
	buf = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
	memset(buf, 'A', size);

	/* trigger memory reclaim to putback inactive page to active list */
	cg_write_max(path, HOT_MAX);
	munmap(buf, size);
	/* mark_page_accessed will make the page to be activated */
	read_file(fd, size, value, pagesize);
	free(value);
}

static int touch_pagecache(int fd, unsigned long size)
{
	char *buf;
	int pagesize = getpagesize();
	int len = -1;

	buf = malloc(pagesize);
	if (ftruncate(fd, size))
		goto out;

	len = read_file(fd, size, buf, pagesize);
out:
	free(buf);
	return len;
}
 
static int cg_read_stat(char *memcg, char *control, ssize_t *value)
{
	char path[PATH_MAX];
	char str[1024], temp[20];
	int found = 0;
	FILE *fp;

	sprintf(path, "%s/%s", memcg, "memory.stat");
	fp = fopen(path, "r");
	if (!fp)
		goto out;

	while (fgets(str, 1024, fp) != NULL) {
		if (!strncmp(str, control, strlen(control))) {
			sscanf(str, "%s %ld", temp, value);
			found = 1;
			break;
		} else
			continue;
	}

	fclose(fp);
	if (found)
		return 0;
out:
	return -1;
}

static int cg_write_conf(char *path, char *parent, char *memcg)
{
	char pid[10];

	if (cg_create(path, parent, memcg))
		goto out;

	sprintf(pid, "%d", getpid());
	cg_write(path, "cgroup.procs", pid, strlen(pid));

	return 0;
out:
	return -1;
}

static int memcg_create_cold(char *parent, char *memcg)
{
	int fd, ret;
	char path[PATH_MAX] = {0};
	pid_t pid;

	pid = fork();
	if (pid > 0) {
		if (wait(NULL) < 0)
			return -1;
		else
			return 0;
	} else if (pid == 0) {
		ret = cg_write_conf(path, parent, memcg);
		if (ret < 0)
			goto out;

		fd = open_temp_file(tempfile_cold);
		if (fd < 0)
			goto out_destroy;

		if (touch_pagecache(fd, COLD_CACHE) < 0)
			goto out_fd;

		close(fd);
		exit(0);
	} else
		return -1;

out_fd:
	close(fd);
	remove_temp_file(tempfile_cold);
out_destroy:
	cg_destroy(path, NULL);
out:
	exit(-1);
}

static int memcg_create_hot(char *parent, char *memcg)
{
	int ret, fd;
	char path[PATH_MAX];
	pid_t pid;

	pid = fork();
	if (pid > 0) {
		if (wait(NULL) < 0)
			return -1;
		else
			return 0;
	} else if (pid == 0) {
		ret = cg_write_conf(path, parent, memcg);
		if (ret < 0)
			goto out;

		fd = open_temp_file(tempfile_hot);
		if (fd < 0)
			goto out_destroy;

		if (touch_pagecache(fd, HOT_CACHE) < 0)
			goto out_fd;
		memcg_active_file(path, fd, HOT_CACHE);

		close(fd);
		exit(0);
	} else
		return -1;

out_fd:
	close(fd);
	remove_temp_file(tempfile_hot);
out_destroy:
	cg_destroy(path, NULL);
out:
	exit(-1);
}

static void memcg_get_active_pages(char *parent, char *memcg, long *active)
{
	char path[PATH_MAX];

	sprintf(path, "%s/%s", parent, memcg);
	cg_read_stat(path, "active_file", active);
}

static int memcg_measure_compare(char *parent, char *memcg, ssize_t size)
{
	long active;

	memcg_get_active_pages(parent, memcg, &active);
	if (values_close(active, size, 3))
		return 0;
	else
		return -1;
}

static void memcg_balance_run(void)
{
	char parent[PATH_MAX];
	int ret;
	long hot_active;

	if (access(MEMCG_ROOT, F_OK))
		tst_brk(TCONF, "memory has not been mounted");

	ret = cg_write_conf(parent, MEMCG_ROOT, "memcg_test");
	if (ret)
		goto out;

	ret = memcg_create_hot(parent, MEMCG_CHILD_HOT);
	if (ret)
		goto out_parent;

	memcg_get_active_pages(parent, MEMCG_CHILD_HOT, &hot_active);
	ret = memcg_create_cold(parent, MEMCG_CHILD_COLD);
	if (ret)
		goto out_child_hot;

	/* set parent memcg's limit_in_bytes to trigger memory reclaim. */
	ret = cg_write_max(parent, MEMORY_MAX);
	if (ret < 0)
		goto out_parent;

	/* Assume that we should not less that the specified value within a certain deviation */
	ret = memcg_measure_compare(parent, MEMCG_CHILD_HOT, hot_active);
	if (ret)
		goto out_child_cold;

	remove_temp_files();

out_child_cold:
	cg_destroy(parent, MEMCG_CHILD_COLD);
out_child_hot:
	cg_destroy(parent, MEMCG_CHILD_HOT);
out_parent:
	cg_destroy(parent, NULL);
out:
	if (ret == TEST_FAIL)
		tst_res(TFAIL, "memcg balance");
	else
		tst_res(TPASS, "memcg balance");
}

static struct tst_test test = {
	.test_all = memcg_balance_run,
	.min_kver = "4.19",
	.needs_root = 1,
	.cleanup = cleanup,
	.timeout = 3600,
};

[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux