Hey folks, I’ve come across an issue that affects most of 4.19, 4.20 and 5.2 linux-stable kernels that has only been fixed in 5.3-rc1. It was introduced by 29ef680 memcg, oom: move out_of_memory back to the charge path The gist of it is that if you have a memory control group for a process that repeatedly maps all of the pages of a file with repeated calls to: mmap(NULL, pages * PAGE_SIZE, PROT_WRITE|PROT_READ, MAP_FILE|MAP_PRIVATE, fd, 0) The memory cg eventually runs out of memory, as it should. However, prior to the 29ef680 commit, it would kill the running process with OOM; After that commit ( and until 5.3-rc1; Haven’t pinpointed the exact commit in between 5.2.0 and 5.3-rc1) the offending process goes into %100 CPU usage, and doesn’t die (prior behavior) or fail the mmap call (which is what happens if one runs the test program with a low ulimit -v value). Any ideas on how to chase this down further? (Test program and script have been pasted below) Thanks, Masoud //——— leaker.c —— #include <sys/mman.h> #include <fcntl.h> #include <unistd.h> #include <stdlib.h> #include <string.h> #include <sys/types.h> #include <sys/uio.h> #include <stdio.h> #include <errno.h> #include <signal.h> #ifndef PAGE_SIZE #define PAGE_SIZE 4096 #endif void sighandler(int x) { printf("SIGNAL %d received. Quitting\n", x); exit(2); } int main(int ac, char*av[]) { int i; int fd; int pages = 4096; char buf[PAGE_SIZE]; char *d; int sum = 0, loop_cnt = 0; int max_loops = 100000; // For getopt(3) stuff: int opt; while ((opt = getopt(ac, av, "p:c:")) != -1) { switch (opt) { case 'p': pages = atoi(optarg); break; case 'c': max_loops = atoi(optarg); break; default: fprintf(stderr, "Wrong usage:\n"); fprintf(stderr, "%s -p <pages> -c <loop_count>\n", av[0]); exit(-1); } } signal(SIGTERM, sighandler); printf("Mapping %d pages anonymously %d times.\n", pages, max_loops); printf("File size will be %ld\n", pages * (long)PAGE_SIZE); printf("max memory usage size will be %ld\n", (long) max_loops * pages * PAGE_SIZE); memset(buf, 0, PAGE_SIZE); fd = open("big-data-file.bin", O_CREAT|O_WRONLY|O_TRUNC , S_IRUSR | S_IWUSR); if (fd == -1) { printf("open failed: %d - %s\n", errno, strerror(errno)); return -1; } for (i=0; i < pages; i++) { write(fd, buf, PAGE_SIZE); } close(fd); fd = open("big-data-file.bin", O_RDWR); printf("fd is %d\n", fd); while (loop_cnt < max_loops) { d = mmap(NULL, pages * PAGE_SIZE, PROT_WRITE|PROT_READ, MAP_FILE|MAP_PRIVATE, fd, 0); if (d == MAP_FAILED) { printf("mmap failed: %d - %s\n", errno, strerror(errno)); return -1; } printf("Buffer is @ %p\n", d); for (i = 0; i < pages * PAGE_SIZE; i++) { sum += d[i]; if ((i & (PAGE_SIZE-1)) == 0) d[i] = 42; } printf("Todal sum was %d. Loop count is %d\n", sum, loop_cnt++); } close(fd); return 0; } ///—— test script launching it… #!/bin/sh if [ `id -u` -ne 0 ]; then echo NEED TO RUN THIS AS ROOT.; exit 1 fi PID=$(echo $$) echo PID detected as: $PID mkdir /sys/fs/cgroup/memory/leaker echo 536870912 > /sys/fs/cgroup/memory/leaker/memory.limit_in_bytes echo leaker mem cgroup created, with `cat /sys/fs/cgroup/memory/leaker/memory.limit_in_bytes` bytes. echo $PID > /sys/fs/cgroup/memory/leaker/cgroup.procs echo Moved into the leaker cgroup. ps -o cgroup $PID sleep 15 echo Starting... ./leaker -p 10240 -c 100000
Attachment:
smime.p7s
Description: S/MIME cryptographic signature