On 01/22/2016 04:14 PM, Jan Stancek wrote:
> On 01/19/2016 11:29 AM, Tetsuo Handa wrote:
>> although I
>> couldn't find evidence that mlock() and madvise() are related to this hangup,
>
> I simplified the reproducer by having only a single thread allocating
> memory when OOM triggers:
>   http://jan.stancek.eu/tmp/oom_hangs/console.log.3-v4.4-8606-with-memalloc.txt
>
> In this instance it was mmap + mlock, as you can see from the oom call trace.
> It made it to do_exit(), but couldn't complete it:

I have extracted the test from LTP into a standalone reproducer (attached),
if you want to give it a try. It usually hangs my system within ~30 minutes.
If it takes too long, you can try disabling swap. In my past experience
this usually helped to reproduce it faster on small KVM guests.

  # gcc oom_mlock.c -pthread -O2
  # echo 1 > /proc/sys/vm/overcommit_memory
  (optionally) # swapoff -a
  # ./a.out

Also, it's interesting to note that when I disabled the mlock() calls, the
test ran fine overnight. I'll look into confirming this observation on more
systems.

Regards,
Jan
/* usleep(), getpagesize() and MAP_ANONYMOUS are hidden under strict -std=cXX. */
#define _GNU_SOURCE

#include <errno.h>
#include <pthread.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/wait.h>

/*
 * oom hang reproducer v1
 *
 * # gcc oom_mlock.c -pthread -O2
 * # echo 1 > /proc/sys/vm/overcommit_memory
 * (optionally) # swapoff -a
 * # ./a.out
 */

/* One GiB as a long; 3 * _1GB below does not fit in a 32-bit long. */
#define _1GB (1024L*1024*1024)

/*
 * Whether alloc_mem() mlock()s each mapping before touching it.
 * trigger_oom() flips this before every fork(), so iterations alternate
 * between "no mlock" (first iteration) and "mlock".
 */
static int do_mlock = 1;

/*
 * Map `length` bytes of anonymous private memory, optionally mlock() it,
 * then write one byte to every page so the pages are really allocated.
 *
 * The mapping is deliberately never unmapped: the caller keeps calling
 * this function until the system runs out of memory.
 *
 * Returns 0 on success, or the errno of the failing mmap()/mlock() call.
 */
static int alloc_mem(long int length)
{
	char *s;
	long i, pagesz = getpagesize();
	int loop = 10;

	printf("thread (%lx), allocating %ld bytes, do_mlock: %d\n",
	       (unsigned long) pthread_self(), length, do_mlock);

	s = mmap(NULL, length, PROT_READ | PROT_WRITE,
		 MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
	if (s == MAP_FAILED)
		return errno;

	if (do_mlock) {
		/*
		 * Retry only on EAGAIN, sleeping 300ms between attempts.
		 * Once the retries are used up we fall through and touch
		 * the pages anyway; any other error is returned at once.
		 */
		while (mlock(s, length) == -1 && loop > 0) {
			if (EAGAIN != errno)
				return errno;
			usleep(300000);
			loop--;
		}
	}

	/* Touch every page to force it to be backed by real memory. */
	for (i = 0; i < length; i += pagesz)
		s[i] = '\a';

	return 0;
}

/*
 * Worker thread body: allocate 3GB chunks until an allocation fails, then
 * terminate the whole process (exit() from any thread ends all threads)
 * with the failing errno as the exit status.
 */
void *alloc_thread(void *args)
{
	int ret;

	(void) args;	/* unused */

	do {
		ret = alloc_mem(3 * _1GB);
	} while (ret == 0);

	exit(ret);
}

/*
 * Run one iteration: fork a child that spawns memory-hungry worker threads
 * and wait for it to die.
 *
 * The child is expected to end either by SIGKILL (OOM killer) or by
 * exiting with ENOMEM; any other outcome terminates this program with
 * exit status 1. Always returns 0 when the outcome was an expected one.
 */
int trigger_oom(void)
{
	int i, ret, status;
	long nworkers;
	pid_t child;
	pthread_t *th;

	/*
	 * The original spawned (online CPUs - 2) workers, which is zero or
	 * negative on machines with <= 2 CPUs (or when sysconf() fails):
	 * the child then sat in pause() forever and the reproducer itself
	 * "hung". Keep the same count on bigger machines, but always run at
	 * least one worker.
	 */
	nworkers = sysconf(_SC_NPROCESSORS_ONLN) - 2;
	if (nworkers < 1)
		nworkers = 1;

	do_mlock = !do_mlock;

	/*
	 * Flush before fork(): otherwise pending stdout data is duplicated
	 * into the child and printed twice when output is not a terminal.
	 */
	fflush(stdout);

	child = fork();
	if (child == -1) {
		perror("fork");
		exit(1);
	}

	if (child == 0) {
		/*
		 * Allocate the thread array in the child only; the parent
		 * never uses it and would leak it on every iteration.
		 */
		th = malloc(sizeof(*th) * nworkers);
		if (!th) {
			printf("malloc failed\n");
			exit(2);
		}

		for (i = 0; i < nworkers; i++) {
			ret = pthread_create(&th[i], NULL, alloc_thread, NULL);
			if (ret) {
				printf("pthread_create failed with %d\n", ret);
				exit(3);
			}
		}

		/* Wait for a worker to exit() the process or for a kill. */
		for (;;)
			pause();
	}

	if (waitpid(child, &status, 0) == -1) {
		perror("waitpid");
		exit(1);
	}

	if (WIFSIGNALED(status)) {
		printf("child killed by %d\n", WTERMSIG(status));
		if (WTERMSIG(status) != SIGKILL)
			exit(1);
	}

	if (WIFEXITED(status)) {
		printf("child exited with %d\n", WEXITSTATUS(status));
		if (WEXITSTATUS(status) != ENOMEM)
			exit(1);
	}

	return 0;
}

/*
 * Repeat trigger_oom() forever; the program only stops when an iteration
 * ends in an unexpected way (trigger_oom() calls exit()) or when the
 * system hangs, which is what this reproducer is looking for.
 */
int main(void)
{
	int i = 1;

	while (1) {
		printf("starting iteration %d\n", i++);
		trigger_oom();
	}

	return 0;
}