Re: [PATCH 4/6] mm, oom: skip vforked tasks from being selected

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Michal Hocko wrote:
> On Wed 01-06-16 23:12:20, Tetsuo Handa wrote:
> > Michal Hocko wrote:
> > > vforked tasks are not really sitting on any memory. They are sharing
> > > the mm with parent until they exec into a new code. Until then it is
> > > just pinning the address space. OOM killer will kill the vforked task
> > > along with its parent but we still can end up selecting vforked task
> > > when the parent wouldn't be selected. E.g. init doing vfork to launch
> > > a task or vforked being a child of oom unkillable task with an updated
> > > oom_score_adj to be killable.
> > > 
> > > Make sure to not select vforked task as an oom victim by checking
> > > vfork_done in oom_badness.
> > 
> > While vfork()ed task cannot modify userspace memory, can't such task
> > allocate significant amount of kernel memory inside execve() operation
> > (as demonstrated by CVE-2010-4243 64bit_dos.c )?
> > 
> > It is possible that killing vfork()ed task releases a lot of memory,
> > isn't it?
> 
> I am not familiar with the above CVE but doesn't that allocated memory
> come after flush_old_exec (and so mm_release)?

That memory is allocated as of copy_strings() in do_execveat_common().

An example shown below (based on https://grsecurity.net/~spender/exploits/64bit_dos.c )
can consume nearly 50% of 2GB RAM while execve() from vfork(). That is, selecting
vfork()ed task as an OOM victim might release nearly 50% of 2GB RAM.

----------
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#define NUM_ARGS 8000 /* Nearly 50% of 2GB RAM */

int main(void)
{
        /* Be sure to do "ulimit -s unlimited" before run. */
        char **args;
        char *str;
        int i;
        str = malloc(128 * 1024);
        memset(str, ' ', 128 * 1024 - 1);
        str[128 * 1024 - 1] = '\0';
        args = malloc(NUM_ARGS * sizeof(char *));
        for (i = 0; i < (NUM_ARGS - 1); i++)
                args[i] = str;
        args[i] = NULL;
        if (vfork() == 0) {
                execve("/bin/true", args, NULL);
                _exit(1);
        }
        return 0;
}
----------

# strace -f ./a.out
execve("./a.out", ["./a.out"], [/* 22 vars */]) = 0
brk(0)                                  = 0x2283000
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2b2bdbc81000
access("/etc/ld.so.preload", R_OK)      = -1 ENOENT (No such file or directory)
open("/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=44165, ...}) = 0
mmap(NULL, 44165, PROT_READ, MAP_PRIVATE, 3, 0) = 0x2b2bdbc82000
close(3)                                = 0
open("/lib64/libc.so.6", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0 \34\2\0\0\0\0\0"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=2112384, ...}) = 0
mmap(NULL, 3936832, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x2b2bdbe84000
mprotect(0x2b2bdc03b000, 2097152, PROT_NONE) = 0
mmap(0x2b2bdc23b000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1b7000) = 0x2b2bdc23b000
mmap(0x2b2bdc241000, 16960, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x2b2bdc241000
close(3)                                = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2b2bdbc8d000
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2b2bdbc8e000
arch_prctl(ARCH_SET_FS, 0x2b2bdbc8db80) = 0
mprotect(0x2b2bdc23b000, 16384, PROT_READ) = 0
mprotect(0x600000, 4096, PROT_READ)     = 0
mprotect(0x2b2bdbe81000, 4096, PROT_READ) = 0
munmap(0x2b2bdbc82000, 44165)           = 0
mmap(NULL, 135168, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2b2bdbc90000
brk(0)                                  = 0x2283000
brk(0x22b3000)                          = 0x22b3000
brk(0)                                  = 0x22b3000
vfork(Process 9787 attached
 <unfinished ...>
[pid  9787] execve("/bin/true", ["                                "..., (...snipped...), ...], [/* 0 vars */] <unfinished ...>
[pid  9786] <... vfork resumed> )       = 9787
[pid  9786] exit_group(0)               = ?
[pid  9786] +++ exited with 0 +++
<... execve resumed> )                  = 0
brk(0)                                  = 0x13e2000
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2b2e71a6f000
access("/etc/ld.so.preload", R_OK)      = -1 ENOENT (No such file or directory)
open("/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=44165, ...}) = 0
mmap(NULL, 44165, PROT_READ, MAP_PRIVATE, 3, 0) = 0x2b2e71a70000
close(3)                                = 0
open("/lib64/libc.so.6", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0 \34\2\0\0\0\0\0"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=2112384, ...}) = 0
mmap(NULL, 3936832, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x2b2e71c6e000
mprotect(0x2b2e71e25000, 2097152, PROT_NONE) = 0
mmap(0x2b2e72025000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1b7000) = 0x2b2e72025000
mmap(0x2b2e7202b000, 16960, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x2b2e7202b000
close(3)                                = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2b2e71a7b000
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x2b2e71a7c000
arch_prctl(ARCH_SET_FS, 0x2b2e71a7bb80) = 0
mprotect(0x2b2e72025000, 16384, PROT_READ) = 0
mprotect(0x605000, 4096, PROT_READ)     = 0
mprotect(0x2b2e71c6b000, 4096, PROT_READ) = 0
munmap(0x2b2e71a70000, 44165)           = 0
exit_group(0)                           = ?
+++ exited with 0 +++

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@xxxxxxxxx.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@xxxxxxxxx";> email@xxxxxxxxx </a>



[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Bugtraq]     [Linux]     [Linux OMAP]     [Linux MIPS]     [ECOS]     [Asterisk Internet PBX]     [Linux API]