Re: [PATCH] exec argument expansion can inappropriately trigger OOM-killer

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



> Brad Spengler published a local memory-allocation DoS that
> evades the OOM-killer (though not the virtual memory RLIMIT):
> http://www.grsecurity.net/~spender/64bit_dos.c
> 
> The recent changes to create a stack guard page helps slightly to
> discourage this attack, but it is not sufficient. Compiling it statically
> moves the libraries out of the way, allowing the stack VMA to fill the
> entire TASK_SIZE.
> 
> There are two issues:
>  1) the OOM killer doesn't notice this argv memory explosion
>  2) the argv expansion does not check if rlim[RLIMIT_STACK].rlim_cur is -1.
> 
> I figure a quick solution for #2 would be the following patch. However,
> running multiple copies of this program could result in similar OOM
> behavior, so issue #1 still needs a solution.
>
>Reported-by: Brad Spengler <spender@xxxxxxxxxxxxxx>
>Signed-off-by: Kees Cook <kees.cook@xxxxxxxxxxxxx>

Reviewed-by: KOSAKI Motohiro <kosaki.motohiro@xxxxxxxxxxxxxx>


And, I have a patch for #1. Can you please see this? Alternative idea
is to change rss accounting itself.



>From d4e114e5d31b14ebfc399d4b1fb142c7dfce0ca4 Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro <kosaki.motohiro@xxxxxxxxxxxxxx>
Date: Thu, 19 Aug 2010 20:40:20 +0900
Subject: [PATCH] oom: don't ignore temporary rss while execve

execve() makes new mm struct and setup stack and argv vector,
Unfortunately this new mm is not pointed any tasks, then oom-kill
can't detect this memory usage. therefore oom-kill may kill incorrect
task.

This patch added in-exec rss treatness to oom.

Signed-off-by: KOSAKI Motohiro <kosaki.motohiro@xxxxxxxxxxxxxx>
---
 fs/compat.c             |    8 ++++++--
 fs/exec.c               |   19 +++++++++++++++++--
 include/linux/binfmts.h |    1 +
 include/linux/sched.h   |    1 +
 mm/oom_kill.c           |   36 +++++++++++++++++++++++++++---------
 5 files changed, 52 insertions(+), 13 deletions(-)

diff --git a/fs/compat.c b/fs/compat.c
index 718c706..643140c 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1527,6 +1527,7 @@ int compat_do_execve(char * filename,
 	retval = bprm_mm_init(bprm);
 	if (retval)
 		goto out_file;
+	set_exec_mm(bprm->mm);
 
 	bprm->argc = compat_count(argv, MAX_ARG_STRINGS);
 	if ((retval = bprm->argc) < 0)
@@ -1560,6 +1561,7 @@ int compat_do_execve(char * filename,
 	/* execve succeeded */
 	current->fs->in_exec = 0;
 	current->in_execve = 0;
+	set_exec_mm(NULL);
 	acct_update_integrals(current);
 	free_bprm(bprm);
 	if (displaced)
@@ -1567,8 +1569,10 @@ int compat_do_execve(char * filename,
 	return retval;
 
 out:
-	if (bprm->mm)
-		mmput(bprm->mm);
+	if (current->in_exec_mm) {
+		struct mm_struct *in_exec_mm = set_exec_mm(NULL);
+		mmput (in_exec_mm);
+	}
 
 out_file:
 	if (bprm->file) {
diff --git a/fs/exec.c b/fs/exec.c
index 2d94552..85192e1 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1314,6 +1314,17 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
 
 EXPORT_SYMBOL(search_binary_handler);
 
+struct mm_struct* set_exec_mm(struct mm_struct *mm)
+{
+	struct mm_struct *old = current->in_exec_mm;
+
+	task_lock(current);
+	current->in_exec_mm = mm;
+	task_unlock(current);
+
+	return old;
+}
+
 /*
  * sys_execve() executes a new program.
  */
@@ -1361,6 +1372,7 @@ int do_execve(const char * filename,
 	retval = bprm_mm_init(bprm);
 	if (retval)
 		goto out_file;
+	set_exec_mm(bprm->mm);
 
 	bprm->argc = count(argv, MAX_ARG_STRINGS);
 	if ((retval = bprm->argc) < 0)
@@ -1395,6 +1407,7 @@ int do_execve(const char * filename,
 	/* execve succeeded */
 	current->fs->in_exec = 0;
 	current->in_execve = 0;
+	set_exec_mm(NULL);
 	acct_update_integrals(current);
 	free_bprm(bprm);
 	if (displaced)
@@ -1402,8 +1415,10 @@ int do_execve(const char * filename,
 	return retval;
 
 out:
-	if (bprm->mm)
-		mmput (bprm->mm);
+	if (current->in_exec_mm) {
+		struct mm_struct *in_exec_mm = set_exec_mm(NULL);
+		mmput (in_exec_mm);
+	}
 
 out_file:
 	if (bprm->file) {
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index a065612..8cf61eb 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -133,6 +133,7 @@ extern void install_exec_creds(struct linux_binprm *bprm);
 extern void do_coredump(long signr, int exit_code, struct pt_regs *regs);
 extern void set_binfmt(struct linux_binfmt *new);
 extern void free_bprm(struct linux_binprm *);
+extern struct mm_struct* set_exec_mm(struct mm_struct *mm);
 
 #endif /* __KERNEL__ */
 #endif /* _LINUX_BINFMTS_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 1e2a6db..d413757 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1217,6 +1217,7 @@ struct task_struct {
 	struct plist_node pushable_tasks;
 
 	struct mm_struct *mm, *active_mm;
+	struct mm_struct *in_exec_mm;
 #if defined(SPLIT_RSS_COUNTING)
 	struct task_rss_stat	rss_stat;
 #endif
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 57c05f7..7fc6916 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -120,6 +120,30 @@ struct task_struct *find_lock_task_mm(struct task_struct *p)
 	return NULL;
 }
 
+static unsigned long calculate_rss_swap(struct task_struct *p)
+{
+	struct task_struct *t = p;
+	int mm_accounted = 0;
+	unsigned long points = 0;
+
+	do {
+		task_lock(t);
+		if (!mm_accounted && t->mm) {
+			points += get_mm_rss(t->mm);
+			points += get_mm_counter(t->mm, MM_SWAPENTS);
+			mm_accounted = 1;
+		}
+		if (t->in_exec_mm) {
+			points += get_mm_rss(t->in_exec_mm);
+			points += get_mm_counter(t->in_exec_mm, MM_SWAPENTS);
+		}
+		task_unlock(t);
+	} while_each_thread(p, t);
+
+	return points;
+}
+
+
 /* return true if the task is not adequate as candidate victim task. */
 static bool oom_unkillable_task(struct task_struct *p, struct mem_cgroup *mem,
 			   const nodemask_t *nodemask)
@@ -157,16 +181,11 @@ unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *mem,
 	if (oom_unkillable_task(p, mem, nodemask))
 		return 0;
 
-	p = find_lock_task_mm(p);
-	if (!p)
-		return 0;
-
 	/*
 	 * Shortcut check for OOM_SCORE_ADJ_MIN so the entire heuristic doesn't
 	 * need to be executed for something that cannot be killed.
 	 */
 	if (p->signal->oom_score_adj == OOM_SCORE_ADJ_MIN) {
-		task_unlock(p);
 		return 0;
 	}
 
@@ -175,7 +194,6 @@ unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *mem,
 	 * priority for oom killing.
 	 */
 	if (p->flags & PF_OOM_ORIGIN) {
-		task_unlock(p);
 		return 1000;
 	}
 
@@ -190,9 +208,9 @@ unsigned int oom_badness(struct task_struct *p, struct mem_cgroup *mem,
 	 * The baseline for the badness score is the proportion of RAM that each
 	 * task's rss and swap space use.
 	 */
-	points = (get_mm_rss(p->mm) + get_mm_counter(p->mm, MM_SWAPENTS)) * 1000 /
-			totalpages;
-	task_unlock(p);
+	points = calculate_rss_swap(p) * 1000 / totalpages;
+	if (!points)
+		return 0;
 
 	/*
 	 * Root processes get 3% bonus, just like the __vm_enough_memory()
-- 
1.6.5.2





--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]
  Powered by Linux