[PATCH v2 2/8] exec: turn self_exec_id into self_privunit

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This ensures that self_privunit ("privilege unit locally unique ID")
is only shared by processes that share the mm_struct and the signal_struct;
not just spatially, but also temporally. In other words, if you do execve()
or clone() without CLONE_THREAD, you get a new privunit that has never
been used before.

One reason for doing this is that it prevents an attacker from sending an
arbitrary signal to a parent process after performing 2^32-1 execve()
calls (enough to make the old 32-bit self_exec_id counter wrap around).

The second reason for this is that it permits using self_privunit in
a later patch to check during a ptrace access whether subject and object
are temporally and spatially equal for privilege checking purposes.

The implementation of locally unique IDs is in sched.h and exec.c for now
because those are the only users so far - if anything else wants to use
them in the future, they can be moved elsewhere.

changed in v2:
 - have 2^64 IDs per CPU instead of 2^64 shared ones (luid scheme,
   suggested by Andy Lutomirski)
 - take tasklist_lock for reading in setup_new_exec() while bumping the LUID

Signed-off-by: Jann Horn <jann@xxxxxxxxx>
---
 fs/exec.c             | 41 +++++++++++++++++++++++++++++++++++++++--
 include/linux/sched.h | 17 +++++++++++++++--
 kernel/fork.c         |  5 +++--
 kernel/signal.c       |  5 ++++-
 4 files changed, 61 insertions(+), 7 deletions(-)

diff --git a/fs/exec.c b/fs/exec.c
index 84430ee..fcc11f0 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1281,6 +1281,34 @@ void would_dump(struct linux_binprm *bprm, struct file *file)
 }
 EXPORT_SYMBOL(would_dump);
 
+static DEFINE_PER_CPU(u64, luid_counters);
+
+static int __init init_luid_counters(void)
+{
+	unsigned int cpu;
+
+	for_each_possible_cpu(cpu) {
+		/* value 0 is reserved for init */
+		per_cpu(luid_counters, cpu) = 1;
+	}
+
+	return 0;
+}
+early_initcall(init_luid_counters);
+
+/*
+ * Allocates a new LUID and writes the allocated LUID to @out.
+ * This function must not be called from IRQ context.
+ */
+void fill_luid(struct luid *out)
+{
+	preempt_disable();
+	out->count = raw_cpu_read(luid_counters);
+	raw_cpu_add(luid_counters, 1);
+	out->cpu = smp_processor_id();
+	preempt_enable();
+}
+
 void setup_new_exec(struct linux_binprm * bprm)
 {
 	arch_pick_mmap_layout(current->mm);
@@ -1313,8 +1341,17 @@ void setup_new_exec(struct linux_binprm * bprm)
 	}
 
 	/* An exec changes our domain. We are no longer part of the thread
-	   group */
-	current->self_exec_id++;
+	 * group.
+	 * The privunit luid is regenerated with the tasklist_lock held for
+	 * reading to allow do_notify_parent() (which only runs with
+	 * tasklist_lock held for writing) to inspect privunit IDs of other
+	 * tasks without taking the cred_guard_light (which wouldn't work
+	 * because the tasklist_lock is held).
+	 */
+	read_lock(&tasklist_lock);
+	fill_luid(&current->self_privunit);
+	read_unlock(&tasklist_lock);
+
 	flush_signal_handlers(current, 0);
 	do_close_on_exec(current->files);
 }
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2a1df2f..fa90e36 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1467,6 +1467,19 @@ struct tlbflush_unmap_batch {
 	bool writable;
 };
 
+/* locally unique ID */
+struct luid {
+	u64 count;
+	unsigned int cpu;
+};
+
+void fill_luid(struct luid *out);
+
+static inline bool luid_eq(const struct luid *a, const struct luid *b)
+{
+	return a->count == b->count && a->cpu == b->cpu;
+}
+
 struct task_struct {
 	volatile long state;	/* -1 unrunnable, 0 runnable, >0 stopped */
 	void *stack;
@@ -1688,8 +1701,8 @@ struct task_struct {
 	struct seccomp seccomp;
 
 /* Thread group tracking */
-   	u32 parent_exec_id;
-   	u32 self_exec_id;
+	struct luid parent_privunit;
+	struct luid self_privunit;
 /* Protection of (de-)allocation: mm, files, fs, tty, keyrings, mems_allowed,
  * mempolicy */
 	spinlock_t alloc_lock;
diff --git a/kernel/fork.c b/kernel/fork.c
index 2d46f3a..e1bd501 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1567,6 +1567,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 			p->exit_signal = (clone_flags & CSIGNAL);
 		p->group_leader = p;
 		p->tgid = p->pid;
+		fill_luid(&p->self_privunit);
 	}
 
 	p->nr_dirtied = 0;
@@ -1597,10 +1598,10 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	/* CLONE_PARENT re-uses the old parent */
 	if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) {
 		p->real_parent = current->real_parent;
-		p->parent_exec_id = current->parent_exec_id;
+		p->parent_privunit = current->parent_privunit;
 	} else {
 		p->real_parent = current;
-		p->parent_exec_id = current->self_exec_id;
+		p->parent_privunit = current->self_privunit;
 	}
 
 	spin_lock(&current->sighand->siglock);
diff --git a/kernel/signal.c b/kernel/signal.c
index af21afc..3dbd25b 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -1566,6 +1566,8 @@ ret:
  * Let a parent know about the death of a child.
  * For a stopped/continued status change, use do_notify_parent_cldstop instead.
  *
+ * Must be called with tasklist_lock held for writing.
+ *
  * Returns true if our parent ignored us and so we've switched to
  * self-reaping.
  */
@@ -1590,7 +1592,8 @@ bool do_notify_parent(struct task_struct *tsk, int sig)
 		 * This is only possible if parent == real_parent.
 		 * Check if it has changed security domain.
 		 */
-		if (tsk->parent_exec_id != tsk->parent->self_exec_id)
+		if (!luid_eq(&tsk->parent_privunit,
+			     &tsk->parent->self_privunit))
 			sig = SIGCHLD;
 	}
 
-- 
2.1.4

--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]
  Powered by Linux