3.16.52-rc1 review patch. If anyone has any objections, please let me know. ------------------ From: "Eric W. Biederman" <ebiederm@xxxxxxxxxxxx> commit bfedb589252c01fa505ac9f6f2a3d5d68d707ef4 upstream. During exec dumpable is cleared if the file that is being executed is not readable by the user executing the file. A bug in ptrace_may_access allows reading the file if the executable happens to enter into a subordinate user namespace (aka clone(CLONE_NEWUSER), unshare(CLONE_NEWUSER), or setns(fd, CLONE_NEWUSER). This problem is fixed with only necessary userspace breakage by adding a user namespace owner to mm_struct, captured at the time of exec, so it is clear in which user namespace CAP_SYS_PTRACE must be present in to be able to safely give read permission to the executable. The function ptrace_may_access is modified to verify that the ptracer has CAP_SYS_ADMIN in task->mm->user_ns instead of task->cred->user_ns. This ensures that if the task changes it's cred into a subordinate user namespace it does not become ptraceable. The function ptrace_attach is modified to only set PT_PTRACE_CAP when CAP_SYS_PTRACE is held over task->mm->user_ns. The intent of PT_PTRACE_CAP is to be a flag to note that whatever permission changes the task might go through the tracer has sufficient permissions for it not to be an issue. task->cred->user_ns is always the same as or descendent of mm->user_ns. Which guarantees that having CAP_SYS_PTRACE over mm->user_ns is the worst case for the tasks credentials. To prevent regressions mm->dumpable and mm->user_ns are not considered when a task has no mm. As simply failing ptrace_may_attach causes regressions in privileged applications attempting to read things such as /proc/<pid>/stat Acked-by: Kees Cook <keescook@xxxxxxxxxxxx> Tested-by: Cyrill Gorcunov <gorcunov@xxxxxxxxxx> Fixes: 8409cca70561 ("userns: allow ptrace from non-init user namespaces") Signed-off-by: "Eric W. Biederman" <ebiederm@xxxxxxxxxxxx> [bwh: Backported to 3.16: adjust context] Signed-off-by: Ben Hutchings <ben@xxxxxxxxxxxxxxx> --- include/linux/mm_types.h | 1 + kernel/fork.c | 9 ++++++--- kernel/ptrace.c | 26 +++++++++++--------------- mm/init-mm.c | 2 ++ 4 files changed, 20 insertions(+), 18 deletions(-) --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -419,6 +419,7 @@ struct mm_struct { */ struct task_struct __rcu *owner; #endif + struct user_namespace *user_ns; /* store ref to file /proc/<pid>/exe symlink points to */ struct file *exe_file; --- a/kernel/fork.c +++ b/kernel/fork.c @@ -527,7 +527,8 @@ static void mm_init_aio(struct mm_struct #endif } -static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p) +static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, + struct user_namespace *user_ns) { atomic_set(&mm->mm_users, 1); atomic_set(&mm->mm_count, 1); @@ -551,6 +552,7 @@ static struct mm_struct *mm_init(struct if (likely(!mm_alloc_pgd(mm))) { mmu_notifier_mm_init(mm); + mm->user_ns = get_user_ns(user_ns); return mm; } @@ -588,7 +590,7 @@ struct mm_struct *mm_alloc(void) memset(mm, 0, sizeof(*mm)); mm_init_cpumask(mm); - return mm_init(mm, current); + return mm_init(mm, current, current_user_ns()); } /* @@ -603,6 +605,7 @@ void __mmdrop(struct mm_struct *mm) destroy_context(mm); mmu_notifier_mm_destroy(mm); check_mm(mm); + put_user_ns(mm->user_ns); free_mm(mm); } EXPORT_SYMBOL_GPL(__mmdrop); @@ -822,7 +825,7 @@ static struct mm_struct *dup_mm(struct t #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !USE_SPLIT_PMD_PTLOCKS mm->pmd_huge_pte = NULL; #endif - if (!mm_init(mm, tsk)) + if (!mm_init(mm, tsk, mm->user_ns)) goto fail_nomem; if (init_new_context(tsk, mm)) --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -230,7 +230,7 @@ static int ptrace_has_cap(struct user_na static int __ptrace_may_access(struct task_struct *task, unsigned int mode) { const struct cred *cred = current_cred(), *tcred; - int dumpable = 0; + struct mm_struct *mm; kuid_t caller_uid; kgid_t caller_gid; @@ -281,16 +281,11 @@ static int __ptrace_may_access(struct ta return -EPERM; ok: rcu_read_unlock(); - smp_rmb(); - if (task->mm) - dumpable = get_dumpable(task->mm); - rcu_read_lock(); - if (dumpable != SUID_DUMP_USER && - !ptrace_has_cap(__task_cred(task)->user_ns, mode)) { - rcu_read_unlock(); - return -EPERM; - } - rcu_read_unlock(); + mm = task->mm; + if (mm && + ((get_dumpable(mm) != SUID_DUMP_USER) && + !ptrace_has_cap(mm->user_ns, mode))) + return -EPERM; return security_ptrace_access_check(task, mode); } @@ -341,6 +336,11 @@ static int ptrace_attach(struct task_str task_lock(task); retval = __ptrace_may_access(task, PTRACE_MODE_ATTACH_REALCREDS); + if (!retval) { + struct mm_struct *mm = task->mm; + if (mm && ns_capable(mm->user_ns, CAP_SYS_PTRACE)) + flags |= PT_PTRACE_CAP; + } task_unlock(task); if (retval) goto unlock_creds; @@ -354,10 +354,6 @@ static int ptrace_attach(struct task_str if (seize) flags |= PT_SEIZED; - rcu_read_lock(); - if (ns_capable(__task_cred(task)->user_ns, CAP_SYS_PTRACE)) - flags |= PT_PTRACE_CAP; - rcu_read_unlock(); task->ptrace = flags; __ptrace_link(task, current); --- a/mm/init-mm.c +++ b/mm/init-mm.c @@ -6,6 +6,7 @@ #include <linux/cpumask.h> #include <linux/atomic.h> +#include <linux/user_namespace.h> #include <asm/pgtable.h> #include <asm/mmu.h> @@ -21,5 +22,6 @@ struct mm_struct init_mm = { .mmap_sem = __RWSEM_INITIALIZER(init_mm.mmap_sem), .page_table_lock = __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock), .mmlist = LIST_HEAD_INIT(init_mm.mmlist), + .user_ns = &init_user_ns, INIT_MM_CONTEXT(init_mm) };