3.16.48-rc1 review patch. If anyone has any objections, please let me know. ------------------ From: Jann Horn <jann@xxxxxxxxx> commit caaee6234d05a58c5b4d05e7bf766131b810a657 upstream. By checking the effective credentials instead of the real UID / permitted capabilities, ensure that the calling process actually intended to use its credentials. To ensure that all ptrace checks use the correct caller credentials (e.g. in case out-of-tree code or newly added code omits the PTRACE_MODE_*CREDS flag), use two new flags and require one of them to be set. The problem was that when a privileged task had temporarily dropped its privileges, e.g. by calling setreuid(0, user_uid), with the intent to perform following syscalls with the credentials of a user, it still passed ptrace access checks that the user would not be able to pass. While an attacker should not be able to convince the privileged task to perform a ptrace() syscall, this is a problem because the ptrace access check is reused for things in procfs. In particular, the following somewhat interesting procfs entries only rely on ptrace access checks: /proc/$pid/stat - uses the check for determining whether pointers should be visible, useful for bypassing ASLR /proc/$pid/maps - also useful for bypassing ASLR /proc/$pid/cwd - useful for gaining access to restricted directories that contain files with lax permissions, e.g. in this scenario: lrwxrwxrwx root root /proc/13020/cwd -> /root/foobar drwx------ root root /root drwxr-xr-x root root /root/foobar -rw-r--r-- root root /root/foobar/secret Therefore, on a system where a root-owned mode 6755 binary changes its effective credentials as described and then dumps a user-specified file, this could be used by an attacker to reveal the memory layout of root's processes or reveal the contents of files he is not allowed to access (through /proc/$pid/cwd). [akpm@xxxxxxxxxxxxxxxxxxxx: fix warning] Signed-off-by: Jann Horn <jann@xxxxxxxxx> Acked-by: Kees Cook <keescook@xxxxxxxxxxxx> Cc: Casey Schaufler <casey@xxxxxxxxxxxxxxxx> Cc: Oleg Nesterov <oleg@xxxxxxxxxx> Cc: Ingo Molnar <mingo@xxxxxxxxxx> Cc: James Morris <james.l.morris@xxxxxxxxxx> Cc: "Serge E. Hallyn" <serge.hallyn@xxxxxxxxxx> Cc: Andy Shevchenko <andriy.shevchenko@xxxxxxxxxxxxxxx> Cc: Andy Lutomirski <luto@xxxxxxxxxx> Cc: Al Viro <viro@xxxxxxxxxxxxxxxxxx> Cc: "Eric W. Biederman" <ebiederm@xxxxxxxxxxxx> Cc: Willy Tarreau <w@xxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> Signed-off-by: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx> [bwh: Backported to 3.16: - Update mm_access() calls in fs/proc/task_{,no}mmu.c too - Adjust context] Signed-off-by: Ben Hutchings <ben@xxxxxxxxxxxxxxx> --- --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -391,7 +391,7 @@ static int do_task_stat(struct seq_file state = *get_task_state(task); vsize = eip = esp = 0; - permitted = ptrace_may_access(task, PTRACE_MODE_READ | PTRACE_MODE_NOAUDIT); + permitted = ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS | PTRACE_MODE_NOAUDIT); mm = get_task_mm(task); if (mm) { vsize = task_vsize(mm); --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -207,7 +207,7 @@ static int proc_pid_cmdline(struct task_ static int proc_pid_auxv(struct task_struct *task, char *buffer) { - struct mm_struct *mm = mm_access(task, PTRACE_MODE_READ); + struct mm_struct *mm = mm_access(task, PTRACE_MODE_READ_FSCREDS); int res = PTR_ERR(mm); if (mm && !IS_ERR(mm)) { unsigned int nwords = 0; @@ -236,7 +236,8 @@ static int proc_pid_wchan(struct task_st wchan = get_wchan(task); - if (wchan && ptrace_may_access(task, PTRACE_MODE_READ) && !lookup_symbol_name(wchan, symname)) + if (wchan && ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS) + && !lookup_symbol_name(wchan, symname)) return sprintf(buffer, "%s", symname); else return sprintf(buffer, "0"); @@ -248,7 +249,7 @@ static int lock_trace(struct task_struct int err = mutex_lock_killable(&task->signal->cred_guard_mutex); if (err) return err; - if (!ptrace_may_access(task, PTRACE_MODE_ATTACH)) { + if (!ptrace_may_access(task, PTRACE_MODE_ATTACH_FSCREDS)) { mutex_unlock(&task->signal->cred_guard_mutex); return -EPERM; } @@ -522,7 +523,7 @@ static int proc_fd_access_allowed(struct */ task = get_proc_task(inode); if (task) { - allowed = ptrace_may_access(task, PTRACE_MODE_READ); + allowed = ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS); put_task_struct(task); } return allowed; @@ -557,7 +558,7 @@ static bool has_pid_permissions(struct p return true; if (in_group_p(pid->pid_gid)) return true; - return ptrace_may_access(task, PTRACE_MODE_READ); + return ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS); } @@ -672,7 +673,7 @@ static int __mem_open(struct inode *inod if (!task) return -ESRCH; - mm = mm_access(task, mode); + mm = mm_access(task, mode | PTRACE_MODE_FSCREDS); put_task_struct(task); if (IS_ERR(mm)) @@ -1729,7 +1730,7 @@ static int map_files_d_revalidate(struct if (!task) goto out_notask; - mm = mm_access(task, PTRACE_MODE_READ); + mm = mm_access(task, PTRACE_MODE_READ_FSCREDS); if (IS_ERR_OR_NULL(mm)) goto out; @@ -1864,7 +1865,7 @@ static struct dentry *proc_map_files_loo goto out; result = -EACCES; - if (!ptrace_may_access(task, PTRACE_MODE_READ)) + if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) goto out_put_task; result = -ENOENT; @@ -1921,7 +1922,7 @@ proc_map_files_readdir(struct file *file goto out; ret = -EACCES; - if (!ptrace_may_access(task, PTRACE_MODE_READ)) + if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) goto out_put_task; ret = 0; @@ -2400,7 +2401,7 @@ static int do_io_accounting(struct task_ if (result) return result; - if (!ptrace_may_access(task, PTRACE_MODE_READ)) { + if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) { result = -EACCES; goto out_unlock; } --- a/fs/proc/namespaces.c +++ b/fs/proc/namespaces.c @@ -119,7 +119,7 @@ static void *proc_ns_follow_link(struct if (!task) goto out; - if (!ptrace_may_access(task, PTRACE_MODE_READ)) + if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) goto out_put_task; ns_path.dentry = proc_ns_get_dentry(sb, task, ei->ns.ns_ops); @@ -152,7 +152,7 @@ static int proc_ns_readlink(struct dentr if (!task) goto out; - if (!ptrace_may_access(task, PTRACE_MODE_READ)) + if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) goto out_put_task; res = -ENOENT; --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -56,7 +56,29 @@ extern void exit_ptrace(struct task_stru #define PTRACE_MODE_READ 0x01 #define PTRACE_MODE_ATTACH 0x02 #define PTRACE_MODE_NOAUDIT 0x04 -/* Returns true on success, false on denial. */ +#define PTRACE_MODE_FSCREDS 0x08 +#define PTRACE_MODE_REALCREDS 0x10 + +/* shorthands for READ/ATTACH and FSCREDS/REALCREDS combinations */ +#define PTRACE_MODE_READ_FSCREDS (PTRACE_MODE_READ | PTRACE_MODE_FSCREDS) +#define PTRACE_MODE_READ_REALCREDS (PTRACE_MODE_READ | PTRACE_MODE_REALCREDS) +#define PTRACE_MODE_ATTACH_FSCREDS (PTRACE_MODE_ATTACH | PTRACE_MODE_FSCREDS) +#define PTRACE_MODE_ATTACH_REALCREDS (PTRACE_MODE_ATTACH | PTRACE_MODE_REALCREDS) + +/** + * ptrace_may_access - check whether the caller is permitted to access + * a target task. + * @task: target task + * @mode: selects type of access and caller credentials + * + * Returns true on success, false on denial. + * + * One of the flags PTRACE_MODE_FSCREDS and PTRACE_MODE_REALCREDS must + * be set in @mode to specify whether the access was requested through + * a filesystem syscall (should use effective capabilities and fsuid + * of the caller) or through an explicit syscall such as + * process_vm_writev or ptrace (and should use the real credentials). + */ extern bool ptrace_may_access(struct task_struct *task, unsigned int mode); static inline int ptrace_reparented(struct task_struct *child) --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3247,7 +3247,7 @@ find_lively_task_by_vpid(pid_t vpid) /* Reuse ptrace permission checks for now. */ err = -EACCES; - if (!ptrace_may_access(task, PTRACE_MODE_READ)) + if (!ptrace_may_access(task, PTRACE_MODE_READ_REALCREDS)) goto errout; return task; --- a/kernel/futex.c +++ b/kernel/futex.c @@ -2797,7 +2797,7 @@ SYSCALL_DEFINE3(get_robust_list, int, pi } ret = -EPERM; - if (!ptrace_may_access(p, PTRACE_MODE_READ)) + if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS)) goto err_unlock; head = p->robust_list; --- a/kernel/futex_compat.c +++ b/kernel/futex_compat.c @@ -155,7 +155,7 @@ COMPAT_SYSCALL_DEFINE3(get_robust_list, } ret = -EPERM; - if (!ptrace_may_access(p, PTRACE_MODE_READ)) + if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS)) goto err_unlock; head = p->compat_robust_list; --- a/kernel/kcmp.c +++ b/kernel/kcmp.c @@ -122,8 +122,8 @@ SYSCALL_DEFINE5(kcmp, pid_t, pid1, pid_t &task2->signal->cred_guard_mutex); if (ret) goto err; - if (!ptrace_may_access(task1, PTRACE_MODE_READ) || - !ptrace_may_access(task2, PTRACE_MODE_READ)) { + if (!ptrace_may_access(task1, PTRACE_MODE_READ_REALCREDS) || + !ptrace_may_access(task2, PTRACE_MODE_READ_REALCREDS)) { ret = -EPERM; goto err_unlock; } --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -231,6 +231,14 @@ static int ptrace_has_cap(struct user_na static int __ptrace_may_access(struct task_struct *task, unsigned int mode) { const struct cred *cred = current_cred(), *tcred; + int dumpable = 0; + kuid_t caller_uid; + kgid_t caller_gid; + + if (!(mode & PTRACE_MODE_FSCREDS) == !(mode & PTRACE_MODE_REALCREDS)) { + WARN(1, "denying ptrace access check without PTRACE_MODE_*CREDS\n"); + return -EPERM; + } /* May we inspect the given task? * This check is used both for attaching with ptrace @@ -240,18 +248,33 @@ static int __ptrace_may_access(struct ta * because setting up the necessary parent/child relationship * or halting the specified task is impossible. */ - int dumpable = 0; + /* Don't let security modules deny introspection */ if (same_thread_group(task, current)) return 0; rcu_read_lock(); + if (mode & PTRACE_MODE_FSCREDS) { + caller_uid = cred->fsuid; + caller_gid = cred->fsgid; + } else { + /* + * Using the euid would make more sense here, but something + * in userland might rely on the old behavior, and this + * shouldn't be a security problem since + * PTRACE_MODE_REALCREDS implies that the caller explicitly + * used a syscall that requests access to another process + * (and not a filesystem syscall to procfs). + */ + caller_uid = cred->uid; + caller_gid = cred->gid; + } tcred = __task_cred(task); - if (uid_eq(cred->uid, tcred->euid) && - uid_eq(cred->uid, tcred->suid) && - uid_eq(cred->uid, tcred->uid) && - gid_eq(cred->gid, tcred->egid) && - gid_eq(cred->gid, tcred->sgid) && - gid_eq(cred->gid, tcred->gid)) + if (uid_eq(caller_uid, tcred->euid) && + uid_eq(caller_uid, tcred->suid) && + uid_eq(caller_uid, tcred->uid) && + gid_eq(caller_gid, tcred->egid) && + gid_eq(caller_gid, tcred->sgid) && + gid_eq(caller_gid, tcred->gid)) goto ok; if (ptrace_has_cap(tcred->user_ns, mode)) goto ok; @@ -318,7 +341,7 @@ static int ptrace_attach(struct task_str goto out; task_lock(task); - retval = __ptrace_may_access(task, PTRACE_MODE_ATTACH); + retval = __ptrace_may_access(task, PTRACE_MODE_ATTACH_REALCREDS); task_unlock(task); if (retval) goto unlock_creds; --- a/mm/process_vm_access.c +++ b/mm/process_vm_access.c @@ -197,7 +197,7 @@ static ssize_t process_vm_rw_core(pid_t goto free_proc_pages; } - mm = mm_access(task, PTRACE_MODE_ATTACH); + mm = mm_access(task, PTRACE_MODE_ATTACH_REALCREDS); if (!mm || IS_ERR(mm)) { rc = IS_ERR(mm) ? PTR_ERR(mm) : -ESRCH; /* --- a/security/commoncap.c +++ b/security/commoncap.c @@ -142,12 +142,17 @@ int cap_ptrace_access_check(struct task_ { int ret = 0; const struct cred *cred, *child_cred; + const kernel_cap_t *caller_caps; rcu_read_lock(); cred = current_cred(); child_cred = __task_cred(child); + if (mode & PTRACE_MODE_FSCREDS) + caller_caps = &cred->cap_effective; + else + caller_caps = &cred->cap_permitted; if (cred->user_ns == child_cred->user_ns && - cap_issubset(child_cred->cap_permitted, cred->cap_permitted)) + cap_issubset(child_cred->cap_permitted, *caller_caps)) goto out; if (ns_capable(child_cred->user_ns, CAP_SYS_PTRACE)) goto out; --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -165,7 +165,7 @@ static void *m_start(struct seq_file *m, if (!priv->task) return ERR_PTR(-ESRCH); - mm = mm_access(priv->task, PTRACE_MODE_READ); + mm = mm_access(priv->task, PTRACE_MODE_READ_FSCREDS); if (!mm || IS_ERR(mm)) return mm; down_read(&mm->mmap_sem); @@ -1188,7 +1188,7 @@ static ssize_t pagemap_read(struct file if (!pm.buffer) goto out_task; - mm = mm_access(task, PTRACE_MODE_READ); + mm = mm_access(task, PTRACE_MODE_READ_FSCREDS); ret = PTR_ERR(mm); if (!mm || IS_ERR(mm)) goto out_free; --- a/fs/proc/task_nommu.c +++ b/fs/proc/task_nommu.c @@ -216,7 +216,7 @@ static void *m_start(struct seq_file *m, if (!priv->task) return ERR_PTR(-ESRCH); - mm = mm_access(priv->task, PTRACE_MODE_READ); + mm = mm_access(priv->task, PTRACE_MODE_READ_FSCREDS); if (!mm || IS_ERR(mm)) { put_task_struct(priv->task); priv->task = NULL;