"Serge E. Hallyn" <serge@xxxxxxxxxx> writes: > ptrace is allowed to tasks in the same user namespace according to > the usual rules (i.e. the same rules as for two tasks in the init > user namespace). ptrace is also allowed to a user namespace to > which the current task has the CAP_SYS_PTRACE capability. The uid equality check below is broken. Eric > Signed-off-by: Serge E. Hallyn <serge.hallyn@xxxxxxxxxxxxx> > --- > include/linux/capability.h | 2 ++ > kernel/ptrace.c | 40 ++++++++++++++++++++++++++++------------ > security/commoncap.c | 26 +++++++++++++++++++++----- > 3 files changed, 51 insertions(+), 17 deletions(-) > > diff --git a/include/linux/capability.h b/include/linux/capability.h > index cc3e976..777a166 100644 > --- a/include/linux/capability.h > +++ b/include/linux/capability.h > @@ -543,6 +543,8 @@ extern const kernel_cap_t __cap_init_eff_set; > */ > #define has_capability(t, cap) (security_real_capable((t), &init_user_ns, (cap)) == 0) > > +#define has_ns_capability(t, ns, cap) (security_real_capable((t), (ns), (cap)) == 0) > + > /** > * has_capability_noaudit - Determine if a task has a superior capability available (unaudited) > * @t: The task in question > diff --git a/kernel/ptrace.c b/kernel/ptrace.c > index 99bbaa3..aed24eb 100644 > --- a/kernel/ptrace.c > +++ b/kernel/ptrace.c > @@ -116,6 +116,19 @@ int ptrace_check_attach(struct task_struct *child, int kill) > return ret; > } > > +static inline int may_ptrace_ns(struct task_struct *t) Can we name this ptrace_capable, since you are only wrapping the capability check? With a name like may_ptrace_ns I imagine very different semantics. 
> +{ > + struct user_namespace *ns; > + int ret; > + > + rcu_read_lock(); > + ns = task_cred_xxx(t, user)->user_ns; > + ret = ns_capable(ns, CAP_SYS_PTRACE); > + rcu_read_unlock(); > + > + return ret; > +} > + > int __ptrace_may_access(struct task_struct *task, unsigned int mode) > { > const struct cred *cred = current_cred(), *tcred; > @@ -134,21 +147,24 @@ int __ptrace_may_access(struct task_struct *task, unsigned int mode) > return 0; > rcu_read_lock(); > tcred = __task_cred(task); > - if ((cred->uid != tcred->euid || > - cred->uid != tcred->suid || > - cred->uid != tcred->uid || > - cred->gid != tcred->egid || > - cred->gid != tcred->sgid || > - cred->gid != tcred->gid) && > - !capable(CAP_SYS_PTRACE)) { > - rcu_read_unlock(); > - return -EPERM; > - } > + if (cred->user->user_ns == tcred->user->user_ns && > + (cred->uid == tcred->euid || > + cred->uid == tcred->suid || > + cred->uid == tcred->uid || > + cred->gid == tcred->egid || > + cred->gid == tcred->sgid || > + cred->gid == tcred->gid)) > + goto ok; This needs to be: > + if (cred->user->user_ns == tcred->user->user_ns && > + (cred->uid == tcred->euid && > + cred->uid == tcred->suid && > + cred->uid == tcred->uid && > + cred->gid == tcred->egid && > + cred->gid == tcred->sgid && > + cred->gid == tcred->gid)) > + goto ok; > + if (ns_capable(tcred->user->user_ns, CAP_SYS_PTRACE)) > + goto ok; > + rcu_read_unlock(); > + return -EPERM; > +ok: > rcu_read_unlock(); > smp_rmb(); > if (task->mm) > dumpable = get_dumpable(task->mm); > - if (!dumpable && !capable(CAP_SYS_PTRACE)) > + if (!dumpable && !may_ptrace_ns(task)) > return -EPERM; > > return security_ptrace_access_check(task, mode); > @@ -198,7 +214,7 @@ int ptrace_attach(struct task_struct *task) > goto unlock_tasklist; > > task->ptrace = PT_PTRACED; > - if (capable(CAP_SYS_PTRACE)) > + if (may_ptrace_ns(task)) > task->ptrace |= PT_PTRACE_CAP; > > __ptrace_link(task, current); > diff --git a/security/commoncap.c b/security/commoncap.c > index 
9d910e6..bd0bcc6 100644 > --- a/security/commoncap.c > +++ b/security/commoncap.c > @@ -136,12 +136,20 @@ int cap_settime(struct timespec *ts, struct timezone *tz) > int cap_ptrace_access_check(struct task_struct *child, unsigned int mode) > { > int ret = 0; > + struct cred *cred, *tcred; > > rcu_read_lock(); > - if (!cap_issubset(__task_cred(child)->cap_permitted, > - current_cred()->cap_permitted) && > + cred = current_cred(); > + tcred = __task_cred(child); > + if (cred->user->user_ns != tcred->user->user_ns) { This probably deserves a comment about why cap_issubset isn't needed here. I.e., we implicitly have all caps in child user namespaces, so if we have CAP_SYS_PTRACE we know we have them all. > + if (!ns_capable(tcred->user->user_ns, CAP_SYS_PTRACE)) > + ret = -EPERM; > + goto out; > + } > + if (!cap_issubset(tcred->cap_permitted, cred->cap_permitted) && > !capable(CAP_SYS_PTRACE)) > ret = -EPERM; > +out: > rcu_read_unlock(); > return ret; > } > @@ -156,12 +164,20 @@ int cap_ptrace_access_check(struct task_struct *child, unsigned int mode) > int cap_ptrace_traceme(struct task_struct *parent) > { > int ret = 0; > + struct cred *cred, *tcred; > > rcu_read_lock(); > - if (!cap_issubset(current_cred()->cap_permitted, > - __task_cred(parent)->cap_permitted) && > - !has_capability(parent, CAP_SYS_PTRACE)) > + cred = __task_cred(parent); > + tcred = current_cred(); > + if (cred->user->user_ns != tcred->user->user_ns) { > + if (!has_ns_capability(parent, tcred->user->user_ns, CAP_SYS_PTRACE)) > + ret = -EPERM; > + goto out; > + } > + if (!cap_issubset(tcred->cap_permitted, cred->cap_permitted) && > + !has_ns_capability(parent, tcred->user->user_ns, CAP_SYS_PTRACE)) > ret = -EPERM; > +out: > rcu_read_unlock(); > return ret; > } _______________________________________________ Containers mailing list Containers@xxxxxxxxxxxxxxxxxxxxxxxxxx https://lists.linux-foundation.org/mailman/listinfo/containers