Re: [PATCH ghak90 V8 16/16] audit: add capcontid to set contid outside init_user_ns

Richard Guy Briggs <rgb@xxxxxxxxxx> · Tue, 4 Feb 2020 19:39:30 -0500

On 2020-01-22 16:29, Paul Moore wrote:
> On Tue, Dec 31, 2019 at 2:51 PM Richard Guy Briggs <rgb@xxxxxxxxxx> wrote:
> >
> > Provide a mechanism similar to CAP_AUDIT_CONTROL to explicitly give a
> > process in a non-init user namespace the capability to set audit
> > container identifiers.
> >
> > Provide /proc/$PID/audit_capcontainerid interface to capcontid.
> > Valid values are: 1==enabled, 0==disabled
> 
> It would be good to be more explicit about "enabled" and "disabled" in
> the commit description.  For example, which setting allows the target
> task to set audit container IDs of its child processes?

Ok...

> > Report this action in message type AUDIT_SET_CAPCONTID 1022 with fields
> > opid= capcontid= old-capcontid=
> >
> > Signed-off-by: Richard Guy Briggs <rgb@xxxxxxxxxx>
> > ---
> >  fs/proc/base.c             | 55 ++++++++++++++++++++++++++++++++++++++++++++++
> >  include/linux/audit.h      | 14 ++++++++++++
> >  include/uapi/linux/audit.h |  1 +
> >  kernel/audit.c             | 35 +++++++++++++++++++++++++++++
> >  4 files changed, 105 insertions(+)
> 
> ...
> 
> > diff --git a/fs/proc/base.c b/fs/proc/base.c
> > index 26091800180c..283ef8e006e7 100644
> > --- a/fs/proc/base.c
> > +++ b/fs/proc/base.c
> > @@ -1360,6 +1360,59 @@ static ssize_t proc_contid_write(struct file *file, const char __user *buf,
> >         .write          = proc_contid_write,
> >         .llseek         = generic_file_llseek,
> >  };
> > +
> > +static ssize_t proc_capcontid_read(struct file *file, char __user *buf,
> > +                                 size_t count, loff_t *ppos)
> > +{
> > +       struct inode *inode = file_inode(file);
> > +       struct task_struct *task = get_proc_task(inode);
> > +       ssize_t length;
> > +       char tmpbuf[TMPBUFLEN];
> > +
> > +       if (!task)
> > +               return -ESRCH;
> > +       /* if we don't have caps, reject */
> > +       if (!capable(CAP_AUDIT_CONTROL) && !audit_get_capcontid(current))
> > +               return -EPERM;
> > +       length = scnprintf(tmpbuf, TMPBUFLEN, "%u", audit_get_capcontid(task));
> > +       put_task_struct(task);
> > +       return simple_read_from_buffer(buf, count, ppos, tmpbuf, length);
> > +}
> > +
> > +static ssize_t proc_capcontid_write(struct file *file, const char __user *buf,
> > +                                  size_t count, loff_t *ppos)
> > +{
> > +       struct inode *inode = file_inode(file);
> > +       u32 capcontid;
> > +       int rv;
> > +       struct task_struct *task = get_proc_task(inode);
> > +
> > +       if (!task)
> > +               return -ESRCH;
> > +       if (*ppos != 0) {
> > +               /* No partial writes. */
> > +               put_task_struct(task);
> > +               return -EINVAL;
> > +       }
> > +
> > +       rv = kstrtou32_from_user(buf, count, 10, &capcontid);
> > +       if (rv < 0) {
> > +               put_task_struct(task);
> > +               return rv;
> > +       }
> > +
> > +       rv = audit_set_capcontid(task, capcontid);
> > +       put_task_struct(task);
> > +       if (rv < 0)
> > +               return rv;
> > +       return count;
> > +}
> > +
> > +static const struct file_operations proc_capcontid_operations = {
> > +       .read           = proc_capcontid_read,
> > +       .write          = proc_capcontid_write,
> > +       .llseek         = generic_file_llseek,
> > +};
> >  #endif
> >
> >  #ifdef CONFIG_FAULT_INJECTION
> > @@ -3121,6 +3174,7 @@ static int proc_stack_depth(struct seq_file *m, struct pid_namespace *ns,
> >         REG("loginuid",   S_IWUSR|S_IRUGO, proc_loginuid_operations),
> >         REG("sessionid",  S_IRUGO, proc_sessionid_operations),
> >         REG("audit_containerid", S_IWUSR|S_IRUSR, proc_contid_operations),
> > +       REG("audit_capcontainerid", S_IWUSR|S_IRUSR|S_IRUSR, proc_capcontid_operations),
> >  #endif
> >  #ifdef CONFIG_FAULT_INJECTION
> >         REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations),
> > @@ -3522,6 +3576,7 @@ static int proc_tid_comm_permission(struct inode *inode, int mask)
> >         REG("loginuid",  S_IWUSR|S_IRUGO, proc_loginuid_operations),
> >         REG("sessionid",  S_IRUGO, proc_sessionid_operations),
> >         REG("audit_containerid", S_IWUSR|S_IRUSR, proc_contid_operations),
> > +       REG("audit_capcontainerid", S_IWUSR|S_IRUSR|S_IRUSR, proc_capcontid_operations),
> >  #endif
> >  #ifdef CONFIG_FAULT_INJECTION
> >         REG("make-it-fail", S_IRUGO|S_IWUSR, proc_fault_inject_operations),
> > diff --git a/include/linux/audit.h b/include/linux/audit.h
> > index 28b9c7cd86a6..62c453306c2a 100644
> > --- a/include/linux/audit.h
> > +++ b/include/linux/audit.h
> > @@ -116,6 +116,7 @@ struct audit_task_info {
> >         kuid_t                  loginuid;
> >         unsigned int            sessionid;
> >         struct audit_contobj    *cont;
> > +       u32                     capcontid;
> 
> Where is the code change that actually uses this to enforce the
> described policy on setting an audit container ID?

Oops, that was lost in the shuffle of refactoring when the netlink code
was dropped in favour of /proc.

> > diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
> > index 2844d78cd7af..01251e6dcec0 100644
> > --- a/include/uapi/linux/audit.h
> > +++ b/include/uapi/linux/audit.h
> > @@ -73,6 +73,7 @@
> >  #define AUDIT_GET_FEATURE      1019    /* Get which features are enabled */
> >  #define AUDIT_CONTAINER_OP     1020    /* Define the container id and info */
> >  #define AUDIT_SIGNAL_INFO2     1021    /* Get info auditd signal sender */
> > +#define AUDIT_SET_CAPCONTID    1022    /* Set cap_contid of a task */
> >
> >  #define AUDIT_FIRST_USER_MSG   1100    /* Userspace messages mostly uninteresting to kernel */
> >  #define AUDIT_USER_AVC         1107    /* We filter this differently */
> > diff --git a/kernel/audit.c b/kernel/audit.c
> > index 1287f0b63757..1c22dd084ae8 100644
> > --- a/kernel/audit.c
> > +++ b/kernel/audit.c
> > @@ -2698,6 +2698,41 @@ static bool audit_contid_isowner(struct task_struct *tsk)
> >         return false;
> >  }
> >
> > +int audit_set_capcontid(struct task_struct *task, u32 enable)
> > +{
> > +       u32 oldcapcontid;
> > +       int rc = 0;
> > +       struct audit_buffer *ab;
> > +
> > +       if (!task->audit)
> > +               return -ENOPROTOOPT;
> > +       oldcapcontid = audit_get_capcontid(task);
> > +       /* if task is not descendant, block */
> > +       if (task == current)
> > +               rc = -EBADSLT;
> > +       else if (!task_is_descendant(current, task))
> > +               rc = -EXDEV;
> 
> See my previous comments about error code sanity.

I'll go with EXDEV.

> > +       else if (current_user_ns() == &init_user_ns) {
> > +               if (!capable(CAP_AUDIT_CONTROL) && !audit_get_capcontid(current))
> > +                       rc = -EPERM;
> 
> I think we just want to use ns_capable() in the context of the current
> userns to check CAP_AUDIT_CONTROL, yes?  Something like this ...

I thought we had firmly established in previous discussion that
CAP_AUDIT_CONTROL in anything other than init_user_ns was completely irrelevant
and untrustworthy.

>   if (current_user_ns() != &init_user_ns) {
>     if (!ns_capable(CAP_AUDIT_CONTROL) || !audit_get_capcontid())
>       rc = -EPERM;
>   } else if (!capable(CAP_AUDIT_CONTROL))
>     rc = -EPERM;
> 
> > +       }
> > +       if (!rc)
> > +               task->audit->capcontid = enable;
> > +
> > +       if (!audit_enabled)
> > +               return rc;
> > +
> > +       ab = audit_log_start(audit_context(), GFP_KERNEL, AUDIT_SET_CAPCONTID);
> > +       if (!ab)
> > +               return rc;
> > +
> > +       audit_log_format(ab,
> > +                        "opid=%d capcontid=%u old-capcontid=%u",
> > +                        task_tgid_nr(task), enable, oldcapcontid);
> > +       audit_log_end(ab);
> 
> My prior comments about recording the success/failure, or not emitting
> the record on failure, seem relevant here too.

It should be recorded in the syscall record.

> > +       return rc;
> > +}
> 
> paul moore

- RGB

--
Richard Guy Briggs <rgb@xxxxxxxxxx>
Sr. S/W Engineer, Kernel Security, Base Operating Systems
Remote, Ottawa, Red Hat Canada
IRC: rgb, SunRaycer
Voice: +1.647.777.2635, Internal: (81) 32635