Maintainer dependent but at least on x86 patches people tend to prefer capital letter in the short summary i.e. s/add/Add/ On Thu May 16, 2024 at 12:22 PM EEST, Jonathan Calmels wrote: > This patch adds a new capability security bit designed to constrain a > task’s userns capability set to its bounding set. The reason for this is > twofold: > > - This serves as a quick and easy way to lock down a set of capabilities > for a task, thus ensuring that any namespace it creates will never be > more privileged than itself is. > - This helps userspace transition to more secure defaults by not requiring > specific logic for the userns capability set, or libcap support. > > Example: > > # capsh --secbits=$((1 << 8)) --drop=cap_sys_rawio -- \ > -c 'unshare -r grep Cap /proc/self/status' > CapInh: 0000000000000000 > CapPrm: 000001fffffdffff > CapEff: 000001fffffdffff > CapBnd: 000001fffffdffff > CapAmb: 0000000000000000 > CapUNs: 000001fffffdffff > > Signed-off-by: Jonathan Calmels <jcalmels@xxxxxxxx> > --- > include/linux/securebits.h | 1 + > include/uapi/linux/securebits.h | 11 ++++++++++- > kernel/user_namespace.c | 5 +++++ > 3 files changed, 16 insertions(+), 1 deletion(-) > > diff --git a/include/linux/securebits.h b/include/linux/securebits.h > index 656528673983..5f9d85cd69c3 100644 > --- a/include/linux/securebits.h > +++ b/include/linux/securebits.h > @@ -5,4 +5,5 @@ > #include <uapi/linux/securebits.h> > > #define issecure(X) (issecure_mask(X) & current_cred_xxx(securebits)) > +#define iscredsecure(cred, X) (issecure_mask(X) & cred->securebits) > #endif /* !_LINUX_SECUREBITS_H */ > diff --git a/include/uapi/linux/securebits.h b/include/uapi/linux/securebits.h > index d6d98877ff1a..2da3f4be4531 100644 > --- a/include/uapi/linux/securebits.h > +++ b/include/uapi/linux/securebits.h > @@ -52,10 +52,19 @@ > #define SECBIT_NO_CAP_AMBIENT_RAISE_LOCKED \ > (issecure_mask(SECURE_NO_CAP_AMBIENT_RAISE_LOCKED)) > > +/* When set, user namespace capabilities are restricted to their parent's bounding set. */ > +#define SECURE_USERNS_STRICT_CAPS 8 > +#define SECURE_USERNS_STRICT_CAPS_LOCKED 9 /* make bit-8 immutable */ > + > +#define SECBIT_USERNS_STRICT_CAPS (issecure_mask(SECURE_USERNS_STRICT_CAPS)) > +#define SECBIT_USERNS_STRICT_CAPS_LOCKED \ > + (issecure_mask(SECURE_USERNS_STRICT_CAPS_LOCKED)) > + > #define SECURE_ALL_BITS (issecure_mask(SECURE_NOROOT) | \ > issecure_mask(SECURE_NO_SETUID_FIXUP) | \ > issecure_mask(SECURE_KEEP_CAPS) | \ > - issecure_mask(SECURE_NO_CAP_AMBIENT_RAISE)) > + issecure_mask(SECURE_NO_CAP_AMBIENT_RAISE) | \ > + issecure_mask(SECURE_USERNS_STRICT_CAPS)) > #define SECURE_ALL_LOCKS (SECURE_ALL_BITS << 1) > > #endif /* _UAPI_LINUX_SECUREBITS_H */ > diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c > index 7e624607330b..53848e2b68cd 100644 > --- a/kernel/user_namespace.c > +++ b/kernel/user_namespace.c > @@ -10,6 +10,7 @@ > #include <linux/cred.h> > #include <linux/securebits.h> > #include <linux/security.h> > +#include <linux/capability.h> > #include <linux/keyctl.h> > #include <linux/key-type.h> > #include <keys/user-type.h> > @@ -42,6 +43,10 @@ static void dec_user_namespaces(struct ucounts *ucounts) > > static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns) > { > + /* Limit userns capabilities to our parent's bounding set. */ > + if (iscredsecure(cred, SECURE_USERNS_STRICT_CAPS)) > + cred->cap_userns = cap_intersect(cred->cap_userns, cred->cap_bset); > + > /* Start with the capabilities defined in the userns set. */ > cred->cap_bset = cred->cap_userns; > cred->cap_permitted = cred->cap_userns; BR, Jarkko