On Tue, Feb 28, 2023 at 6:47 AM Mateusz Guzik <mjguzik@xxxxxxxxx> wrote: > > Personally I would only touch it as a result of losing a bet (and I'm > not taking any with this in play), but that's just my $0.05 (adjusted > for inflation). Heh. I took that as a challenge. It wasn't actually all that bad (famous last words). For type safety reasons I decided to use a struct wrapper typedef struct { u64 val; } kernel_cap_t; to avoid any nasty silent integer value conversions. But then it was literally just a matter of cleaning up some of the insanity. I think the fs/proc/array.c modification is an example of just how bad things used to be: --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -300,13 +300,8 @@ static inline void task_sig(struct seq_file *m, static void render_cap_t(struct seq_file *m, const char *header, kernel_cap_t *a) { - unsigned __capi; - seq_puts(m, header); - CAP_FOR_EACH_U32(__capi) { - seq_put_hex_ll(m, NULL, - a->cap[CAP_LAST_U32 - __capi], 8); - } + seq_put_hex_ll(m, NULL, a->val, 16); seq_putc(m, '\n'); } note how the code literally did that odd CAP_LAST_U32 - __capi in there just to get the natural "high word first" that is exactly what you get if you just print out the value as a hex number. Now, the actual user mode conversions still exist, but even those often got simplified. Have I actually *tested* this? Of course not. I'm lazy, and everything I write obviously always works on the first try anyway, so why should I? So take this patch with a healthy dose of salt, but it looks sane to me, and it does build cleanly (and with our type system, that actually does say quite a lot). This actually looks sane enough that I might even boot it. Call me crazy. Linus
fs/proc/array.c | 7 +- include/linux/capability.h | 128 +++++---------------- io_uring/fdinfo.c | 4 +- kernel/auditsc.c | 6 +- kernel/capability.c | 97 ++++++++-------- kernel/umh.c | 41 +++---- security/apparmor/policy_unpack.c | 40 +++++-- security/commoncap.c | 49 ++++---- .../selftests/bpf/progs/test_deny_namespace.c | 7 +- 9 files changed, 150 insertions(+), 229 deletions(-) diff --git a/fs/proc/array.c b/fs/proc/array.c index 49283b8103c7..9b0315d34c58 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -300,13 +300,8 @@ static inline void task_sig(struct seq_file *m, struct task_struct *p) static void render_cap_t(struct seq_file *m, const char *header, kernel_cap_t *a) { - unsigned __capi; - seq_puts(m, header); - CAP_FOR_EACH_U32(__capi) { - seq_put_hex_ll(m, NULL, - a->cap[CAP_LAST_U32 - __capi], 8); - } + seq_put_hex_ll(m, NULL, a->val, 16); seq_putc(m, '\n'); } diff --git a/include/linux/capability.h b/include/linux/capability.h index d3c6c2d1ff45..9bd50f070494 100644 --- a/include/linux/capability.h +++ b/include/linux/capability.h @@ -15,28 +15,25 @@ #include <uapi/linux/capability.h> #include <linux/uidgid.h> +#include <linux/bits.h> #define _KERNEL_CAPABILITY_VERSION _LINUX_CAPABILITY_VERSION_3 -#define _KERNEL_CAPABILITY_U32S _LINUX_CAPABILITY_U32S_3 extern int file_caps_enabled; -typedef struct kernel_cap_struct { - __u32 cap[_KERNEL_CAPABILITY_U32S]; -} kernel_cap_t; +typedef struct { u64 val; } kernel_cap_t; /* same as vfs_ns_cap_data but in cpu endian and always filled completely */ struct cpu_vfs_cap_data { __u32 magic_etc; + kuid_t rootid; kernel_cap_t permitted; kernel_cap_t inheritable; - kuid_t rootid; }; #define _USER_CAP_HEADER_SIZE (sizeof(struct __user_cap_header_struct)) #define _KERNEL_CAP_T_SIZE (sizeof(kernel_cap_t)) - struct file; struct inode; struct dentry; @@ -47,13 +44,6 @@ struct mnt_idmap; extern const kernel_cap_t __cap_empty_set; extern const kernel_cap_t __cap_init_eff_set; -/* - * Internal kernel functions only - */ - -#define CAP_FOR_EACH_U32(__capi) \ - for (__capi = 0; __capi < _KERNEL_CAPABILITY_U32S; ++__capi) - /* * CAP_FS_MASK and CAP_NFSD_MASKS: * @@ -67,104 +57,52 @@ extern const kernel_cap_t __cap_init_eff_set; * 2. The security.* and trusted.* xattrs are fs-related MAC permissions */ -# define CAP_FS_MASK_B0 (CAP_TO_MASK(CAP_CHOWN) \ - | CAP_TO_MASK(CAP_MKNOD) \ - | CAP_TO_MASK(CAP_DAC_OVERRIDE) \ - | CAP_TO_MASK(CAP_DAC_READ_SEARCH) \ - | CAP_TO_MASK(CAP_FOWNER) \ - | CAP_TO_MASK(CAP_FSETID)) - -# define CAP_FS_MASK_B1 (CAP_TO_MASK(CAP_MAC_OVERRIDE)) - -#if _KERNEL_CAPABILITY_U32S != 2 -# error Fix up hand-coded capability macro initializers -#else /* HAND-CODED capability initializers */ +# define CAP_FS_MASK (BIT_ULL(CAP_CHOWN) \ + | BIT_ULL(CAP_MKNOD) \ + | BIT_ULL(CAP_DAC_OVERRIDE) \ + | BIT_ULL(CAP_DAC_READ_SEARCH) \ + | BIT_ULL(CAP_FOWNER) \ + | BIT_ULL(CAP_FSETID) \ + | BIT_ULL(CAP_MAC_OVERRIDE)) +#define CAP_VALID_MASK (BIT_ULL(CAP_LAST_CAP+1)-1) -#define CAP_LAST_U32 ((_KERNEL_CAPABILITY_U32S) - 1) -#define CAP_LAST_U32_VALID_MASK (CAP_TO_MASK(CAP_LAST_CAP + 1) -1) +# define CAP_EMPTY_SET ((kernel_cap_t) { 0 }) +# define CAP_FULL_SET ((kernel_cap_t) { CAP_VALID_MASK }) +# define CAP_FS_SET ((kernel_cap_t) { CAP_FS_MASK | BIT_ULL(CAP_LINUX_IMMUTABLE) }) +# define CAP_NFSD_SET ((kernel_cap_t) { CAP_FS_MASK | BIT_ULL(CAP_SYS_RESOURCE) }) -# define CAP_EMPTY_SET ((kernel_cap_t){{ 0, 0 }}) -# define CAP_FULL_SET ((kernel_cap_t){{ ~0, CAP_LAST_U32_VALID_MASK }}) -# define CAP_FS_SET ((kernel_cap_t){{ CAP_FS_MASK_B0 \ - | CAP_TO_MASK(CAP_LINUX_IMMUTABLE), \ - CAP_FS_MASK_B1 } }) -# define CAP_NFSD_SET ((kernel_cap_t){{ CAP_FS_MASK_B0 \ - | CAP_TO_MASK(CAP_SYS_RESOURCE), \ - CAP_FS_MASK_B1 } }) +# define cap_clear(c) do { (c).val = 0; } while (0) -#endif /* _KERNEL_CAPABILITY_U32S != 2 */ - -# define cap_clear(c) do { (c) = __cap_empty_set; } while (0) - -#define cap_raise(c, flag) ((c).cap[CAP_TO_INDEX(flag)] |= CAP_TO_MASK(flag)) -#define cap_lower(c, flag) ((c).cap[CAP_TO_INDEX(flag)] &= ~CAP_TO_MASK(flag)) -#define cap_raised(c, flag) ((c).cap[CAP_TO_INDEX(flag)] & CAP_TO_MASK(flag)) - -#define CAP_BOP_ALL(c, a, b, OP) \ -do { \ - unsigned __capi; \ - CAP_FOR_EACH_U32(__capi) { \ - c.cap[__capi] = a.cap[__capi] OP b.cap[__capi]; \ - } \ -} while (0) - -#define CAP_UOP_ALL(c, a, OP) \ -do { \ - unsigned __capi; \ - CAP_FOR_EACH_U32(__capi) { \ - c.cap[__capi] = OP a.cap[__capi]; \ - } \ -} while (0) +#define cap_raise(c, flag) ((c).val |= BIT_ULL(flag)) +#define cap_lower(c, flag) ((c).val &= ~BIT_ULL(flag)) +#define cap_raised(c, flag) (((c).val & BIT_ULL(flag)) != 0) static inline kernel_cap_t cap_combine(const kernel_cap_t a, const kernel_cap_t b) { - kernel_cap_t dest; - CAP_BOP_ALL(dest, a, b, |); - return dest; + return (kernel_cap_t) { a.val | b.val }; } static inline kernel_cap_t cap_intersect(const kernel_cap_t a, const kernel_cap_t b) { - kernel_cap_t dest; - CAP_BOP_ALL(dest, a, b, &); - return dest; + return (kernel_cap_t) { a.val & b.val }; } static inline kernel_cap_t cap_drop(const kernel_cap_t a, const kernel_cap_t drop) { - kernel_cap_t dest; - CAP_BOP_ALL(dest, a, drop, &~); - return dest; -} - -static inline kernel_cap_t cap_invert(const kernel_cap_t c) -{ - kernel_cap_t dest; - CAP_UOP_ALL(dest, c, ~); - return dest; + return (kernel_cap_t) { a.val &~ drop.val }; } static inline bool cap_isclear(const kernel_cap_t a) { - unsigned __capi; - CAP_FOR_EACH_U32(__capi) { - if (a.cap[__capi] != 0) - return false; - } - return true; + return !a.val; } static inline bool cap_isidentical(const kernel_cap_t a, const kernel_cap_t b) { - unsigned __capi; - CAP_FOR_EACH_U32(__capi) { - if (a.cap[__capi] != b.cap[__capi]) - return false; - } - return true; + return a.val == b.val; } /* @@ -176,39 +114,31 @@ static inline bool cap_isidentical(const kernel_cap_t a, const kernel_cap_t b) */ static inline bool cap_issubset(const kernel_cap_t a, const kernel_cap_t set) { - kernel_cap_t dest; - dest = cap_drop(a, set); - return cap_isclear(dest); + return (a.val & set.val) == a.val; } /* Used to decide between falling back on the old suser() or fsuser(). */ static inline kernel_cap_t cap_drop_fs_set(const kernel_cap_t a) { - const kernel_cap_t __cap_fs_set = CAP_FS_SET; - return cap_drop(a, __cap_fs_set); + return cap_drop(a, CAP_FS_SET); } static inline kernel_cap_t cap_raise_fs_set(const kernel_cap_t a, const kernel_cap_t permitted) { - const kernel_cap_t __cap_fs_set = CAP_FS_SET; - return cap_combine(a, - cap_intersect(permitted, __cap_fs_set)); + return cap_combine(a, cap_intersect(permitted, CAP_FS_SET)); } static inline kernel_cap_t cap_drop_nfsd_set(const kernel_cap_t a) { - const kernel_cap_t __cap_fs_set = CAP_NFSD_SET; - return cap_drop(a, __cap_fs_set); + return cap_drop(a, CAP_NFSD_SET); } static inline kernel_cap_t cap_raise_nfsd_set(const kernel_cap_t a, const kernel_cap_t permitted) { - const kernel_cap_t __cap_nfsd_set = CAP_NFSD_SET; - return cap_combine(a, - cap_intersect(permitted, __cap_nfsd_set)); + return cap_combine(a, cap_intersect(permitted, CAP_NFSD_SET)); } #ifdef CONFIG_MULTIUSER diff --git a/io_uring/fdinfo.c b/io_uring/fdinfo.c index 882bd56b01ed..76c279b13aee 100644 --- a/io_uring/fdinfo.c +++ b/io_uring/fdinfo.c @@ -22,7 +22,6 @@ static __cold int io_uring_show_cred(struct seq_file *m, unsigned int id, struct user_namespace *uns = seq_user_ns(m); struct group_info *gi; kernel_cap_t cap; - unsigned __capi; int g; seq_printf(m, "%5d\n", id); @@ -42,8 +41,7 @@ static __cold int io_uring_show_cred(struct seq_file *m, unsigned int id, } seq_puts(m, "\n\tCapEff:\t"); cap = cred->cap_effective; - CAP_FOR_EACH_U32(__capi) - seq_put_hex_ll(m, NULL, cap.cap[CAP_LAST_U32 - __capi], 8); + seq_put_hex_ll(m, NULL, cap.val, 16); seq_putc(m, '\n'); return 0; } diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 93d0b87f3283..addeed3df15d 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -1295,15 +1295,11 @@ static void audit_log_execve_info(struct audit_context *context, static void audit_log_cap(struct audit_buffer *ab, char *prefix, kernel_cap_t *cap) { - int i; - if (cap_isclear(*cap)) { audit_log_format(ab, " %s=0", prefix); return; } - audit_log_format(ab, " %s=", prefix); - CAP_FOR_EACH_U32(i) - audit_log_format(ab, "%08x", cap->cap[CAP_LAST_U32 - i]); + audit_log_format(ab, " %s=%016llx", prefix, cap->val); } static void audit_log_fcaps(struct audit_buffer *ab, struct audit_names *name) diff --git a/kernel/capability.c b/kernel/capability.c index 339a44dfe2f4..40ede532259e 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -151,6 +151,7 @@ SYSCALL_DEFINE2(capget, cap_user_header_t, header, cap_user_data_t, dataptr) pid_t pid; unsigned tocopy; kernel_cap_t pE, pI, pP; + struct __user_cap_data_struct kdata[2]; ret = cap_validate_magic(header, &tocopy); if ((dataptr == NULL) || (ret != 0)) @@ -163,42 +164,46 @@ SYSCALL_DEFINE2(capget, cap_user_header_t, header, cap_user_data_t, dataptr) return -EINVAL; ret = cap_get_target_pid(pid, &pE, &pI, &pP); - if (!ret) { - struct __user_cap_data_struct kdata[_KERNEL_CAPABILITY_U32S]; - unsigned i; - - for (i = 0; i < tocopy; i++) { - kdata[i].effective = pE.cap[i]; - kdata[i].permitted = pP.cap[i]; - kdata[i].inheritable = pI.cap[i]; - } - - /* - * Note, in the case, tocopy < _KERNEL_CAPABILITY_U32S, - * we silently drop the upper capabilities here. This - * has the effect of making older libcap - * implementations implicitly drop upper capability - * bits when they perform a: capget/modify/capset - * sequence. - * - * This behavior is considered fail-safe - * behavior. Upgrading the application to a newer - * version of libcap will enable access to the newer - * capabilities. - * - * An alternative would be to return an error here - * (-ERANGE), but that causes legacy applications to - * unexpectedly fail; the capget/modify/capset aborts - * before modification is attempted and the application - * fails. - */ - if (copy_to_user(dataptr, kdata, tocopy - * sizeof(struct __user_cap_data_struct))) { - return -EFAULT; - } - } + if (ret) + return ret; - return ret; + /* + * Annoying legacy format with 64-bit capabilities exposed + * as two sets of 32-bit fields, so we need to split the + * capability values up. + */ + kdata[0].effective = pE.val; kdata[1].effective = pE.val >> 32; + kdata[0].permitted = pP.val; kdata[1].permitted = pP.val >> 32; + kdata[0].inheritable = pI.val; kdata[0].inheritable = pI.val >> 32; + + /* + * Note, in the case, tocopy < _KERNEL_CAPABILITY_U32S, + * we silently drop the upper capabilities here. This + * has the effect of making older libcap + * implementations implicitly drop upper capability + * bits when they perform a: capget/modify/capset + * sequence. + * + * This behavior is considered fail-safe + * behavior. Upgrading the application to a newer + * version of libcap will enable access to the newer + * capabilities. + * + * An alternative would be to return an error here + * (-ERANGE), but that causes legacy applications to + * unexpectedly fail; the capget/modify/capset aborts + * before modification is attempted and the application + * fails. + */ + if (copy_to_user(dataptr, kdata, tocopy * sizeof(kdata[0]))) + return -EFAULT; + + return 0; +} + +static kernel_cap_t mk_kernel_cap(u32 low, u32 high) +{ + return (kernel_cap_t) { (low | ((u64)high << 32)) & CAP_VALID_MASK }; } /** @@ -221,8 +226,8 @@ SYSCALL_DEFINE2(capget, cap_user_header_t, header, cap_user_data_t, dataptr) */ SYSCALL_DEFINE2(capset, cap_user_header_t, header, const cap_user_data_t, data) { - struct __user_cap_data_struct kdata[_KERNEL_CAPABILITY_U32S]; - unsigned i, tocopy, copybytes; + struct __user_cap_data_struct kdata[2] = { { 0, }, }; + unsigned tocopy, copybytes; kernel_cap_t inheritable, permitted, effective; struct cred *new; int ret; @@ -246,21 +251,9 @@ SYSCALL_DEFINE2(capset, cap_user_header_t, header, const cap_user_data_t, data) if (copy_from_user(&kdata, data, copybytes)) return -EFAULT; - for (i = 0; i < tocopy; i++) { - effective.cap[i] = kdata[i].effective; - permitted.cap[i] = kdata[i].permitted; - inheritable.cap[i] = kdata[i].inheritable; - } - while (i < _KERNEL_CAPABILITY_U32S) { - effective.cap[i] = 0; - permitted.cap[i] = 0; - inheritable.cap[i] = 0; - i++; - } - - effective.cap[CAP_LAST_U32] &= CAP_LAST_U32_VALID_MASK; - permitted.cap[CAP_LAST_U32] &= CAP_LAST_U32_VALID_MASK; - inheritable.cap[CAP_LAST_U32] &= CAP_LAST_U32_VALID_MASK; + effective = mk_kernel_cap(kdata[0].effective, kdata[1].effective); + permitted = mk_kernel_cap(kdata[0].permitted, kdata[1].permitted); + inheritable = mk_kernel_cap(kdata[0].inheritable, kdata[1].inheritable); new = prepare_creds(); if (!new) diff --git a/kernel/umh.c b/kernel/umh.c index fbf872c624cb..2a4708277335 100644 --- a/kernel/umh.c +++ b/kernel/umh.c @@ -501,9 +501,9 @@ static int proc_cap_handler(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table t; - unsigned long cap_array[_KERNEL_CAPABILITY_U32S]; - kernel_cap_t new_cap; - int err, i; + unsigned long cap_array[2]; + kernel_cap_t new_cap, *cap; + int err; if (write && (!capable(CAP_SETPCAP) || !capable(CAP_SYS_MODULE))) @@ -514,14 +514,16 @@ static int proc_cap_handler(struct ctl_table *table, int write, * userspace if this is a read. */ spin_lock(&umh_sysctl_lock); - for (i = 0; i < _KERNEL_CAPABILITY_U32S; i++) { - if (table->data == CAP_BSET) - cap_array[i] = usermodehelper_bset.cap[i]; - else if (table->data == CAP_PI) - cap_array[i] = usermodehelper_inheritable.cap[i]; - else - BUG(); - } + if (table->data == CAP_BSET) + cap = &usermodehelper_bset; + else if (table->data == CAP_PI) + cap = &usermodehelper_inheritable; + else + BUG(); + + /* Legacy format: capabilities are exposed as two 32-bit values */ + cap_array[0] = (u32) cap->val; + cap_array[1] = cap->val >> 32; spin_unlock(&umh_sysctl_lock); t = *table; @@ -535,22 +537,15 @@ static int proc_cap_handler(struct ctl_table *table, int write, if (err < 0) return err; - /* - * convert from the sysctl array of ulongs to the kernel_cap_t - * internal representation - */ - for (i = 0; i < _KERNEL_CAPABILITY_U32S; i++) - new_cap.cap[i] = cap_array[i]; + new_cap.val = (u32)cap_array[0]; + new_cap.val += (u64)cap_array[1] << 32; /* * Drop everything not in the new_cap (but don't add things) */ if (write) { spin_lock(&umh_sysctl_lock); - if (table->data == CAP_BSET) - usermodehelper_bset = cap_intersect(usermodehelper_bset, new_cap); - if (table->data == CAP_PI) - usermodehelper_inheritable = cap_intersect(usermodehelper_inheritable, new_cap); + *cap = cap_intersect(*cap, new_cap); spin_unlock(&umh_sysctl_lock); } @@ -561,14 +556,14 @@ struct ctl_table usermodehelper_table[] = { { .procname = "bset", .data = CAP_BSET, - .maxlen = _KERNEL_CAPABILITY_U32S * sizeof(unsigned long), + .maxlen = 2 * sizeof(unsigned long), .mode = 0600, .proc_handler = proc_cap_handler, }, { .procname = "inheritable", .data = CAP_PI, - .maxlen = _KERNEL_CAPABILITY_U32S * sizeof(unsigned long), + .maxlen = 2 * sizeof(unsigned long), .mode = 0600, .proc_handler = proc_cap_handler, }, diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c index 5e9949832af6..cf2ceec40b28 100644 --- a/security/apparmor/policy_unpack.c +++ b/security/apparmor/policy_unpack.c @@ -304,6 +304,26 @@ VISIBLE_IF_KUNIT bool aa_unpack_u64(struct aa_ext *e, u64 *data, const char *nam } EXPORT_SYMBOL_IF_KUNIT(aa_unpack_u64); +static bool aa_unpack_cap_low(struct aa_ext *e, kernel_cap_t *data, const char *name) +{ + u32 val; + + if (!aa_unpack_u32(e, &val, name)) + return false; + data->val = val; + return true; +} + +static bool aa_unpack_cap_high(struct aa_ext *e, kernel_cap_t *data, const char *name) +{ + u32 val; + + if (!aa_unpack_u32(e, &val, name)) + return false; + data->val = (u32)data->val | ((u64)val << 32); + return true; +} + VISIBLE_IF_KUNIT bool aa_unpack_array(struct aa_ext *e, const char *name, u16 *size) { void *pos = e->pos; @@ -897,25 +917,25 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name) profile->path_flags = PATH_MEDIATE_DELETED; info = "failed to unpack profile capabilities"; - if (!aa_unpack_u32(e, &(rules->caps.allow.cap[0]), NULL)) + if (!aa_unpack_cap_low(e, &rules->caps.allow, NULL)) goto fail; - if (!aa_unpack_u32(e, &(rules->caps.audit.cap[0]), NULL)) + if (!aa_unpack_cap_low(e, &rules->caps.audit, NULL)) goto fail; - if (!aa_unpack_u32(e, &(rules->caps.quiet.cap[0]), NULL)) + if (!aa_unpack_cap_low(e, &rules->caps.quiet, NULL)) goto fail; - if (!aa_unpack_u32(e, &tmpcap.cap[0], NULL)) + if (!aa_unpack_cap_low(e, &tmpcap, NULL)) goto fail; info = "failed to unpack upper profile capabilities"; if (aa_unpack_nameX(e, AA_STRUCT, "caps64")) { /* optional upper half of 64 bit caps */ - if (!aa_unpack_u32(e, &(rules->caps.allow.cap[1]), NULL)) + if (!aa_unpack_cap_high(e, &rules->caps.allow, NULL)) goto fail; - if (!aa_unpack_u32(e, &(rules->caps.audit.cap[1]), NULL)) + if (!aa_unpack_cap_high(e, &rules->caps.audit, NULL)) goto fail; - if (!aa_unpack_u32(e, &(rules->caps.quiet.cap[1]), NULL)) + if (!aa_unpack_cap_high(e, &rules->caps.quiet, NULL)) goto fail; - if (!aa_unpack_u32(e, &(tmpcap.cap[1]), NULL)) + if (!aa_unpack_cap_high(e, &tmpcap, NULL)) goto fail; if (!aa_unpack_nameX(e, AA_STRUCTEND, NULL)) goto fail; @@ -924,9 +944,9 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name) info = "failed to unpack extended profile capabilities"; if (aa_unpack_nameX(e, AA_STRUCT, "capsx")) { /* optional extended caps mediation mask */ - if (!aa_unpack_u32(e, &(rules->caps.extended.cap[0]), NULL)) + if (!aa_unpack_cap_low(e, &rules->caps.extended, NULL)) goto fail; - if (!aa_unpack_u32(e, &(rules->caps.extended.cap[1]), NULL)) + if (!aa_unpack_cap_high(e, &rules->caps.extended, NULL)) goto fail; if (!aa_unpack_nameX(e, AA_STRUCTEND, NULL)) goto fail; diff --git a/security/commoncap.c b/security/commoncap.c index aec62db55271..5bb7d1e96277 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -589,7 +589,6 @@ static inline int bprm_caps_from_vfs_caps(struct cpu_vfs_cap_data *caps, bool *has_fcap) { struct cred *new = bprm->cred; - unsigned i; int ret = 0; if (caps->magic_etc & VFS_CAP_FLAGS_EFFECTIVE) @@ -598,22 +597,17 @@ static inline int bprm_caps_from_vfs_caps(struct cpu_vfs_cap_data *caps, if (caps->magic_etc & VFS_CAP_REVISION_MASK) *has_fcap = true; - CAP_FOR_EACH_U32(i) { - __u32 permitted = caps->permitted.cap[i]; - __u32 inheritable = caps->inheritable.cap[i]; - - /* - * pP' = (X & fP) | (pI & fI) - * The addition of pA' is handled later. - */ - new->cap_permitted.cap[i] = - (new->cap_bset.cap[i] & permitted) | - (new->cap_inheritable.cap[i] & inheritable); + /* + * pP' = (X & fP) | (pI & fI) + * The addition of pA' is handled later. + */ + new->cap_permitted.val = + (new->cap_bset.val & caps->permitted.val) | + (new->cap_inheritable.val & caps->inheritable.val); - if (permitted & ~new->cap_permitted.cap[i]) - /* insufficient to execute correctly */ - ret = -EPERM; - } + if (caps->permitted.val & ~new->cap_permitted.val) + /* insufficient to execute correctly */ + ret = -EPERM; /* * For legacy apps, with no internal support for recognizing they @@ -644,7 +638,6 @@ int get_vfs_caps_from_disk(struct mnt_idmap *idmap, { struct inode *inode = d_backing_inode(dentry); __u32 magic_etc; - unsigned tocopy, i; int size; struct vfs_ns_cap_data data, *nscaps = &data; struct vfs_cap_data *caps = (struct vfs_cap_data *) &data; @@ -677,17 +670,14 @@ int get_vfs_caps_from_disk(struct mnt_idmap *idmap, case VFS_CAP_REVISION_1: if (size != XATTR_CAPS_SZ_1) return -EINVAL; - tocopy = VFS_CAP_U32_1; break; case VFS_CAP_REVISION_2: if (size != XATTR_CAPS_SZ_2) return -EINVAL; - tocopy = VFS_CAP_U32_2; break; case VFS_CAP_REVISION_3: if (size != XATTR_CAPS_SZ_3) return -EINVAL; - tocopy = VFS_CAP_U32_3; rootkuid = make_kuid(fs_ns, le32_to_cpu(nscaps->rootid)); break; @@ -705,15 +695,20 @@ int get_vfs_caps_from_disk(struct mnt_idmap *idmap, if (!rootid_owns_currentns(rootvfsuid)) return -ENODATA; - CAP_FOR_EACH_U32(i) { - if (i >= tocopy) - break; - cpu_caps->permitted.cap[i] = le32_to_cpu(caps->data[i].permitted); - cpu_caps->inheritable.cap[i] = le32_to_cpu(caps->data[i].inheritable); + cpu_caps->permitted.val = le32_to_cpu(caps->data[0].permitted); + cpu_caps->inheritable.val = le32_to_cpu(caps->data[0].inheritable); + + /* + * Rev1 had just a single 32-bit word, later expanded + * to a second one for the high bits + */ + if ((magic_etc & VFS_CAP_REVISION_MASK) != VFS_CAP_REVISION_1) { + cpu_caps->permitted.val += (u64)le32_to_cpu(caps->data[1].permitted) << 32; + cpu_caps->inheritable.val += (u64)le32_to_cpu(caps->data[1].inheritable) << 32; } - cpu_caps->permitted.cap[CAP_LAST_U32] &= CAP_LAST_U32_VALID_MASK; - cpu_caps->inheritable.cap[CAP_LAST_U32] &= CAP_LAST_U32_VALID_MASK; + cpu_caps->permitted.val &= CAP_VALID_MASK; + cpu_caps->inheritable.val &= CAP_VALID_MASK; cpu_caps->rootid = vfsuid_into_kuid(rootvfsuid); diff --git a/tools/testing/selftests/bpf/progs/test_deny_namespace.c b/tools/testing/selftests/bpf/progs/test_deny_namespace.c index 09ad5a4ebd1f..591104e79812 100644 --- a/tools/testing/selftests/bpf/progs/test_deny_namespace.c +++ b/tools/testing/selftests/bpf/progs/test_deny_namespace.c @@ -6,7 +6,7 @@ #include <linux/capability.h> struct kernel_cap_struct { - __u32 cap[_LINUX_CAPABILITY_U32S_3]; + __u64 val; } __attribute__((preserve_access_index)); struct cred { @@ -19,14 +19,13 @@ SEC("lsm.s/userns_create") int BPF_PROG(test_userns_create, const struct cred *cred, int ret) { struct kernel_cap_struct caps = cred->cap_effective; - int cap_index = CAP_TO_INDEX(CAP_SYS_ADMIN); - __u32 cap_mask = CAP_TO_MASK(CAP_SYS_ADMIN); + __u64 cap_mask = BIT_LL(CAP_SYS_ADMIN); if (ret) return 0; ret = -EPERM; - if (caps.cap[cap_index] & cap_mask) + if (caps.val & cap_mask) return 0; return -EPERM;