In the interest of making bpf() more useful by unprivileged users, this patch teaches bpf to respect access modes on map and prog inodes. The permissions are: R on a map: read the map W on a map: write the map Referencing a map from a program should require RW. R on a prog: Read or introspect the prog W on a prog: Attach the prog to something Test-running a prog is a form of introspection, so it requires RW. Detaching a prog merely uses the fd for identification, so neither R nor W is needed. This is likely incomplete, and it has some comments that should be removed. This patch uses WRITE instead of EXEC as the permission needed to run (by attaching or test-running) a program. EXEC seems nicer, but O_MAYEXEC isn't merged, which makes using X awkward. --- include/linux/bpf.h | 15 +++++++------ kernel/bpf/arraymap.c | 8 ++++++- kernel/bpf/cgroup.c | 6 ++++- kernel/bpf/inode.c | 25 ++++++++++++++------- kernel/bpf/syscall.c | 51 ++++++++++++++++++++++++++++++------------ kernel/events/core.c | 5 +++-- net/core/dev.c | 4 +++- net/core/filter.c | 8 ++++--- net/netfilter/xt_bpf.c | 5 +++-- net/packet/af_packet.c | 2 +- 10 files changed, 89 insertions(+), 40 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 18f4cc2c6acd..2d5e1a4dff6c 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -630,9 +630,9 @@ extern const struct bpf_prog_ops bpf_offload_prog_ops; extern const struct bpf_verifier_ops tc_cls_act_analyzer_ops; extern const struct bpf_verifier_ops xdp_analyzer_ops; -struct bpf_prog *bpf_prog_get(u32 ufd); +struct bpf_prog *bpf_prog_get(u32 ufd, int mask); struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type, - bool attach_drv); + bool attach_drv, int mask); struct bpf_prog * __must_check bpf_prog_add(struct bpf_prog *prog, int i); void bpf_prog_sub(struct bpf_prog *prog, int i); struct bpf_prog * __must_check bpf_prog_inc(struct bpf_prog *prog); @@ -662,7 +662,7 @@ void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr); extern int sysctl_unprivileged_bpf_disabled; int bpf_map_new_fd(struct bpf_map *map, int flags); -int bpf_prog_new_fd(struct bpf_prog *prog); +int bpf_prog_new_fd(struct bpf_prog *prog, int flags); int bpf_obj_pin_user(u32 ufd, const char __user *pathname); int bpf_obj_get_user(const char __user *pathname, int flags); @@ -733,7 +733,7 @@ static inline int bpf_map_attr_numa_node(const union bpf_attr *attr) attr->numa_node : NUMA_NO_NODE; } -struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type type); +struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type type, int mask); int array_map_alloc_check(union bpf_attr *attr); int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, @@ -850,7 +850,7 @@ static inline int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, } static inline struct bpf_prog *bpf_prog_get_type_path(const char *name, - enum bpf_prog_type type) + enum bpf_prog_type type, int mask) { return ERR_PTR(-EOPNOTSUPP); } @@ -878,9 +878,10 @@ static inline int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog, #endif /* CONFIG_BPF_SYSCALL */ static inline struct bpf_prog *bpf_prog_get_type(u32 ufd, - enum bpf_prog_type type) + enum bpf_prog_type type, + int mask) { - return bpf_prog_get_type_dev(ufd, type, false); + return bpf_prog_get_type_dev(ufd, type, false, mask); } bool bpf_prog_get_ok(struct bpf_prog *, enum bpf_prog_type *, bool); diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 1c65ce0098a9..7e17a5d42110 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -522,6 +522,10 @@ int bpf_fd_array_map_lookup_elem(struct bpf_map *map, void *key, u32 *value) } /* only called from syscall */ +/* + * XXX: it's totally unclear to me what this ends up doing with the fd + * in general. + */ int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file, void *key, void *value, u64 map_flags) { @@ -569,7 +573,9 @@ static void *prog_fd_array_get_ptr(struct bpf_map *map, struct file *map_file, int fd) { struct bpf_array *array = container_of(map, struct bpf_array, map); - struct bpf_prog *prog = bpf_prog_get(fd); + + /* XXX: what, exactly, does this end up doing to the prog in question? */ + struct bpf_prog *prog = bpf_prog_get(fd, FMODE_READ | FMODE_WRITE); if (IS_ERR(prog)) return prog; diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 0a00eaca6fae..1450c3bdab82 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -562,7 +562,11 @@ int cgroup_bpf_prog_detach(const union bpf_attr *attr, enum bpf_prog_type ptype) if (IS_ERR(cgrp)) return PTR_ERR(cgrp); - prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype); + /* + * No particular access required -- this only uses the fd to identify + * a program, not to do anything with the program. + */ + prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype, 0); if (IS_ERR(prog)) prog = NULL; diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index cc0d0cf114e3..cb07736b33ae 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -58,7 +58,7 @@ static void bpf_any_put(void *raw, enum bpf_type type) } } -static void *bpf_fd_probe_obj(u32 ufd, enum bpf_type *type) +static void *bpf_fd_probe_obj(u32 ufd, enum bpf_type *type, int mask) { void *raw; @@ -66,7 +66,7 @@ static void *bpf_fd_probe_obj(u32 ufd, enum bpf_type *type) raw = bpf_map_get_with_uref(ufd); if (IS_ERR(raw)) { *type = BPF_TYPE_PROG; - raw = bpf_prog_get(ufd); + raw = bpf_prog_get(ufd, mask); } return raw; @@ -430,7 +430,12 @@ int bpf_obj_pin_user(u32 ufd, const char __user *pathname) if (IS_ERR(pname)) return PTR_ERR(pname); - raw = bpf_fd_probe_obj(ufd, &type); + /* + * Pinning an object effectively grants the caller all access, because + * the caller ends up owning the inode. So require all access. + * XXX: If we use FMODE_EXEC, we should require FMODE_EXEC too. + */ + raw = bpf_fd_probe_obj(ufd, &type, FMODE_READ | FMODE_WRITE); if (IS_ERR(raw)) { ret = PTR_ERR(raw); goto out; @@ -456,6 +461,10 @@ static void *bpf_obj_do_get(const struct filename *pathname, if (ret) return ERR_PTR(ret); + /* + * XXX: O_MAYEXEC doesn't exist, which is problematic here if we + * want to use FMODE_EXEC. + */ inode = d_backing_inode(path.dentry); ret = inode_permission(inode, ACC_MODE(flags)); if (ret) @@ -499,7 +508,7 @@ int bpf_obj_get_user(const char __user *pathname, int flags) } if (type == BPF_TYPE_PROG) - ret = bpf_prog_new_fd(raw); + ret = bpf_prog_new_fd(raw, f_flags); else if (type == BPF_TYPE_MAP) ret = bpf_map_new_fd(raw, f_flags); else @@ -512,10 +521,10 @@ int bpf_obj_get_user(const char __user *pathname, int flags) return ret; } -static struct bpf_prog *__get_prog_inode(struct inode *inode, enum bpf_prog_type type) +static struct bpf_prog *__get_prog_inode(struct inode *inode, enum bpf_prog_type type, int mask) { struct bpf_prog *prog; - int ret = inode_permission(inode, MAY_READ); + int ret = inode_permission(inode, mask); if (ret) return ERR_PTR(ret); @@ -536,14 +545,14 @@ static struct bpf_prog *__get_prog_inode(struct inode *inode, enum bpf_prog_type return bpf_prog_inc(prog); } -struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type type) +struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type type, int mask) { struct bpf_prog *prog; struct path path; int ret = kern_path(name, LOOKUP_FOLLOW, &path); if (ret) return ERR_PTR(ret); - prog = __get_prog_inode(d_backing_inode(path.dentry), type); + prog = __get_prog_inode(d_backing_inode(path.dentry), type, mask); if (!IS_ERR(prog)) touch_atime(&path); path_put(&path); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 5d141f16f6fa..23f8f89d2a86 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -447,6 +447,7 @@ int bpf_map_new_fd(struct bpf_map *map, int flags) int bpf_get_file_flag(int flags) { + /* XXX: What about exec? */ if ((flags & BPF_F_RDONLY) && (flags & BPF_F_WRONLY)) return -EINVAL; if (flags & BPF_F_RDONLY) @@ -556,6 +557,10 @@ static int map_create(union bpf_attr *attr) if (err) return -EINVAL; + /* + * XXX: I'm a bit confused. Why would you ever create a map and + * grant *yourself* less than full permission? + */ f_flags = bpf_get_file_flag(attr->map_flags); if (f_flags < 0) return f_flags; @@ -1411,7 +1416,7 @@ const struct file_operations bpf_prog_fops = { .write = bpf_dummy_write, }; -int bpf_prog_new_fd(struct bpf_prog *prog) +int bpf_prog_new_fd(struct bpf_prog *prog, int flags) { int ret; @@ -1420,10 +1425,10 @@ int bpf_prog_new_fd(struct bpf_prog *prog) return ret; return anon_inode_getfd("bpf-prog", &bpf_prog_fops, prog, - O_RDWR | O_CLOEXEC); + flags | O_CLOEXEC); } -static struct bpf_prog *____bpf_prog_get(struct fd f) +static struct bpf_prog *____bpf_prog_get(struct fd f, int mask) { if (!f.file) return ERR_PTR(-EBADF); @@ -1431,6 +1436,10 @@ static struct bpf_prog *____bpf_prog_get(struct fd f) fdput(f); return ERR_PTR(-EINVAL); } + if ((f.file->f_mode & mask) != mask) { + fdput(f); + return ERR_PTR(-EACCES); + } return f.file->private_data; } @@ -1497,12 +1506,12 @@ bool bpf_prog_get_ok(struct bpf_prog *prog, } static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *attach_type, - bool attach_drv) + bool attach_drv, int mask) { struct fd f = fdget(ufd); struct bpf_prog *prog; - prog = ____bpf_prog_get(f); + prog = ____bpf_prog_get(f, mask); if (IS_ERR(prog)) return prog; if (!bpf_prog_get_ok(prog, attach_type, attach_drv)) { @@ -1516,15 +1525,15 @@ static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *attach_type, return prog; } -struct bpf_prog *bpf_prog_get(u32 ufd) +struct bpf_prog *bpf_prog_get(u32 ufd, int mask) { - return __bpf_prog_get(ufd, NULL, false); + return __bpf_prog_get(ufd, NULL, false, mask); } struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type, - bool attach_drv) + bool attach_drv, int mask) { - return __bpf_prog_get(ufd, &type, attach_drv); + return __bpf_prog_get(ufd, &type, attach_drv, mask); } EXPORT_SYMBOL_GPL(bpf_prog_get_type_dev); @@ -1707,7 +1716,7 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr) if (err) goto free_used_maps; - err = bpf_prog_new_fd(prog); + err = bpf_prog_new_fd(prog, O_RDWR /* | O_MAYEXEC */); if (err < 0) { /* failed to allocate fd. * bpf_prog_put() is needed because the above @@ -1808,7 +1817,7 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr) } raw_tp->btp = btp; - prog = bpf_prog_get(attr->raw_tracepoint.prog_fd); + prog = bpf_prog_get(attr->raw_tracepoint.prog_fd, MAY_EXEC); if (IS_ERR(prog)) { err = PTR_ERR(prog); goto out_free_tp; @@ -1929,7 +1938,7 @@ static int bpf_prog_attach(const union bpf_attr *attr) return -EINVAL; } - prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype); + prog = bpf_prog_get_type(attr->attach_bpf_fd, ptype, MAY_EXEC); if (IS_ERR(prog)) return PTR_ERR(prog); @@ -2083,7 +2092,11 @@ static int bpf_prog_test_run(const union bpf_attr *attr, (!attr->test.ctx_size_out && attr->test.ctx_out)) return -EINVAL; - prog = bpf_prog_get(attr->test.prog_fd); + /* + * A test run is is a form of query, so require RW. Using W as a proxy for + * X, since X is awkward due to a lack of O_MAYEXEC. + */ + prog = bpf_prog_get(attr->test.prog_fd, MAY_READ | MAY_WRITE); if (IS_ERR(prog)) return PTR_ERR(prog); @@ -2147,7 +2160,11 @@ static int bpf_prog_get_fd_by_id(const union bpf_attr *attr) if (IS_ERR(prog)) return PTR_ERR(prog); - fd = bpf_prog_new_fd(prog); + /* + * We have all permissions. This is okay, since we also require + * CAP_SYS_ADMIN to do this at all. + */ + fd = bpf_prog_new_fd(prog, O_RDWR /* | O_MAYEXEC */); if (fd < 0) bpf_prog_put(prog); @@ -2638,6 +2655,11 @@ static int bpf_obj_get_info_by_fd(const union bpf_attr *attr, if (!f.file) return -EBADFD; + if (!(f.file->f_mode & FMODE_READ)) { + err = -EACCES; + goto out; + } + if (f.file->f_op == &bpf_prog_fops) err = bpf_prog_get_info_by_fd(f.file->private_data, attr, uattr); @@ -2649,6 +2671,7 @@ static int bpf_obj_get_info_by_fd(const union bpf_attr *attr, else err = -EINVAL; +out: fdput(f); return err; } diff --git a/kernel/events/core.c b/kernel/events/core.c index 026a14541a38..f2e3973b28f2 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -8876,7 +8876,8 @@ static int perf_event_set_bpf_handler(struct perf_event *event, u32 prog_fd) if (event->prog) return -EEXIST; - prog = bpf_prog_get_type(prog_fd, BPF_PROG_TYPE_PERF_EVENT); + /* Should maybe be FMODE_EXEC? */ + prog = bpf_prog_get_type(prog_fd, BPF_PROG_TYPE_PERF_EVENT, FMODE_WRITE); if (IS_ERR(prog)) return PTR_ERR(prog); @@ -8942,7 +8943,7 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd) /* bpf programs can only be attached to u/kprobe or tracepoint */ return -EINVAL; - prog = bpf_prog_get(prog_fd); + prog = bpf_prog_get(prog_fd, FMODE_WRITE); if (IS_ERR(prog)) return PTR_ERR(prog); diff --git a/net/core/dev.c b/net/core/dev.c index fc676b2610e3..3fcaeae693bb 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -8093,8 +8093,10 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, return -EBUSY; } + /* XXX: FMODE_EXEC? */ prog = bpf_prog_get_type_dev(fd, BPF_PROG_TYPE_XDP, - bpf_op == ops->ndo_bpf); + bpf_op == ops->ndo_bpf, + FMODE_WRITE); if (IS_ERR(prog)) return PTR_ERR(prog); diff --git a/net/core/filter.c b/net/core/filter.c index 4e2a79b2fd77..9282462678fd 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -1544,7 +1544,8 @@ static struct bpf_prog *__get_bpf(u32 ufd, struct sock *sk) if (sock_flag(sk, SOCK_FILTER_LOCKED)) return ERR_PTR(-EPERM); - return bpf_prog_get_type(ufd, BPF_PROG_TYPE_SOCKET_FILTER); + /* FMODE_EXEC? */ + return bpf_prog_get_type(ufd, BPF_PROG_TYPE_SOCKET_FILTER, FMODE_WRITE); } int sk_attach_bpf(u32 ufd, struct sock *sk) @@ -1572,9 +1573,10 @@ int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk) if (sock_flag(sk, SOCK_FILTER_LOCKED)) return -EPERM; - prog = bpf_prog_get_type(ufd, BPF_PROG_TYPE_SOCKET_FILTER); + prog = bpf_prog_get_type(ufd, BPF_PROG_TYPE_SOCKET_FILTER, FMODE_WRITE); if (IS_ERR(prog) && PTR_ERR(prog) == -EINVAL) - prog = bpf_prog_get_type(ufd, BPF_PROG_TYPE_SK_REUSEPORT); + prog = bpf_prog_get_type(ufd, BPF_PROG_TYPE_SK_REUSEPORT, + FMODE_WRITE); if (IS_ERR(prog)) return PTR_ERR(prog); diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c index 13cf3f9b5938..34e5c08ee1f3 100644 --- a/net/netfilter/xt_bpf.c +++ b/net/netfilter/xt_bpf.c @@ -44,7 +44,7 @@ static int __bpf_mt_check_fd(int fd, struct bpf_prog **ret) { struct bpf_prog *prog; - prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_SOCKET_FILTER); + prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_SOCKET_FILTER, MAY_EXEC); if (IS_ERR(prog)) return PTR_ERR(prog); @@ -57,7 +57,8 @@ static int __bpf_mt_check_path(const char *path, struct bpf_prog **ret) if (strnlen(path, XT_BPF_PATH_MAX) == XT_BPF_PATH_MAX) return -EINVAL; - *ret = bpf_prog_get_type_path(path, BPF_PROG_TYPE_SOCKET_FILTER); + *ret = bpf_prog_get_type_path(path, BPF_PROG_TYPE_SOCKET_FILTER, + MAY_EXEC); return PTR_ERR_OR_ZERO(*ret); } diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 8d54f3047768..5b8c5e5d94bf 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1563,7 +1563,7 @@ static int fanout_set_data_ebpf(struct packet_sock *po, char __user *data, if (copy_from_user(&fd, data, len)) return -EFAULT; - new = bpf_prog_get_type(fd, BPF_PROG_TYPE_SOCKET_FILTER); + new = bpf_prog_get_type(fd, BPF_PROG_TYPE_SOCKET_FILTER, FMODE_WRITE); if (IS_ERR(new)) return PTR_ERR(new); -- 2.21.0