Add bpf_sysctl_get_name() helper to copy sysctl name (/proc/sys/ entry) into provided by BPF_PROG_TYPE_CGROUP_SYSCTL program buffer. By default full name (w/o /proc/sys/) is copied, e.g. "net/ipv4/tcp_mem". If BPF_F_SYSCTL_BASE_NAME flag is set, only base name will be copied, e.g. "tcp_mem". Documentation for the new helper is provided in bpf.h UAPI. Signed-off-by: Andrey Ignatov <rdna@xxxxxx> --- include/uapi/linux/bpf.h | 22 ++++++++++++- kernel/bpf/cgroup.c | 70 +++++++++++++++++++++++++++++++++++++++- 2 files changed, 90 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 4cfda9c16327..c9e8a1f22c14 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -2479,6 +2479,22 @@ union bpf_attr { * Return * 0 if iph and th are a valid SYN cookie ACK, or a negative error * otherwise. + * + * int bpf_sysctl_get_name(struct bpf_sysctl *ctx, char *buf, size_t buf_len, u64 flags) + * Description + * Get name of sysctl in /proc/sys/ and copy it into provided by + * program buffer *buf* of size *buf_len*. + * + * The buffer is always NUL terminated, unless it's zero-sized. + * + * If *flags* is zero, full name (e.g. "net/ipv4/tcp_mem") is + * copied. Use **BPF_F_SYSCTL_BASE_NAME** flag to copy base name + * only (e.g. "tcp_mem"). + * Return + * Number of character copied (not including the trailing NUL). + * + * **-E2BIG** if the buffer wasn't big enough (*buf* will contain + * truncated name in this case). */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -2581,7 +2597,8 @@ union bpf_attr { FN(skb_ecn_set_ce), \ FN(get_listener_sock), \ FN(skc_lookup_tcp), \ - FN(tcp_check_syncookie), + FN(tcp_check_syncookie), \ + FN(sysctl_get_name), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call @@ -2648,6 +2665,9 @@ enum bpf_func_id { #define BPF_F_ADJ_ROOM_ENCAP_L4_GRE (1ULL << 3) #define BPF_F_ADJ_ROOM_ENCAP_L4_UDP (1ULL << 4) +/* BPF_FUNC_sysctl_get_name flags. */ +#define BPF_F_SYSCTL_BASE_NAME (1ULL << 0) + /* Mode for BPF_FUNC_skb_adjust_room helper. */ enum bpf_adj_room_mode { BPF_ADJ_ROOM_NET, diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 610491b5f0aa..a68387043244 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -14,6 +14,7 @@ #include <linux/filter.h> #include <linux/slab.h> #include <linux/sysctl.h> +#include <linux/string.h> #include <linux/bpf.h> #include <linux/bpf-cgroup.h> #include <net/sock.h> @@ -806,10 +807,77 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head, } EXPORT_SYMBOL(__cgroup_bpf_run_filter_sysctl); +static ssize_t sysctl_cpy_dir(const struct ctl_dir *dir, char **bufp, + size_t *lenp) +{ + ssize_t tmp_ret = 0, ret; + + if (dir->header.parent) { + tmp_ret = sysctl_cpy_dir(dir->header.parent, bufp, lenp); + if (tmp_ret < 0) + return tmp_ret; + } + + ret = strscpy(*bufp, dir->header.ctl_table[0].procname, *lenp); + if (ret < 0) + return ret; + *bufp += ret; + *lenp -= ret; + ret += tmp_ret; + + /* Avoid leading slash. */ + if (!ret) + return ret; + + tmp_ret = strscpy(*bufp, "/", *lenp); + if (tmp_ret < 0) + return tmp_ret; + *bufp += tmp_ret; + *lenp -= tmp_ret; + + return ret + tmp_ret; +} + +BPF_CALL_4(bpf_sysctl_get_name, struct bpf_sysctl_kern *, ctx, char *, buf, + size_t, buf_len, u64, flags) +{ + ssize_t tmp_ret = 0, ret; + + if (!buf) + return -EINVAL; + + if (!(flags & BPF_F_SYSCTL_BASE_NAME)) { + if (!ctx->head) + return -EINVAL; + tmp_ret = sysctl_cpy_dir(ctx->head->parent, &buf, &buf_len); + if (tmp_ret < 0) + return tmp_ret; + } + + ret = strscpy(buf, ctx->table->procname, buf_len); + + return ret < 0 ? ret : tmp_ret + ret; +} + +static const struct bpf_func_proto bpf_sysctl_get_name_proto = { + .func = bpf_sysctl_get_name, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_PTR_TO_MEM, + .arg3_type = ARG_CONST_SIZE, + .arg4_type = ARG_ANYTHING, +}; + static const struct bpf_func_proto * sysctl_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { - return cgroup_base_func_proto(func_id, prog); + switch (func_id) { + case BPF_FUNC_sysctl_get_name: + return &bpf_sysctl_get_name_proto; + default: + return cgroup_base_func_proto(func_id, prog); + } } static bool sysctl_is_valid_access(int off, int size, enum bpf_access_type type, -- 2.17.1