This patch adds a "capability" member to "struct bpf_map". Each map type's capability is tagged in its "BPF_MAP_TYPE" entry in bpf_types.h. This cleans up the individual checks in bpf_map_meta_alloc() that decide whether a map can be used as an inner map. It is less error-prone to declare a map type's capability in the same place where the new map type is added, in bpf_types.h. That helps avoid mistakes such as forgetting to update other source files, like map_in_map.c here. Cc: Andrey Ignatov <rdna@xxxxxx> Signed-off-by: Martin KaFai Lau <kafai@xxxxxx> --- include/linux/bpf.h | 6 +++- include/linux/bpf_types.h | 62 ++++++++++++++++++++++----------------- kernel/bpf/btf.c | 2 +- kernel/bpf/map_in_map.c | 9 ++---- kernel/bpf/syscall.c | 19 ++++++++++-- kernel/bpf/verifier.c | 2 +- 6 files changed, 60 insertions(+), 40 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index efe8836b5c48..1e20b9911d48 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -97,6 +97,9 @@ struct bpf_map_memory { struct user_struct *user; }; +/* Cannot be used as an inner map */ +#define BPF_MAP_CAP_NO_INNER_MAP (1 << 0) + struct bpf_map { /* The first two cachelines with read-mostly members of which some * are also accessed in fast-path (e.g. ops, max_entries). 
@@ -120,6 +123,7 @@ struct bpf_map { struct bpf_map_memory memory; char name[BPF_OBJ_NAME_LEN]; u32 btf_vmlinux_value_type_id; + u32 capability; bool bypass_spec_v1; bool frozen; /* write-once; write-protected by freeze_mutex */ /* 22 bytes hole */ @@ -1037,7 +1041,7 @@ extern const struct file_operations bpf_iter_fops; #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \ extern const struct bpf_prog_ops _name ## _prog_ops; \ extern const struct bpf_verifier_ops _name ## _verifier_ops; -#define BPF_MAP_TYPE(_id, _ops) \ +#define BPF_MAP_TYPE(_id, _ops, map_cap) \ extern const struct bpf_map_ops _ops; #define BPF_LINK_TYPE(_id, _name) #include <linux/bpf_types.h> diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 29d22752fc87..652f17d646dd 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -76,47 +76,55 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_LSM, lsm, #endif /* CONFIG_BPF_LSM */ #endif -BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops) -BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, percpu_array_map_ops) -BPF_MAP_TYPE(BPF_MAP_TYPE_PROG_ARRAY, prog_array_map_ops) -BPF_MAP_TYPE(BPF_MAP_TYPE_PERF_EVENT_ARRAY, perf_event_array_map_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops, 0) +BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, percpu_array_map_ops, 0) +/* prog_array->aux->{type,jited} is a runtime binding. + * Doing static check alone in the verifier is not enough, + * so BPF_MAP_CAP_NO_INNER_MAP is needed. 
+ */ +BPF_MAP_TYPE(BPF_MAP_TYPE_PROG_ARRAY, prog_array_map_ops, + BPF_MAP_CAP_NO_INNER_MAP) +BPF_MAP_TYPE(BPF_MAP_TYPE_PERF_EVENT_ARRAY, perf_event_array_map_ops, 0) #ifdef CONFIG_CGROUPS -BPF_MAP_TYPE(BPF_MAP_TYPE_CGROUP_ARRAY, cgroup_array_map_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_CGROUP_ARRAY, cgroup_array_map_ops, 0) #endif #ifdef CONFIG_CGROUP_BPF -BPF_MAP_TYPE(BPF_MAP_TYPE_CGROUP_STORAGE, cgroup_storage_map_ops) -BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, cgroup_storage_map_ops) -#endif -BPF_MAP_TYPE(BPF_MAP_TYPE_HASH, htab_map_ops) -BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_HASH, htab_percpu_map_ops) -BPF_MAP_TYPE(BPF_MAP_TYPE_LRU_HASH, htab_lru_map_ops) -BPF_MAP_TYPE(BPF_MAP_TYPE_LRU_PERCPU_HASH, htab_lru_percpu_map_ops) -BPF_MAP_TYPE(BPF_MAP_TYPE_LPM_TRIE, trie_map_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_CGROUP_STORAGE, cgroup_storage_map_ops, + BPF_MAP_CAP_NO_INNER_MAP) +BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, cgroup_storage_map_ops, + BPF_MAP_CAP_NO_INNER_MAP) +#endif +BPF_MAP_TYPE(BPF_MAP_TYPE_HASH, htab_map_ops, 0) +BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_HASH, htab_percpu_map_ops, 0) +BPF_MAP_TYPE(BPF_MAP_TYPE_LRU_HASH, htab_lru_map_ops, 0) +BPF_MAP_TYPE(BPF_MAP_TYPE_LRU_PERCPU_HASH, htab_lru_percpu_map_ops, 0) +BPF_MAP_TYPE(BPF_MAP_TYPE_LPM_TRIE, trie_map_ops, 0) #ifdef CONFIG_PERF_EVENTS -BPF_MAP_TYPE(BPF_MAP_TYPE_STACK_TRACE, stack_trace_map_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_STACK_TRACE, stack_trace_map_ops, 0) #endif -BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY_OF_MAPS, array_of_maps_map_ops) -BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY_OF_MAPS, array_of_maps_map_ops, 0) +BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops, 0) #ifdef CONFIG_NET -BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops) -BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP_HASH, dev_map_hash_ops) -BPF_MAP_TYPE(BPF_MAP_TYPE_SK_STORAGE, sk_storage_map_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops, 0) +BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP_HASH, dev_map_hash_ops, 0) 
+BPF_MAP_TYPE(BPF_MAP_TYPE_SK_STORAGE, sk_storage_map_ops, 0) #if defined(CONFIG_BPF_STREAM_PARSER) -BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops) -BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKHASH, sock_hash_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops, 0) +BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKHASH, sock_hash_ops, 0) #endif -BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops, 0) #if defined(CONFIG_XDP_SOCKETS) -BPF_MAP_TYPE(BPF_MAP_TYPE_XSKMAP, xsk_map_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_XSKMAP, xsk_map_ops, 0) #endif #ifdef CONFIG_INET -BPF_MAP_TYPE(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, reuseport_array_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, reuseport_array_ops, 0) #endif #endif -BPF_MAP_TYPE(BPF_MAP_TYPE_QUEUE, queue_map_ops) -BPF_MAP_TYPE(BPF_MAP_TYPE_STACK, stack_map_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_QUEUE, queue_map_ops, 0) +BPF_MAP_TYPE(BPF_MAP_TYPE_STACK, stack_map_ops, 0) #if defined(CONFIG_BPF_JIT) -BPF_MAP_TYPE(BPF_MAP_TYPE_STRUCT_OPS, bpf_struct_ops_map_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_STRUCT_OPS, bpf_struct_ops_map_ops, + BPF_MAP_CAP_NO_INNER_MAP) #endif BPF_LINK_TYPE(BPF_LINK_TYPE_RAW_TRACEPOINT, raw_tracepoint) diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 58c9af1d4808..6b74ab8f8530 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -3481,7 +3481,7 @@ extern char __weak __start_BTF[]; extern char __weak __stop_BTF[]; extern struct btf *btf_vmlinux; -#define BPF_MAP_TYPE(_id, _ops) +#define BPF_MAP_TYPE(_id, _ops, map_cap) #define BPF_LINK_TYPE(_id, _name) static union { struct bpf_ctx_convert { diff --git a/kernel/bpf/map_in_map.c b/kernel/bpf/map_in_map.c index 17738c93bec8..6e1286ad7b76 100644 --- a/kernel/bpf/map_in_map.c +++ b/kernel/bpf/map_in_map.c @@ -17,13 +17,7 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd) if (IS_ERR(inner_map)) return inner_map; - /* prog_array->aux->{type,jited} is a runtime binding. - * Doing static check alone in the verifier is not enough. 
- */ - if (inner_map->map_type == BPF_MAP_TYPE_PROG_ARRAY || - inner_map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE || - inner_map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE || - inner_map->map_type == BPF_MAP_TYPE_STRUCT_OPS) { + if (inner_map->capability & BPF_MAP_CAP_NO_INNER_MAP) { fdput(f); return ERR_PTR(-ENOTSUPP); } @@ -56,6 +50,7 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd) inner_map_meta->map_flags = inner_map->map_flags; inner_map_meta->max_entries = inner_map->max_entries; inner_map_meta->spin_lock_off = inner_map->spin_lock_off; + inner_map_meta->capability = inner_map->capability; /* Misc members not needed in bpf_map_meta_equal() check. */ inner_map_meta->ops = inner_map->ops; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 431241c74614..f93a96b8d440 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -49,7 +49,7 @@ int sysctl_unprivileged_bpf_disabled __read_mostly; static const struct bpf_map_ops * const bpf_map_types[] = { #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) -#define BPF_MAP_TYPE(_id, _ops) \ +#define BPF_MAP_TYPE(_id, _ops, map_cap) \ [_id] = &_ops, #define BPF_LINK_TYPE(_id, _name) #include <linux/bpf_types.h> @@ -58,6 +58,17 @@ static const struct bpf_map_ops * const bpf_map_types[] = { #undef BPF_LINK_TYPE }; +static const u32 bpf_map_caps[] = { +#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) +#define BPF_MAP_TYPE(_id, _ops, map_cap) \ + [_id] = map_cap, +#define BPF_LINK_TYPE(_id, _name) +#include <linux/bpf_types.h> +#undef BPF_PROG_TYPE +#undef BPF_MAP_TYPE +#undef BPF_LINK_TYPE +}; + /* * If we're handed a bigger struct than we know of, ensure all the unknown bits * are 0 - i.e. 
new user-space does not rely on any kernel feature extensions @@ -131,6 +142,8 @@ static struct bpf_map *find_and_alloc_map(union bpf_attr *attr) return map; map->ops = ops; map->map_type = type; + map->capability = bpf_map_caps[type]; + return map; } @@ -1551,7 +1564,7 @@ static int map_freeze(const union bpf_attr *attr) static const struct bpf_prog_ops * const bpf_prog_types[] = { #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \ [_id] = & _name ## _prog_ops, -#define BPF_MAP_TYPE(_id, _ops) +#define BPF_MAP_TYPE(_id, _ops, map_cap) #define BPF_LINK_TYPE(_id, _name) #include <linux/bpf_types.h> #undef BPF_PROG_TYPE @@ -2333,7 +2346,7 @@ static int bpf_link_release(struct inode *inode, struct file *filp) #ifdef CONFIG_PROC_FS #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) -#define BPF_MAP_TYPE(_id, _ops) +#define BPF_MAP_TYPE(_id, _ops, map_cap) #define BPF_LINK_TYPE(_id, _name) [_id] = #_name, static const char *bpf_link_type_strs[] = { [BPF_LINK_TYPE_UNSPEC] = "<invalid>", diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 2ed8351f47a4..5f3b97d25c4e 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -27,7 +27,7 @@ static const struct bpf_verifier_ops * const bpf_verifier_ops[] = { #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \ [_id] = & _name ## _verifier_ops, -#define BPF_MAP_TYPE(_id, _ops) +#define BPF_MAP_TYPE(_id, _ops, map_cap) #define BPF_LINK_TYPE(_id, _name) #include <linux/bpf_types.h> #undef BPF_PROG_TYPE -- 2.24.1