From: Hou Tao <houtao1@xxxxxxxxxx> When updating or deleting a map in map htab, the map may still be accessed by non-sleepable program or sleepable program. However bpf_fd_htab_map_update_elem() decreases the ref-count of the inner map directly through bpf_map_put(), if the ref-count is the last ref-count which is true for most cases, the inner map will be freed by ops->map_free() in a kworker. But for now, most .map_free() callbacks don't use synchronize_rcu() or its variants to wait for the elapse of an RCU grace period, so bpf program which is accessing the inner map may incur a use-after-free problem. Fix it by deferring the invocation of bpf_map_put() after the elapse of both one RCU grace period and one tasks trace RCU grace period. Fixes: bba1dc0b55ac ("bpf: Remove redundant synchronize_rcu.") Fixes: 638e4b825d52 ("bpf: Allows per-cpu maps and map-in-map in sleepable programs") Signed-off-by: Hou Tao <houtao1@xxxxxxxxxx> --- kernel/bpf/hashtab.c | 25 +++++++++++++++---------- kernel/bpf/map_in_map.h | 4 ++-- 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index 81b9f237c942b..0013329af6d36 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -1813,10 +1813,10 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map, } else { value = l->key + roundup_key_size; if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) { - struct bpf_map **inner_map = value; + void *inner = READ_ONCE(*(void **)value); - /* Actual value is the id of the inner map */ - map_id = map->ops->map_fd_sys_lookup_elem(*inner_map); + /* Actual value is the id of the inner map */ + map_id = map->ops->map_fd_sys_lookup_elem(inner); value = &map_id; } @@ -2553,12 +2553,16 @@ static struct bpf_map *htab_of_map_alloc(union bpf_attr *attr) static void *htab_of_map_lookup_elem(struct bpf_map *map, void *key) { - struct bpf_map **inner_map = htab_map_lookup_elem(map, key); + struct bpf_inner_map_element *element; + void **ptr; - if 
(!inner_map) + ptr = htab_map_lookup_elem(map, key); + if (!ptr) return NULL; - return READ_ONCE(*inner_map); + /* element must be non-NULL */ + element = READ_ONCE(*ptr); + return element->map; } static int htab_of_map_gen_lookup(struct bpf_map *map, @@ -2570,11 +2574,12 @@ static int htab_of_map_gen_lookup(struct bpf_map *map, BUILD_BUG_ON(!__same_type(&__htab_map_lookup_elem, (void *(*)(struct bpf_map *map, void *key))NULL)); *insn++ = BPF_EMIT_CALL(__htab_map_lookup_elem); - *insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 2); + *insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 3); *insn++ = BPF_ALU64_IMM(BPF_ADD, ret, offsetof(struct htab_elem, key) + round_up(map->key_size, 8)); *insn++ = BPF_LDX_MEM(BPF_DW, ret, ret, 0); + *insn++ = BPF_LDX_MEM(BPF_DW, ret, ret, 0); return insn - insn_buf; } @@ -2592,9 +2597,9 @@ const struct bpf_map_ops htab_of_maps_map_ops = { .map_get_next_key = htab_map_get_next_key, .map_lookup_elem = htab_of_map_lookup_elem, .map_delete_elem = htab_map_delete_elem, - .map_fd_get_ptr = bpf_map_fd_get_ptr, - .map_fd_put_ptr = bpf_map_fd_put_ptr, - .map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem, + .map_fd_get_ptr = bpf_map_of_map_fd_get_ptr, + .map_fd_put_ptr = bpf_map_of_map_fd_put_ptr, + .map_fd_sys_lookup_elem = bpf_map_of_map_fd_sys_lookup_elem, .map_gen_lookup = htab_of_map_gen_lookup, .map_check_btf = map_check_no_btf, .map_mem_usage = htab_map_mem_usage, diff --git a/kernel/bpf/map_in_map.h b/kernel/bpf/map_in_map.h index 1fa688b8882ae..f8719bcd7c254 100644 --- a/kernel/bpf/map_in_map.h +++ b/kernel/bpf/map_in_map.h @@ -10,8 +10,8 @@ struct file; struct bpf_map; struct bpf_inner_map_element { - /* map must be the first member, array_of_map_gen_lookup() depends on it - * to dereference map correctly. + /* map must be the first member, array_of_map_gen_lookup() and + * htab_of_map_lookup_elem() depend on it to dereference map correctly. */ struct bpf_map *map; struct rcu_head rcu; -- 2.29.2