From: Hou Tao <houtao1@xxxxxxxxxx>

When updating or deleting an inner map in a map-in-map array, the inner
map may still be accessed by both non-sleepable and sleepable bpf
programs. However, bpf_fd_array_map_update_elem() decreases the
ref-count of the inner map directly through bpf_map_put(). If that
ref-count happens to be the last one, which is true in most cases, the
inner map will be freed by ops->map_free() in a kworker. Most
.map_free() callbacks currently don't use synchronize_rcu() or its
variants to wait for an RCU grace period to elapse, so a bpf program
which is still accessing the inner map may hit a use-after-free.

Fix it by deferring the invocation of bpf_map_put() until both one RCU
grace period and one tasks trace RCU grace period have elapsed.

Fixes: bba1dc0b55ac ("bpf: Remove redundant synchronize_rcu.")
Fixes: 638e4b825d52 ("bpf: Allows per-cpu maps and map-in-map in sleepable programs")
Signed-off-by: Hou Tao <houtao1@xxxxxxxxxx>
---
 kernel/bpf/arraymap.c   | 26 +++++++++++++++++---------
 kernel/bpf/map_in_map.h |  3 +++
 2 files changed, 20 insertions(+), 9 deletions(-)

diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index c1124b71da158..2229253bcb6bd 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -1355,12 +1355,18 @@ static void array_of_map_free(struct bpf_map *map)
 
 static void *array_of_map_lookup_elem(struct bpf_map *map, void *key)
 {
-	struct bpf_map **inner_map = array_map_lookup_elem(map, key);
+	struct bpf_inner_map_element *element;
+	void **ptr;
 
-	if (!inner_map)
+	ptr = array_map_lookup_elem(map, key);
+	if (!ptr)
 		return NULL;
 
-	return READ_ONCE(*inner_map);
+	element = READ_ONCE(*ptr);
+	/* Uninitialized element ? */
+	if (!element)
+		return NULL;
+	return element->map;
 }
 
 static int array_of_map_gen_lookup(struct bpf_map *map,
@@ -1376,10 +1382,10 @@ static int array_of_map_gen_lookup(struct bpf_map *map,
 	*insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
 	*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
 	if (!map->bypass_spec_v1) {
-		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 6);
+		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 8);
 		*insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
 	} else {
-		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 5);
+		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 7);
 	}
 	if (is_power_of_2(elem_size))
 		*insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(elem_size));
@@ -1387,7 +1393,9 @@ static int array_of_map_gen_lookup(struct bpf_map *map,
 		*insn++ = BPF_ALU64_IMM(BPF_MUL, ret, elem_size);
 	*insn++ = BPF_ALU64_REG(BPF_ADD, ret, map_ptr);
 	*insn++ = BPF_LDX_MEM(BPF_DW, ret, ret, 0);
-	*insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 1);
+	*insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 4);
+	*insn++ = BPF_LDX_MEM(BPF_DW, ret, ret, 0);
+	*insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 2);
 	*insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
 	*insn++ = BPF_MOV64_IMM(ret, 0);
 
@@ -1401,9 +1409,9 @@ const struct bpf_map_ops array_of_maps_map_ops = {
 	.map_get_next_key = array_map_get_next_key,
 	.map_lookup_elem = array_of_map_lookup_elem,
 	.map_delete_elem = fd_array_map_delete_elem,
-	.map_fd_get_ptr = bpf_map_fd_get_ptr,
-	.map_fd_put_ptr = bpf_map_fd_put_ptr,
-	.map_fd_sys_lookup_elem = bpf_map_fd_sys_lookup_elem,
+	.map_fd_get_ptr = bpf_map_of_map_fd_get_ptr,
+	.map_fd_put_ptr = bpf_map_of_map_fd_put_ptr,
+	.map_fd_sys_lookup_elem = bpf_map_of_map_fd_sys_lookup_elem,
 	.map_gen_lookup = array_of_map_gen_lookup,
 	.map_lookup_batch = generic_map_lookup_batch,
 	.map_update_batch = generic_map_update_batch,
diff --git a/kernel/bpf/map_in_map.h b/kernel/bpf/map_in_map.h
index 4a0d66757a065..1fa688b8882ae 100644
--- a/kernel/bpf/map_in_map.h
+++ b/kernel/bpf/map_in_map.h
@@ -10,6 +10,9 @@ struct file;
 struct bpf_map;
 
 struct bpf_inner_map_element {
+	/* map must be the first member, array_of_map_gen_lookup() depends on it
+	 * to dereference map correctly.
+	 */
	struct bpf_map *map;
	struct rcu_head rcu;
 };
-- 
2.29.2
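
Below is a minimal sketch of the two-stage deferred put described in the
commit message. The real callback is bpf_map_of_map_fd_put_ptr() in
kernel/bpf/map_in_map.c, which is not part of this diff, so the helper
name bpf_inner_map_element_free() and the exact callback chain are
assumptions for illustration rather than the actual implementation:

#include <linux/bpf.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

#include "map_in_map.h"

static void bpf_inner_map_element_free_rcu(struct rcu_head *rcu)
{
	struct bpf_inner_map_element *element =
		container_of(rcu, struct bpf_inner_map_element, rcu);

	/* Both grace periods have elapsed, so it is now safe to drop what
	 * may be the last reference on the inner map.
	 */
	bpf_map_put(element->map);
	kfree(element);
}

static void bpf_inner_map_element_free_tt_rcu(struct rcu_head *rcu)
{
	/* One tasks trace RCU grace period has elapsed (covering sleepable
	 * programs); now wait for a normal RCU grace period as well
	 * (covering non-sleepable programs).
	 */
	call_rcu(rcu, bpf_inner_map_element_free_rcu);
}

/* Hypothetical helper that a .map_fd_put_ptr callback would call instead
 * of invoking bpf_map_put() directly.
 */
void bpf_inner_map_element_free(struct bpf_inner_map_element *element)
{
	call_rcu_tasks_trace(&element->rcu, bpf_inner_map_element_free_tt_rcu);
}

Chaining the two callbacks matters because sleepable programs are
protected by tasks trace RCU while non-sleepable programs are protected
by normal RCU; waiting for a single grace period of either flavour would
leave the other class of programs exposed.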