For regular arraymaps and percpu arraymaps, if the lookup is known to be
in bounds, the inlined bounds check can be omitted. One fewer jump puts
less pressure on the branch predictor. While this is unlikely to make a
measurable difference for real workloads, the optimization is essentially
free, so it is worth taking.

JIT diff for regular arraymap (x86-64):

  ; val = bpf_map_lookup_elem(&map_array, &key);
-   22: movabsq $-131387164803072, %rdi
+   22: movabsq $-131387246749696, %rdi
    2c: addq    $472, %rdi
    33: movl    (%rsi), %eax
-   36: cmpq    $2, %rax
-   3a: jae     0x45
-   3c: imulq   $48, %rax, %rax
-   40: addq    %rdi, %rax
-   43: jmp     0x47
-   45: xorl    %eax, %eax
-   47: movl    $4, %edi
+   36: imulq   $48, %rax, %rax
+   3a: addq    %rdi, %rax
+   3d: jmp     0x41
+   3f: xorl    %eax, %eax
+   41: movl    $4, %edi

JIT diff for percpu arraymap (x86-64):

  ; val = bpf_map_lookup_elem(&map_array_pcpu, &key);
-   22: movabsq $-131387183532032, %rdi
+   22: movabsq $-131387273779200, %rdi
    2c: addq    $472, %rdi
    33: movl    (%rsi), %eax
-   36: cmpq    $2, %rax
-   3a: jae     0x52
-   3c: shlq    $3, %rax
-   40: addq    %rdi, %rax
-   43: movq    (%rax), %rax
-   47: addq    %gs:170664, %rax
-   50: jmp     0x54
-   52: xorl    %eax, %eax
-   54: movl    $4, %edi
+   36: shlq    $3, %rax
+   3a: addq    %rdi, %rax
+   3d: movq    (%rax), %rax
+   41: addq    %gs:170664, %rax
+   4a: jmp     0x4e
+   4c: xorl    %eax, %eax
+   4e: movl    $4, %edi

Signed-off-by: Daniel Xu <dxu@xxxxxxxxx>
---
 kernel/bpf/arraymap.c | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 8dbdceeead95..7385104dc0d0 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -221,11 +221,13 @@ static int array_map_gen_lookup(struct bpf_map *map,
 
 	*insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
 	*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
-	if (!map->bypass_spec_v1) {
-		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 4);
-		*insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
-	} else {
-		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 3);
+	if (!inbounds) {
+		if (!map->bypass_spec_v1) {
+			*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 4);
+			*insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
+		} else {
+			*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 3);
+		}
 	}
 
 	if (is_power_of_2(elem_size)) {
@@ -269,11 +271,13 @@ static int percpu_array_map_gen_lookup(struct bpf_map *map,
 	*insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, offsetof(struct bpf_array, pptrs));
 
 	*insn++ = BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 0);
-	if (!map->bypass_spec_v1) {
-		*insn++ = BPF_JMP_IMM(BPF_JGE, BPF_REG_0, map->max_entries, 6);
-		*insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_0, array->index_mask);
-	} else {
-		*insn++ = BPF_JMP_IMM(BPF_JGE, BPF_REG_0, map->max_entries, 5);
+	if (!inbounds) {
+		if (!map->bypass_spec_v1) {
+			*insn++ = BPF_JMP_IMM(BPF_JGE, BPF_REG_0, map->max_entries, 6);
+			*insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_0, array->index_mask);
+		} else {
+			*insn++ = BPF_JMP_IMM(BPF_JGE, BPF_REG_0, map->max_entries, 5);
+		}
 	}
 
 	*insn++ = BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 3);
-- 
2.47.1
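
For context, below is a minimal sketch (not part of this patch) of the kind of
program the optimization targets: the lookup key is a constant that is provably
less than max_entries, so a lookup marked as inbounds lets the JIT-inlined
cmpq/jae bounds check shown above be dropped. The map, section, and program
names are made up for illustration, and whether a given lookup is actually
treated as inbounds depends on verifier-side changes outside this file.

/*
 * Illustrative sketch only; names below are hypothetical.
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, 2);
	__type(key, __u32);
	__type(value, __u64);
} map_array SEC(".maps");

SEC("tp/syscalls/sys_enter_getpid")
int inbounds_lookup(void *ctx)
{
	__u32 key = 1;	/* constant and < max_entries (2), so provably in bounds */
	__u64 *val;

	/*
	 * With the index known to be in range, the inlined lookup no longer
	 * needs the cmpq/jae pair shown in the JIT diffs above.
	 */
	val = bpf_map_lookup_elem(&map_array, &key);
	if (val)
		__sync_fetch_and_add(val, 1);

	return 0;
}

char _license[] SEC("license") = "GPL";

Note that the NULL check on the returned pointer is unchanged here; this patch
only affects the bounds check emitted in the inlined lookup sequence.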