For regular arraymaps and percpu arraymaps, if the lookup is known to be
in bounds, the inlined bounds check can be omitted. One fewer jump puts
less pressure on the branch predictor. While this is unlikely to make a
measurable difference for real workloads, the optimization is essentially
free, so it is worth taking.

JIT diff for regular arraymap (x86-64):

  ; val = bpf_map_lookup_elem(&map_array, &key);
-   22: movabsq $-131387164803072, %rdi
+   22: movabsq $-131387246749696, %rdi
    2c: addq    $472, %rdi
    33: movl    (%rsi), %eax
-   36: cmpq    $2, %rax
-   3a: jae     0x45
-   3c: imulq   $48, %rax, %rax
-   40: addq    %rdi, %rax
-   43: jmp     0x47
-   45: xorl    %eax, %eax
-   47: movl    $4, %edi
+   36: imulq   $48, %rax, %rax
+   3a: addq    %rdi, %rax
+   3d: jmp     0x41
+   3f: xorl    %eax, %eax
+   41: movl    $4, %edi

JIT diff for percpu arraymap (x86-64):

  ; val = bpf_map_lookup_elem(&map_array_pcpu, &key);
-   22: movabsq $-131387183532032, %rdi
+   22: movabsq $-131387273779200, %rdi
    2c: addq    $472, %rdi
    33: movl    (%rsi), %eax
-   36: cmpq    $2, %rax
-   3a: jae     0x52
-   3c: shlq    $3, %rax
-   40: addq    %rdi, %rax
-   43: movq    (%rax), %rax
-   47: addq    %gs:170664, %rax
-   50: jmp     0x54
-   52: xorl    %eax, %eax
-   54: movl    $4, %edi
+   36: shlq    $3, %rax
+   3a: addq    %rdi, %rax
+   3d: movq    (%rax), %rax
+   41: addq    %gs:170664, %rax
+   4a: jmp     0x4e
+   4c: xorl    %eax, %eax
+   4e: movl    $4, %edi

Signed-off-by: Daniel Xu <dxu@xxxxxxxxx>
---
 kernel/bpf/arraymap.c | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 8dbdceeead95..7385104dc0d0 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -221,11 +221,13 @@ static int array_map_gen_lookup(struct bpf_map *map,
 
 	*insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
 	*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
-	if (!map->bypass_spec_v1) {
-		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 4);
-		*insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
-	} else {
-		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 3);
+	if (!inbounds) {
+		if (!map->bypass_spec_v1) {
+			*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 4);
+			*insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
+		} else {
+			*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 3);
+		}
 	}
 
 	if (is_power_of_2(elem_size)) {
@@ -269,11 +271,13 @@ static int percpu_array_map_gen_lookup(struct bpf_map *map,
 	*insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, offsetof(struct bpf_array, pptrs));
 
 	*insn++ = BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 0);
-	if (!map->bypass_spec_v1) {
-		*insn++ = BPF_JMP_IMM(BPF_JGE, BPF_REG_0, map->max_entries, 6);
-		*insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_0, array->index_mask);
-	} else {
-		*insn++ = BPF_JMP_IMM(BPF_JGE, BPF_REG_0, map->max_entries, 5);
+	if (!inbounds) {
+		if (!map->bypass_spec_v1) {
+			*insn++ = BPF_JMP_IMM(BPF_JGE, BPF_REG_0, map->max_entries, 6);
+			*insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_0, array->index_mask);
+		} else {
+			*insn++ = BPF_JMP_IMM(BPF_JGE, BPF_REG_0, map->max_entries, 5);
+		}
 	}
 
 	*insn++ = BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 3);
-- 
2.47.1
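
For context, below is a minimal sketch (not part of this patch) of the kind of
program the optimization targets: the lookup key is a constant that is provably
less than max_entries, so a lookup marked as inbounds lets the JIT-inlined
cmpq/jae bounds check shown above be dropped. The map, section, and program
names are made up for illustration, and whether a given lookup is actually
treated as inbounds depends on verifier-side changes outside this file.

/*
 * Illustrative sketch only; names below are hypothetical.
 */
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, 2);
	__type(key, __u32);
	__type(value, __u64);
} map_array SEC(".maps");

SEC("tp/syscalls/sys_enter_getpid")
int inbounds_lookup(void *ctx)
{
	__u32 key = 1;	/* constant and < max_entries (2), so provably in bounds */
	__u64 *val;

	/*
	 * With the index known to be in range, the inlined lookup no longer
	 * needs the cmpq/jae pair shown in the JIT diffs above.
	 */
	val = bpf_map_lookup_elem(&map_array, &key);
	if (val)
		__sync_fetch_and_add(val, 1);

	return 0;
}

char _license[] SEC("license") = "GPL";

Note that the NULL check on the returned pointer is unchanged here; this patch
only affects the bounds check emitted in the inlined lookup sequence.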