[PATCH bpf-next 3/3] bpf: arraymap: Skip boundscheck during inlining when possible

For regular and percpu arraymaps, if the lookup is known to be in bounds,
the inlined bounds check can be omitted. One fewer jump puts less pressure
on the branch predictor. While it probably won't affect real workloads
much, the change is essentially free, so we might as well take the win.
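
For reference, a minimal hypothetical reproducer along these lines - the map
names match the JIT diffs below, but the value layout (48 bytes to match the
imulq $48), max_entries, attach point, and program body are assumptions, not
the actual test program:

  /* Hypothetical reproducer (not part of the patch): the constant key lets
   * the verifier prove the index is below max_entries, so the inlined
   * bounds check can be skipped.
   */
  #include <linux/bpf.h>
  #include <bpf/bpf_helpers.h>

  struct val {
  	__u64 a, b, c, d, e, f;		/* 48 bytes, matching imulq $48 */
  };

  struct {
  	__uint(type, BPF_MAP_TYPE_ARRAY);
  	__uint(max_entries, 2);
  	__type(key, __u32);
  	__type(value, struct val);
  } map_array SEC(".maps");

  struct {
  	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
  	__uint(max_entries, 2);
  	__type(key, __u32);
  	__type(value, struct val);
  } map_array_pcpu SEC(".maps");

  SEC("tp/syscalls/sys_enter_getpid")
  int lookup_inbounds(void *ctx)
  {
  	__u32 key = 1;			/* constant < max_entries: in bounds */
  	struct val *v;

  	v = bpf_map_lookup_elem(&map_array, &key);
  	if (v)
  		v->a++;

  	v = bpf_map_lookup_elem(&map_array_pcpu, &key);
  	if (v)
  		v->b++;

  	return 0;
  }

  char LICENSE[] SEC("license") = "GPL";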

JIT diff for regular arraymap (x86-64):

     ; val = bpf_map_lookup_elem(&map_array, &key);
    -  22:   movabsq $-131387164803072, %rdi
    +  22:   movabsq $-131387246749696, %rdi
       2c:   addq    $472, %rdi
       33:   movl    (%rsi), %eax
    -  36:   cmpq    $2, %rax
    -  3a:   jae     0x45
    -  3c:   imulq   $48, %rax, %rax
    -  40:   addq    %rdi, %rax
    -  43:   jmp     0x47
    -  45:   xorl    %eax, %eax
    -  47:   movl    $4, %edi
    +  36:   imulq   $48, %rax, %rax
    +  3a:   addq    %rdi, %rax
    +  3d:   jmp     0x41
    +  3f:   xorl    %eax, %eax
    +  41:   movl    $4, %edi

JIT diff for percpu arraymap (x86-64):

     ; val = bpf_map_lookup_elem(&map_array_pcpu, &key);
    -  22:   movabsq $-131387183532032, %rdi
    +  22:   movabsq $-131387273779200, %rdi
       2c:   addq    $472, %rdi
       33:   movl    (%rsi), %eax
    -  36:   cmpq    $2, %rax
    -  3a:   jae     0x52
    -  3c:   shlq    $3, %rax
    -  40:   addq    %rdi, %rax
    -  43:   movq    (%rax), %rax
    -  47:   addq    %gs:170664, %rax
    -  50:   jmp     0x54
    -  52:   xorl    %eax, %eax
    -  54:   movl    $4, %edi
    +  36:   shlq    $3, %rax
    +  3a:   addq    %rdi, %rax
    +  3d:   movq    (%rax), %rax
    +  41:   addq    %gs:170664, %rax
    +  4a:   jmp     0x4e
    +  4c:   xorl    %eax, %eax
    +  4e:   movl    $4, %edi

Signed-off-by: Daniel Xu <dxu@xxxxxxxxx>
---
 kernel/bpf/arraymap.c | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 8dbdceeead95..7385104dc0d0 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -221,11 +221,13 @@ static int array_map_gen_lookup(struct bpf_map *map,
 
 	*insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
 	*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
-	if (!map->bypass_spec_v1) {
-		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 4);
-		*insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
-	} else {
-		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 3);
+	if (!inbounds) {
+		if (!map->bypass_spec_v1) {
+			*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 4);
+			*insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
+		} else {
+			*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 3);
+		}
 	}
 
 	if (is_power_of_2(elem_size)) {
@@ -269,11 +271,13 @@ static int percpu_array_map_gen_lookup(struct bpf_map *map,
 	*insn++ = BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, offsetof(struct bpf_array, pptrs));
 
 	*insn++ = BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 0);
-	if (!map->bypass_spec_v1) {
-		*insn++ = BPF_JMP_IMM(BPF_JGE, BPF_REG_0, map->max_entries, 6);
-		*insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_0, array->index_mask);
-	} else {
-		*insn++ = BPF_JMP_IMM(BPF_JGE, BPF_REG_0, map->max_entries, 5);
+	if (!inbounds) {
+		if (!map->bypass_spec_v1) {
+			*insn++ = BPF_JMP_IMM(BPF_JGE, BPF_REG_0, map->max_entries, 6);
+			*insn++ = BPF_ALU32_IMM(BPF_AND, BPF_REG_0, array->index_mask);
+		} else {
+			*insn++ = BPF_JMP_IMM(BPF_JGE, BPF_REG_0, map->max_entries, 5);
+		}
 	}
 
 	*insn++ = BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 3);
-- 
2.47.1




