From: Björn Töpel <bjorn.topel@xxxxxxxxx>

The BPF dispatcher builds on top of the BPF trampoline ideas: it uses
bpf_arch_text_poke() and (re)uses the BPF JIT code generation. The
dispatcher builds a dispatch table for XDP programs, for retpoline
avoidance. The table is searched with a simple binary search, so
lookup is O(log n). Here, the dispatch table is limited to four
entries (for laziness reasons -- only one-byte relative jumps :-P).
If the dispatch table is full, it falls back to the retpoline path.

An example: A module/driver allocates a dispatcher. The dispatcher is
shared for all netdevs. Each netdev allocates a slot in the dispatcher
for its BPF program. The netdev then uses the dispatcher to call the
correct program with a direct call (actually a tail call).

Signed-off-by: Björn Töpel <bjorn.topel@xxxxxxxxx>
---
 arch/x86/net/bpf_jit_comp.c |  96 ++++++++++++++++++
 kernel/bpf/Makefile         |   1 +
 kernel/bpf/dispatcher.c     | 197 ++++++++++++++++++++++++++++++++++++
 3 files changed, 294 insertions(+)
 create mode 100644 kernel/bpf/dispatcher.c

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 28782a1c386e..d75aebf508b8 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -10,10 +10,12 @@
 #include <linux/if_vlan.h>
 #include <linux/bpf.h>
 #include <linux/memory.h>
+#include <linux/sort.h>
 #include <asm/extable.h>
 #include <asm/set_memory.h>
 #include <asm/nospec-branch.h>
 #include <asm/text-patching.h>
+#include <asm/asm-prototypes.h>
 
 static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
 {
@@ -1471,6 +1473,100 @@ int arch_prepare_bpf_trampoline(void *image, struct btf_func_model *m, u32 flags
 	return 0;
 }
 
+#if defined(CONFIG_BPF_JIT) && defined(CONFIG_RETPOLINE)
+
+/* Emits the dispatcher. Id lookup is limited to BPF_DISPATCHER_MAX,
+ * so it'll fit into PAGE_SIZE/2. The lookup is binary search: O(log
+ * n).
+ */
+static int emit_bpf_dispatcher(u8 **pprog, int a, int b, u64 *progs,
+			       u8 *fb)
+{
+	u8 *prog = *pprog, *jg_reloc;
+	int pivot, err, cnt = 0;
+	s64 jmp_offset;
+
+	if (a == b) {
+		emit_mov_imm64(&prog, BPF_REG_0, /* movabs func,%rax */
+			       progs[a] >> 32,
+			       (progs[a] << 32) >> 32);
+		EMIT3(0x48, 0x39, 0xC2);	/* cmp rdx, rax */
+		jmp_offset = fb - (prog + 2);
+		if (!is_imm8(jmp_offset))
+			return -1;
+		EMIT2(X86_JNE, jmp_offset);	/* jne retpoline */
+		err = emit_jmp(&prog, (void *)progs[a], /* jmp bpf_prog */
+			       prog);
+		if (err)
+			return err;
+		goto out;
+	}
+
+	pivot = (b - a) / 2;
+	emit_mov_imm64(&prog, BPF_REG_0, progs[a + pivot] >> 32,
+		       (progs[a + pivot] << 32) >> 32);
+	EMIT3(0x48, 0x39, 0xC2);	/* cmp rdx, rax */
+
+	jg_reloc = prog;
+	EMIT2(X86_JG, 0);	/* jg pivot + 1-part */
+
+	err = emit_bpf_dispatcher(&prog, a, a + pivot, progs, fb);
+	if (err)
+		return err;
+
+	jmp_offset = prog - (jg_reloc + 2);
+	if (!is_imm8(jmp_offset))
+		return -1;
+	emit_code(jg_reloc, X86_JG + (jmp_offset << 8), 2);
+
+	err = emit_bpf_dispatcher(&prog, a + pivot + 1, b, progs, fb);
+	if (err)
+		return err;
+out:
+	*pprog = prog;
+	return 0;
+}
+
+#endif
+
+static int cmp_ips(const void *a, const void *b)
+{
+	const u64 *ipa = a;
+	const u64 *ipb = b;
+
+	if (*ipa > *ipb)
+		return 1;
+	if (*ipa < *ipb)
+		return -1;
+	return 0;
+}
+
+#define BPF_DISPATCHER_MAX 4
+
+int arch_prepare_bpf_dispatcher(void *image, struct bpf_prog **progs,
+				int num_progs)
+{
+	u64 ips[BPF_DISPATCHER_MAX] = {};
+	u8 *fallback, *prog = image;
+	int i, err, cnt = 0;
+
+	if (!num_progs || num_progs > BPF_DISPATCHER_MAX)
+		return -EINVAL;
+
+	for (i = 0; i < num_progs; i++)
+		ips[i] = (u64)progs[i]->bpf_func;
+
+	EMIT2(0xEB, 5); /* jmp rip+5 (skip retpoline) */
+	fallback = prog;
+	err = emit_jmp(&prog,	/* jmp retpoline */
+		       __x86_indirect_thunk_rdx, prog);
+	if (err)
+		return err;
+
+	sort(&ips[0], num_progs, sizeof(ips[i]), cmp_ips, NULL);
+	return emit_bpf_dispatcher(&prog, 0, num_progs - 1, &ips[0], fallback);
+}
+
 struct x64_jit_data {
 	struct bpf_binary_header *header;
 	int *addrs;
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index 3f671bf617e8..d4f330351f87 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -8,6 +8,7 @@ obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o
 obj-$(CONFIG_BPF_SYSCALL) += disasm.o
 obj-$(CONFIG_BPF_JIT) += trampoline.o
 obj-$(CONFIG_BPF_SYSCALL) += btf.o
+obj-$(CONFIG_BPF_JIT) += dispatcher.o
 ifeq ($(CONFIG_NET),y)
 obj-$(CONFIG_BPF_SYSCALL) += devmap.o
 obj-$(CONFIG_BPF_SYSCALL) += cpumap.o
diff --git a/kernel/bpf/dispatcher.c b/kernel/bpf/dispatcher.c
new file mode 100644
index 000000000000..691898640720
--- /dev/null
+++ b/kernel/bpf/dispatcher.c
@@ -0,0 +1,197 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2019 Intel Corporation. */
+
+#ifdef CONFIG_RETPOLINE
+
+#include <linux/hash.h>
+#include <linux/bpf.h>
+#include <linux/filter.h>
+
+/* The BPF dispatcher is a multiway branch code generator. A user
+ * registers a slot (id) and can then update the BPF program for that
+ * slot. The dispatcher is jited, and will be rejited every time a
+ * slot is allocated/deallocated for performance reasons. An example:
+ * A module provides code for multiple netdevs. Each netdev can have
+ * one XDP program. The module code will allocate a dispatcher, and
+ * when the netdev enables XDP it allocates a new slot.
+ *
+ * Nothing like STATIC_CALL_INLINE is supported yet, so an explicit
+ * trampoline is needed:
+ *
+ * unsigned int dispatcher_trampoline(void *ctx, void *insn, int id)
+ */
+
+#define DISPATCHER_HASH_BITS 10
+#define DISPATCHER_TABLE_SIZE (1 << DISPATCHER_HASH_BITS)
+
+static struct hlist_head dispatcher_table[DISPATCHER_TABLE_SIZE];
+
+#define BPF_DISPATCHER_MAX 4
+
+struct bpf_dispatcher {
+	struct hlist_node hlist;
+	void *func;
+	struct bpf_prog *progs[BPF_DISPATCHER_MAX];
+	int num_progs;
+	void *image;
+	u64 selector;
+};
+
+static DEFINE_MUTEX(dispatcher_mutex);
+
+struct bpf_dispatcher *bpf_dispatcher_lookup(void *func)
+{
+	struct bpf_dispatcher *d;
+	struct hlist_head *head;
+	void *image;
+
+	head = &dispatcher_table[hash_ptr(func, DISPATCHER_HASH_BITS)];
+	hlist_for_each_entry(d, head, hlist) {
+		if (d->func == func)
+			return d;
+	}
+	d = kzalloc(sizeof(*d), GFP_KERNEL);
+	if (!d)
+		return NULL;
+
+	image = bpf_jit_alloc_exec(PAGE_SIZE);
+	if (!image) {
+		kfree(d);
+		return NULL;
+	}
+
+	d->func = func;
+	INIT_HLIST_NODE(&d->hlist);
+	hlist_add_head(&d->hlist, head);
+
+	set_vm_flush_reset_perms(image);
+	set_memory_x((long)image, 1);
+	d->image = image;
+	return d;
+}
+
+static void bpf_dispatcher_free(struct bpf_dispatcher *d)
+{
+	bpf_jit_free_exec(d->image);
+	hlist_del(&d->hlist);
+	kfree(d);
+}
+
+static int bpf_dispatcher_add_prog(struct bpf_dispatcher *d,
+				   struct bpf_prog *prog)
+{
+	struct bpf_prog **entry = NULL;
+	int i, err = 0;
+
+	if (d->num_progs == BPF_DISPATCHER_MAX)
+		return err;
+
+	for (i = 0; i < BPF_DISPATCHER_MAX; i++) {
+		if (!entry && !d->progs[i])
+			entry = &d->progs[i];
+		if (d->progs[i] == prog)
+			return err;
+	}
+
+	prog = bpf_prog_inc(prog);
+	if (IS_ERR(prog))
+		return err;
+
+	*entry = prog;
+	d->num_progs++;
+	return err;
+}
+
+static void bpf_dispatcher_remove_prog(struct bpf_dispatcher *d,
+				       struct bpf_prog *prog)
+{
+	int i;
+
+	for (i = 0; i < BPF_DISPATCHER_MAX; i++) {
+		if (d->progs[i] == prog) {
+			bpf_prog_put(prog);
+			d->progs[i] = NULL;
+			d->num_progs--;
+			break;
+		}
+	}
+}
+
+int __weak arch_prepare_bpf_dispatcher(void *image, struct bpf_prog **progs,
+				       int num_ids)
+{
+	return -ENOTSUPP;
+}
+
+/* NB! bpf_dispatcher_update() might free the dispatcher! */
+static int bpf_dispatcher_update(struct bpf_dispatcher *d)
+{
+	void *old_image = d->image + ((d->selector + 1) & 1) * PAGE_SIZE / 2;
+	void *new_image = d->image + (d->selector & 1) * PAGE_SIZE / 2;
+	int err;
+
+	if (d->num_progs == 0) {
+		err = bpf_arch_text_poke(d->func, BPF_MOD_JMP_TO_NOP,
+					 old_image, NULL);
+		bpf_dispatcher_free(d);
+		goto out;
+	}
+
+	err = arch_prepare_bpf_dispatcher(new_image, &d->progs[0],
+					  d->num_progs);
+	if (err)
+		goto out;
+
+	if (d->selector)
+		/* progs already running at this address */
+		err = bpf_arch_text_poke(d->func, BPF_MOD_JMP_TO_JMP,
+					 old_image, new_image);
+	else
+		/* first time registering */
+		err = bpf_arch_text_poke(d->func, BPF_MOD_NOP_TO_JMP,
+					 NULL, new_image);
+
+	if (err)
+		goto out;
+	d->selector++;
+
+out:
+	return err;
+}
+
+void bpf_dispatcher_change_prog(void *func, struct bpf_prog *from,
+				struct bpf_prog *to)
+{
+	struct bpf_dispatcher *d;
+
+	if (!from && !to)
+		return;
+
+	mutex_lock(&dispatcher_mutex);
+	d = bpf_dispatcher_lookup(func);
+	if (!d)
+		goto out;
+
+	if (from)
+		bpf_dispatcher_remove_prog(d, from);
+
+	if (to)
+		bpf_dispatcher_add_prog(d, to);
+
+	WARN_ON(bpf_dispatcher_update(d));
+
+out:
+	mutex_unlock(&dispatcher_mutex);
+}
+
+static int __init init_dispatchers(void)
+{
+	int i;
+
+	for (i = 0; i < DISPATCHER_TABLE_SIZE; i++)
+		INIT_HLIST_HEAD(&dispatcher_table[i]);
+	return 0;
+}
+late_initcall(init_dispatchers);
+
+#endif
-- 
2.20.1
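
As an illustration (not part of the patch): the code that
emit_bpf_dispatcher() generates for a full table of four programs has
roughly the C shape below, assuming the third trampoline argument (held
in %rdx) carries the bpf_func address that would otherwise be called
indirectly, i.e. the value compared against the sorted ips[] entries.
The names bpf_func_t, prog0..prog3, retpoline_rdx and dispatch are made
up for this sketch.

/* Illustration only; a C rendering of the emitted binary-search
 * branch structure for four sorted program addresses.
 */
typedef unsigned int (*bpf_func_t)(const void *ctx, const void *insn);

/* Placeholders for the four installed programs, sorted by address. */
static bpf_func_t prog0, prog1, prog2, prog3;

/* Placeholder for the __x86_indirect_thunk_rdx fallback path. */
static unsigned int retpoline_rdx(const void *ctx, const void *insn,
				  bpf_func_t target)
{
	return target(ctx, insn);
}

static unsigned int dispatch(const void *ctx, const void *insn,
			     bpf_func_t target)
{
	unsigned long ip = (unsigned long)target;

	if (ip > (unsigned long)prog1) {		/* pivot of [0..3] */
		if (ip > (unsigned long)prog2) {	/* pivot of [2..3] */
			if (target == prog3)
				return prog3(ctx, insn); /* direct call */
		} else if (target == prog2) {
			return prog2(ctx, insn);
		}
	} else if (ip > (unsigned long)prog0) {		/* pivot of [0..1] */
		if (target == prog1)
			return prog1(ctx, insn);
	} else if (target == prog0) {
		return prog0(ctx, insn);
	}
	/* not in the table: fall back to the retpoline path */
	return retpoline_rdx(ctx, insn, target);
}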
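
And a minimal sketch of how a driver might use the kernel/bpf/dispatcher.c
API when a netdev's XDP program changes. Only bpf_dispatcher_change_prog()
comes from this patch; the trampoline symbol and the helper below are
hypothetical, and the prototype is declared locally because this diff does
not add a header declaration.

#include <linux/bpf.h>
#include <linux/filter.h>

/* Declared here for the sketch; the patch defines it in
 * kernel/bpf/dispatcher.c without adding a header declaration.
 */
void bpf_dispatcher_change_prog(void *func, struct bpf_prog *from,
				struct bpf_prog *to);

/* Hypothetical explicit trampoline, as described in the dispatcher.c
 * comment. Its address is the dispatcher key and the
 * bpf_arch_text_poke() target.
 */
extern unsigned int my_xdp_dispatcher_trampoline(void *ctx, void *insn,
						 int id);

/* Hypothetical driver helper: swap the netdev's XDP program. Passing
 * old_prog == NULL installs into a free slot, new_prog == NULL removes
 * the program; the dispatcher is re-jited either way.
 */
static void my_xdp_prog_change(struct bpf_prog *old_prog,
			       struct bpf_prog *new_prog)
{
	bpf_dispatcher_change_prog((void *)my_xdp_dispatcher_trampoline,
				   old_prog, new_prog);
}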