[PATCH 1/1] netfilter: add bpf base hook program generator

Florian Westphal <fw@xxxxxxxxx> · Thu, 14 Oct 2021 14:10:37 +0200

Add a kernel bpf program generator for netfilter base hooks.

Currently netfilter hooks are invoked by nf_hook_slow:

for i in hooks; do
  verdict = hooks[i]->indirect_func(hooks->[i].hook_arg, skb, state);

  switch (verdict) { ....

The autogenerator unrolls the loop, so we get:

state->priv = hooks->[0].hook_arg;
v = first_hook_function(state);
if (v != ACCEPT) goto done;
state->priv = hooks->[1].hook_arg;
v = second_hook_function(state); ...

Indirections are replaced by direct calls. Invocation of the
autogenerated programs is done via bpf dispatcher from nf_hook().

The autogenerated program has the same return value scheme as
nf_hook_slow(). NF_HOOK() points are converted to call the
autogenerated bpf program instead of nf_hook_slow().

Purpose of this is to eventually add a 'netfilter prog type' to bpf and
permit attachment of (userspace generated) bpf programs to the netfilter
machinery, e.g.  'attach bpf prog id 1234 to ipv6 PREROUTING at prio -300'.

This will require to expose the context structure (program argument,
'__nf_hook_state', with rewriting accesses to match nf_hook_state layout.

TODO:
1. Test !x86_64.
2. Test bridge family.
3. fix checkpatch errors.

Future work:
add support for NAT hooks, they still use indirect calls, but those
are less of a problem because these get called only once per
connection.

Could annotate ops struct as to what kind of verdicts the
C function can return.  This would allow to elide retval
check when hook can only return NF_ACCEPT.

Could add extra support for INGRESS hook to move more code from
inline functions to the autogenerated program.

Signed-off-by: Florian Westphal <fw@xxxxxxxxx>
---
 include/linux/netfilter.h           |  56 ++++
 include/net/netfilter/nf_hook_bpf.h |  14 +
 net/netfilter/Kconfig               |  10 +
 net/netfilter/Makefile              |   1 +
 net/netfilter/core.c                |  74 ++++-
 net/netfilter/nf_hook_bpf.c         | 425 ++++++++++++++++++++++++++++
 6 files changed, 577 insertions(+), 3 deletions(-)
 create mode 100644 include/net/netfilter/nf_hook_bpf.h
 create mode 100644 net/netfilter/nf_hook_bpf.c

diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
index c5de525218c2..9d22e672710c 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
@@ -2,6 +2,7 @@
 #ifndef __LINUX_NETFILTER_H
 #define __LINUX_NETFILTER_H
 
+#include <linux/filter.h>
 #include <linux/init.h>
 #include <linux/skbuff.h>
 #include <linux/net.h>
@@ -106,6 +107,9 @@ struct nf_hook_entries_rcu_head {
 };
 
 struct nf_hook_entries {
+#if IS_ENABLED(CONFIG_NF_HOOK_BPF)
+	struct bpf_prog			*hook_prog;
+#endif
 	u16				num_hook_entries;
 	/* padding */
 	struct nf_hook_entry		hooks[];
@@ -205,6 +209,17 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
 
 void nf_hook_slow_list(struct list_head *head, struct nf_hook_state *state,
 		       const struct nf_hook_entries *e);
+
+#if IS_ENABLED(CONFIG_NF_HOOK_BPF)
+DECLARE_BPF_DISPATCHER(nf_hook_base);
+
+static __always_inline int bpf_prog_run_nf(const struct bpf_prog *prog,
+					   struct nf_hook_state *state)
+{
+	return __bpf_prog_run(prog, state, BPF_DISPATCHER_FUNC(nf_hook_base));
+}
+#endif
+
 /**
  *	nf_hook - call a netfilter hook
  *
@@ -259,11 +274,24 @@ static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net,
 
 	if (hook_head) {
 		struct nf_hook_state state;
+#if IS_ENABLED(CONFIG_NF_HOOK_BPF)
+		const struct bpf_prog *p = READ_ONCE(hook_head->hook_prog);
+
+		nf_hook_state_init(&state, hook, pf, indev, outdev,
+				   sk, net, okfn);
+
+		state.priv = (void *)hook_head;
+		state.skb = skb;
 
+		migrate_disable();
+		ret = bpf_prog_run_nf(p, &state);
+		migrate_enable();
+#else
 		nf_hook_state_init(&state, hook, pf, indev, outdev,
 				   sk, net, okfn);
 
 		ret = nf_hook_slow(skb, &state, hook_head);
+#endif
 	}
 	rcu_read_unlock();
 
@@ -341,10 +369,38 @@ NF_HOOK_LIST(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk,
 
 	if (hook_head) {
 		struct nf_hook_state state;
+#if IS_ENABLED(CONFIG_NF_HOOK_BPF)
+		const struct bpf_prog *p = hook_head->hook_prog;
+		struct sk_buff *skb, *next;
+		struct list_head sublist;
+		int ret;
 
 		nf_hook_state_init(&state, hook, pf, in, out, sk, net, okfn);
 
+		INIT_LIST_HEAD(&sublist);
+
+		migrate_disable();
+
+		list_for_each_entry_safe(skb, next, head, list) {
+			skb_list_del_init(skb);
+
+			state.priv = (void *)hook_head;
+			state.skb = skb;
+
+			ret = bpf_prog_run_nf(p, &state);
+			if (ret == 1)
+				list_add_tail(&skb->list, &sublist);
+		}
+
+		migrate_enable();
+
+		/* Put passed packets back on main list */
+		list_splice(&sublist, head);
+#else
+		nf_hook_state_init(&state, hook, pf, in, out, sk, net, okfn);
+
 		nf_hook_slow_list(head, &state, hook_head);
+#endif
 	}
 	rcu_read_unlock();
 }
diff --git a/include/net/netfilter/nf_hook_bpf.h b/include/net/netfilter/nf_hook_bpf.h
new file mode 100644
index 000000000000..12304e9f3d25
--- /dev/null
+++ b/include/net/netfilter/nf_hook_bpf.h
@@ -0,0 +1,14 @@
+struct bpf_dispatcher;
+struct bpf_prog;
+
+struct bpf_prog *nf_hook_bpf_create(const struct nf_hook_entries *n);
+struct bpf_prog *nf_hook_bpf_create_fb(void);
+
+#if IS_ENABLED(CONFIG_NF_HOOK_BPF)
+void nf_hook_bpf_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from, struct bpf_prog *to);
+#else
+static inline void
+nf_hook_bpf_change_prog(struct bpf_dispatcher *d, struct bpf_prog *f, struct bpf_prog *t)
+{
+}
+#endif
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 54395266339d..6eec1720ff3d 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -19,6 +19,16 @@ config NETFILTER_FAMILY_BRIDGE
 config NETFILTER_FAMILY_ARP
 	bool
 
+config HAVE_NF_HOOK_BPF
+	bool
+
+config NF_HOOK_BPF
+	bool "netfilter base hook bpf translator"
+	depends on BPF_JIT
+	help
+	  This partially unrolls nf_hook_slow interpreter loop with
+	  auto-generated BPF programs.
+
 config NETFILTER_NETLINK_HOOK
 	tristate "Netfilter base hook dump support"
 	depends on NETFILTER_ADVANCED
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index aab20e575ecd..13f1b95a7809 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -16,6 +16,7 @@ nf_conntrack-$(CONFIG_NF_CT_PROTO_SCTP) += nf_conntrack_proto_sctp.o
 nf_conntrack-$(CONFIG_NF_CT_PROTO_GRE) += nf_conntrack_proto_gre.o
 
 obj-$(CONFIG_NETFILTER) = netfilter.o
+obj-$(CONFIG_NF_HOOK_BPF) += nf_hook_bpf.o
 
 obj-$(CONFIG_NETFILTER_NETLINK) += nfnetlink.o
 obj-$(CONFIG_NETFILTER_NETLINK_ACCT) += nfnetlink_acct.o
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index f4359179eba9..56d82822cab7 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -24,6 +24,7 @@
 #include <linux/rcupdate.h>
 #include <net/net_namespace.h>
 #include <net/netfilter/nf_queue.h>
+#include <net/netfilter/nf_hook_bpf.h>
 #include <net/sock.h>
 
 #include "nf_internals.h"
@@ -47,6 +48,12 @@ static DEFINE_MUTEX(nf_hook_mutex);
 #define nf_entry_dereference(e) \
 	rcu_dereference_protected(e, lockdep_is_held(&nf_hook_mutex))
 
+#if IS_ENABLED(CONFIG_NF_HOOK_BPF)
+DEFINE_BPF_DISPATCHER(nf_hook_base);
+
+static struct bpf_prog *fallback_nf_hook_slow;
+#endif
+
 static struct nf_hook_entries *allocate_hook_entries_size(u16 num)
 {
 	struct nf_hook_entries *e;
@@ -58,9 +65,25 @@ static struct nf_hook_entries *allocate_hook_entries_size(u16 num)
 	if (num == 0)
 		return NULL;
 
+#if IS_ENABLED(CONFIG_NF_HOOK_BPF)
+	if (!fallback_nf_hook_slow) {
+		/* never free'd */
+		fallback_nf_hook_slow = nf_hook_bpf_create_fb();
+
+		if (!fallback_nf_hook_slow)
+			return NULL;
+	}
+#endif
+
 	e = kvzalloc(alloc, GFP_KERNEL);
-	if (e)
-		e->num_hook_entries = num;
+	if (!e)
+		return NULL;
+
+	e->num_hook_entries = num;
+#if IS_ENABLED(CONFIG_NF_HOOK_BPF)
+	e->hook_prog = fallback_nf_hook_slow;
+#endif
+
 	return e;
 }
 
@@ -104,6 +127,7 @@ nf_hook_entries_grow(const struct nf_hook_entries *old,
 {
 	unsigned int i, alloc_entries, nhooks, old_entries;
 	struct nf_hook_ops **orig_ops = NULL;
+	struct bpf_prog *hook_bpf_prog;
 	struct nf_hook_ops **new_ops;
 	struct nf_hook_entries *new;
 	bool inserted = false;
@@ -156,6 +180,27 @@ nf_hook_entries_grow(const struct nf_hook_entries *old,
 		new->hooks[nhooks].priv = reg->priv;
 	}
 
+	hook_bpf_prog = nf_hook_bpf_create(new);
+
+	/* XXX: jit failure handling?
+	 * We could refuse hook registration.
+	 *
+	 * For now, allocate_hook_entries_size() sets
+	 * ->hook_prog to a small fallback program that
+	 *  calls nf_hook_slow().
+	 */
+	if (hook_bpf_prog) {
+		struct bpf_prog *old_prog = NULL;
+
+		new->hook_prog = hook_bpf_prog;
+
+		if (old)
+			old_prog = old->hook_prog;
+
+		nf_hook_bpf_change_prog(BPF_DISPATCHER_PTR(nf_hook_base),
+					old_prog, hook_bpf_prog);
+	}
+
 	return new;
 }
 
@@ -221,6 +266,7 @@ static void *__nf_hook_entries_try_shrink(struct nf_hook_entries *old,
 					  struct nf_hook_entries __rcu **pp)
 {
 	unsigned int i, j, skip = 0, hook_entries;
+	struct bpf_prog *hook_bpf_prog = NULL;
 	struct nf_hook_entries *new = NULL;
 	struct nf_hook_ops **orig_ops;
 	struct nf_hook_ops **new_ops;
@@ -244,8 +290,15 @@ static void *__nf_hook_entries_try_shrink(struct nf_hook_entries *old,
 
 	hook_entries -= skip;
 	new = allocate_hook_entries_size(hook_entries);
-	if (!new)
+	if (!new) {
+#if IS_ENABLED(CONFIG_NF_HOOK_BPF)
+		struct bpf_prog *old_prog = old->hook_prog;
+
+		WRITE_ONCE(old->hook_prog, fallback_nf_hook_slow);
+		nf_hook_bpf_change_prog(BPF_DISPATCHER_PTR(nf_hook_base), old_prog, NULL);
+#endif
 		return NULL;
+	}
 
 	new_ops = nf_hook_entries_get_hook_ops(new);
 	for (i = 0, j = 0; i < old->num_hook_entries; i++) {
@@ -256,7 +309,16 @@ static void *__nf_hook_entries_try_shrink(struct nf_hook_entries *old,
 		j++;
 	}
 	hooks_validate(new);
+
+#if IS_ENABLED(CONFIG_NF_HOOK_BPF)
+	/* if this fails fallback prog calls nf_hook_slow. */
+	hook_bpf_prog = nf_hook_bpf_create(new);
+	if (hook_bpf_prog)
+		new->hook_prog = hook_bpf_prog;
+#endif
 out_assign:
+	nf_hook_bpf_change_prog(BPF_DISPATCHER_PTR(nf_hook_base),
+				old ? old->hook_prog : NULL, hook_bpf_prog);
 	rcu_assign_pointer(*pp, new);
 	return old;
 }
@@ -584,6 +646,7 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state,
 	int ret;
 
 	state->skb = skb;
+
 	for (; s < e->num_hook_entries; s++) {
 		verdict = nf_hook_entry_hookfn(&e->hooks[s], skb, state);
 		switch (verdict & NF_VERDICT_MASK) {
@@ -764,6 +827,11 @@ int __init netfilter_init(void)
 	if (ret < 0)
 		goto err_pernet;
 
+#if IS_ENABLED(CONFIG_NF_HOOK_BPF)
+	fallback_nf_hook_slow = nf_hook_bpf_create_fb();
+	WARN_ON_ONCE(!fallback_nf_hook_slow);
+#endif
+
 	return 0;
 err_pernet:
 	unregister_pernet_subsys(&netfilter_net_ops);
diff --git a/net/netfilter/nf_hook_bpf.c b/net/netfilter/nf_hook_bpf.c
new file mode 100644
index 000000000000..cd8aba6da53b
--- /dev/null
+++ b/net/netfilter/nf_hook_bpf.c
@@ -0,0 +1,425 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/string.h>
+#include <linux/hashtable.h>
+#include <linux/jhash.h>
+#include <linux/netfilter.h>
+
+#include <net/netfilter/nf_queue.h>
+
+/* BPF translator for netfilter hooks.
+ *
+ * Copyright (c) 2021 Red Hat GmbH
+ *
+ * Author: Florian Westphal <fw@xxxxxxxxx>
+ *
+ * Unroll nf_hook_slow interpreter loop into an equivalent bpf
+ * program that can be called *instead* of nf_hook_slow().
+ * This program thus has same return value as nf_hook_slow and
+ * handles nfqueue and packet drops internally.
+ *
+ * These bpf programs are called/run from nf_hook() inline function.
+ *
+ * Register usage is:
+ *
+ * BPF_REG_0: verdict.
+ * BPF_REG_1: struct nf_hook_state *
+ * BPF_REG_2: reserved as arg to nf_queue()
+ * BPF_REG_3: reserved as arg to nf_queue()
+ *
+ * Prologue storage:
+ * BPF_REG_6: copy of REG_1 (original struct nf_hook_state *)
+ * BPF_REG_7: copy of original state->priv value
+ * BPF_REG_8: hook_index.  Inited to 0, increments on each hook call.
+ */
+
+#define JMP_INVALID 0
+#define JIT_SIZE_MAX 0xffff
+
+struct nf_hook_prog {
+	struct bpf_insn *insns;
+	unsigned int pos;
+};
+
+static bool emit(struct nf_hook_prog *p, struct bpf_insn insn)
+{
+	if (WARN_ON_ONCE(p->pos >= BPF_MAXINSNS))
+		return false;
+
+	p->insns[p->pos] = insn;
+	p->pos++;
+	return true;
+}
+
+static bool xlate_one_hook(struct nf_hook_prog *p,
+			   const struct nf_hook_entries *e,
+			   const struct nf_hook_entry *h)
+{
+	int width = bytes_to_bpf_size(sizeof(h->priv));
+
+	/* if priv is NULL, the called hookfn does not use the priv member. */
+	if (!h->priv)
+		goto emit_hook_call;
+
+	if (WARN_ON_ONCE(width < 0))
+		return false;
+
+	/* x = entries[s]->priv; */
+	if (!emit(p, BPF_LDX_MEM(width, BPF_REG_2, BPF_REG_7,
+				 (unsigned long)&h->priv - (unsigned long)e)))
+		return false;
+
+	/* state->priv = x */
+	if (!emit(p, BPF_STX_MEM(width, BPF_REG_6, BPF_REG_2,
+				 offsetof(struct nf_hook_state, priv))))
+		return false;
+
+emit_hook_call:
+	if (!emit(p, BPF_EMIT_CALL(h->hook)))
+		return false;
+
+	/* Only advance to next hook on ACCEPT verdict.
+	 * Else, skip rest and move to tail.
+	 *
+	 * Postprocessing patches the jump offset to the
+	 * correct position, after last hook.
+	 */
+	if (!emit(p, BPF_JMP_IMM(BPF_JNE, BPF_REG_0, NF_ACCEPT, JMP_INVALID)))
+		return false;
+
+	return true;
+}
+
+static bool emit_mov_ptr_reg(struct nf_hook_prog *p, u8 dreg, u8 sreg)
+{
+	if (sizeof(void *) == sizeof(u64))
+		return emit(p, BPF_MOV64_REG(dreg, sreg));
+	if (sizeof(void *) == sizeof(u32))
+		return emit(p, BPF_MOV32_REG(dreg, sreg));
+
+	return false;
+}
+
+static bool do_prologue(struct nf_hook_prog *p)
+{
+	int width = bytes_to_bpf_size(sizeof(void *));
+
+	if (WARN_ON_ONCE(width < 0))
+		return false;
+
+	/* argument to program is a pointer to struct nf_hook_state, in BPF_REG_1. */
+	if (!emit_mov_ptr_reg(p, BPF_REG_6, BPF_REG_1))
+		return false;
+
+	if (!emit(p, BPF_LDX_MEM(width, BPF_REG_7, BPF_REG_1,
+				 offsetof(struct nf_hook_state, priv))))
+		return false;
+
+	/* Could load state->hook_index here, but we don't support index > 0 for bpf call. */
+	if (!emit(p, BPF_MOV32_IMM(BPF_REG_8, 0)))
+		return false;
+
+	return true;
+}
+
+static void patch_hook_jumps(struct nf_hook_prog *p)
+{
+	unsigned int i;
+
+	if (!p->insns)
+		return;
+
+	for (i = 0; i < p->pos; i++) {
+		if (BPF_CLASS(p->insns[i].code) != BPF_JMP)
+			continue;
+
+		if (p->insns[i].code == (BPF_EXIT | BPF_JMP))
+			continue;
+		if (p->insns[i].code == (BPF_CALL | BPF_JMP))
+			continue;
+
+		if (p->insns[i].off != JMP_INVALID)
+			continue;
+		p->insns[i].off = p->pos - i - 1;
+	}
+}
+
+static bool emit_retval(struct nf_hook_prog *p, int retval)
+{
+	if (!emit(p, BPF_MOV32_IMM(BPF_REG_0, retval)))
+		return false;
+
+	return emit(p, BPF_EXIT_INSN());
+}
+
+static bool emit_nf_hook_slow(struct nf_hook_prog *p)
+{
+	int width = bytes_to_bpf_size(sizeof(void *));
+
+	/* restore the original state->priv. */
+	if (!emit(p, BPF_STX_MEM(width, BPF_REG_6, BPF_REG_7,
+				 offsetof(struct nf_hook_state, priv))))
+		return false;
+
+	/* arg1 is state->skb */
+	if (!emit(p, BPF_LDX_MEM(width, BPF_REG_1, BPF_REG_6,
+				 offsetof(struct nf_hook_state, skb))))
+		return false;
+
+	/* arg2 is "struct nf_hook_state *" */
+	if (!emit(p, BPF_MOV64_REG(BPF_REG_2, BPF_REG_6)))
+		return false;
+
+	/* arg3 is nf_hook_entries (original state->priv) */
+	if (!emit(p, BPF_MOV64_REG(BPF_REG_3, BPF_REG_7)))
+		return false;
+
+	if (!emit(p, BPF_EMIT_CALL(nf_hook_slow)))
+		return false;
+
+	/* No further action needed, return retval provided by nf_hook_slow */
+	return emit(p, BPF_EXIT_INSN());
+}
+
+static bool emit_nf_queue(struct nf_hook_prog *p)
+{
+	int width = bytes_to_bpf_size(sizeof(void *));
+
+	if (width < 0) {
+		WARN_ON_ONCE(1);
+		return false;
+	}
+
+	/* int nf_queue(struct sk_buff *skb, struct nf_hook_state *state, unsigned int verdict) */
+	if (!emit(p, BPF_LDX_MEM(width, BPF_REG_1, BPF_REG_6, offsetof(struct nf_hook_state, skb))))
+		return false;
+	if (!emit(p, BPF_STX_MEM(BPF_H, BPF_REG_6, BPF_REG_8,
+				 offsetof(struct nf_hook_state, hook_index))))
+		return false;
+	/* arg2: struct nf_hook_state * */
+	if (!emit(p, BPF_MOV64_REG(BPF_REG_2, BPF_REG_6)))
+		return false;
+	/* arg3: original hook return value: (NUM << NF_VERDICT_QBITS | NF_QUEUE) */
+	if (!emit(p, BPF_MOV32_REG(BPF_REG_3, BPF_REG_0)))
+		return false;
+	if (!emit(p, BPF_EMIT_CALL(nf_queue)))
+		return false;
+
+	/* Check nf_queue return value.  Abnormal case: nf_queue returned != 0.
+	 *
+	 * Fall back to nf_hook_slow().
+	 */
+	if (!emit(p, BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2)))
+		return false;
+
+	/* Normal case: skb was stolen. Return 0. */
+	return emit_retval(p, 0);
+}
+
+static bool do_epilogue_base_hooks(struct nf_hook_prog *p)
+{
+	int width = bytes_to_bpf_size(sizeof(void *));
+
+	if (WARN_ON_ONCE(width < 0))
+		return false;
+
+	/* last 'hook'. We arrive here if previous hook returned ACCEPT,
+	 * i.e. all hooks passed -- we are done.
+	 *
+	 * Return 1, skb can continue traversing network stack.
+	 */
+	if (!emit_retval(p, 1))
+		return false;
+
+	/* Patch all hook jumps, in case any of these are taken
+	 * we need to jump to this location.
+	 *
+	 * This happens when verdict is != ACCEPT.
+	 */
+	patch_hook_jumps(p);
+
+	/* need to ignore upper 24 bits, might contain errno or queue number */
+	if (!emit(p, BPF_MOV32_REG(BPF_REG_3, BPF_REG_0)))
+		return false;
+	if (!emit(p, BPF_ALU32_IMM(BPF_AND, BPF_REG_3, 0xff)))
+		return false;
+
+	/* ACCEPT handled, check STOLEN. */
+	if (!emit(p, BPF_JMP_IMM(BPF_JNE, BPF_REG_3, NF_STOLEN, 2)))
+		return false;
+
+	if (!emit_retval(p, 0))
+		return false;
+
+	/* ACCEPT and STOLEN handled.  Check DROP next */
+	if (!emit(p, BPF_JMP_IMM(BPF_JNE, BPF_REG_3, NF_DROP, 1 + 2 + 2 + 2 + 2)))
+		return false;
+
+	/* First step. Extract the errno number. 1 insn. */
+	if (!emit(p, BPF_ALU32_IMM(BPF_RSH, BPF_REG_0, NF_VERDICT_QBITS)))
+		return false;
+
+	/* Second step: replace errno with EPERM if it was 0. 2 insns. */
+	if (!emit(p, BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1)))
+		return false;
+	if (!emit(p, BPF_MOV32_IMM(BPF_REG_0, EPERM)))
+		return false;
+
+	/* Third step: negate reg0: Caller expects -EFOO and stash the result.  2 insns. */
+	if (!emit(p, BPF_ALU32_IMM(BPF_NEG, BPF_REG_0, 0)))
+		return false;
+	if (!emit(p, BPF_MOV32_REG(BPF_REG_8, BPF_REG_0)))
+		return false;
+
+	/* Fourth step: free the skb. 2 insns. */
+	if (!emit(p, BPF_LDX_MEM(width, BPF_REG_1, BPF_REG_6, offsetof(struct nf_hook_state, skb))))
+		return false;
+	if (!emit(p, BPF_EMIT_CALL(kfree_skb)))
+		return false;
+
+	/* Last step: return. 2 insns. */
+	if (!emit(p, BPF_MOV32_REG(BPF_REG_0, BPF_REG_8)))
+		return false;
+	if (!emit(p, BPF_EXIT_INSN()))
+		return false;
+
+	/* ACCEPT, STOLEN and DROP have been handled.
+	 * REPEAT and STOP are not allowed anymore for individual hook functions.
+	 * This leaves NFQUEUE as only remaing return value.
+	 *
+	 * In this case BPF_REG_0 still contains the original verdict of
+	 * '(NUM << NF_VERDICT_QBITS | NF_QUEUE)', so pass it to nf_queue() as-is.
+	 */
+	if (!emit_nf_queue(p))
+		return false;
+
+	/* Increment hook index and store it in nf_hook_state so nf_hook_slow will
+	 * start at the next hook, if any.
+	 */
+	if (!emit(p, BPF_ALU32_IMM(BPF_ADD, BPF_REG_8, 1)))
+		return false;
+	if (!emit(p, BPF_STX_MEM(BPF_H, BPF_REG_6, BPF_REG_8,
+				 offsetof(struct nf_hook_state, hook_index))))
+		return false;
+
+	return emit_nf_hook_slow(p);
+}
+
+static int nf_hook_prog_init(struct nf_hook_prog *p)
+{
+	memset(p, 0, sizeof(*p));
+
+	p->insns = kcalloc(BPF_MAXINSNS, sizeof(*p->insns), GFP_KERNEL);
+	if (!p->insns)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static void nf_hook_prog_free(struct nf_hook_prog *p)
+{
+	kfree(p->insns);
+}
+
+static int xlate_base_hooks(struct nf_hook_prog *p, const struct nf_hook_entries *e)
+{
+	unsigned int i, len;
+
+	len = e->num_hook_entries;
+
+	if (!do_prologue(p))
+		goto out;
+
+	for (i = 0; i < len; i++) {
+		if (!xlate_one_hook(p, e, &e->hooks[i]))
+			goto out;
+
+		if (i + 1 < len) {
+			if (!emit(p, BPF_MOV64_REG(BPF_REG_1, BPF_REG_6)))
+				goto out;
+
+			if (!emit(p, BPF_ALU32_IMM(BPF_ADD, BPF_REG_8, 1)))
+				goto out;
+		}
+	}
+
+	if (!do_epilogue_base_hooks(p))
+		goto out;
+
+	return 0;
+out:
+	return -EINVAL;
+}
+
+static struct bpf_prog *nf_hook_jit_compile(struct bpf_insn *insns, unsigned int len)
+{
+	struct bpf_prog *prog;
+	int err = 0;
+
+	prog = bpf_prog_alloc(bpf_prog_size(len), 0);
+	if (!prog)
+		return NULL;
+
+	prog->len = len;
+	prog->type = BPF_PROG_TYPE_SOCKET_FILTER;
+	memcpy(prog->insnsi, insns, prog->len * sizeof(struct bpf_insn));
+
+	prog = bpf_prog_select_runtime(prog, &err);
+	if (err) {
+		bpf_prog_free(prog);
+		return NULL;
+	}
+
+	return prog;
+}
+
+/* fallback program, invokes nf_hook_slow interpreter.
+ *
+ * Used when a hook is unregsitered and new program cannot
+ * be compiled for some reason.
+ */
+struct bpf_prog *nf_hook_bpf_create_fb(void)
+{
+	struct bpf_prog *prog;
+	struct nf_hook_prog p;
+	int err;
+
+	err = nf_hook_prog_init(&p);
+	if (err)
+		return NULL;
+
+	if (!do_prologue(&p))
+		goto err;
+
+	if (!emit_nf_hook_slow(&p))
+		goto err;
+
+	prog = nf_hook_jit_compile(p.insns, p.pos);
+err:
+	nf_hook_prog_free(&p);
+	return prog;
+}
+
+struct bpf_prog *nf_hook_bpf_create(const struct nf_hook_entries *new)
+{
+	struct bpf_prog *prog;
+	struct nf_hook_prog p;
+	int err;
+
+	err = nf_hook_prog_init(&p);
+	if (err)
+		return NULL;
+
+	err = xlate_base_hooks(&p, new);
+	if (err)
+		goto err;
+
+	prog = nf_hook_jit_compile(p.insns, p.pos);
+err:
+	nf_hook_prog_free(&p);
+	return prog;
+}
+
+void nf_hook_bpf_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from, struct bpf_prog *to)
+{
+	bpf_dispatcher_change_prog(d, from, to);
+}
-- 
2.32.0