>From nf_tables_newrule(), this calls nft_jit_rule() that transforms our internal expression structure layout to abstract syntax tree, then we walk over this syntax tree to generate the BPF instructions that are placed in the rule jit buffer. From the commit phase, collect all jit buffers, place them in a BPF program that gets attached to the chain. This should be good enough to test simple payload and meta match. For more sophisticated stuff, we may use internal bpf helpers to call our _eval() functions. Signed-off-by: Pablo Neira Ayuso <pablo@xxxxxxxxxxxxx> --- include/net/netfilter/nf_tables.h | 19 +++++ include/net/netfilter/nf_tables_jit.h | 13 ++++ net/ipv4/netfilter/nf_tables_ipv4.c | 7 +- net/netfilter/Makefile | 2 +- net/netfilter/nf_tables_api.c | 28 ++++++++ net/netfilter/nf_tables_bpf.c | 92 ++++++++++++++++++++++++ net/netfilter/nf_tables_jit.c | 132 ++++++++++++++++++++++++++++++++++ 7 files changed, 287 insertions(+), 6 deletions(-) create mode 100644 net/netfilter/nf_tables_bpf.c diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 8a6406f3811f..09a2faceee91 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -10,6 +10,7 @@ #include <linux/netfilter/nf_tables.h> #include <linux/u64_stats_sync.h> #include <net/netfilter/nf_flow_table.h> +#include <linux/filter.h> #include <net/netlink.h> #define NFT_JUMP_STACK_SIZE 16 @@ -796,10 +797,23 @@ static inline int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src) return 0; } +/* Size of buffer to store jit transformation. Instead of having a fixed buffer + * size per rule, we can calculate this in runtime when walking over the syntax + * tree instead, this is a proof of concept at this stage, so use a fixed size + * buffer. + */ +#define NFT_RULE_JIT_BUFSIZ 32 + +struct nft_rule_jit { + struct sock_filter insn[NFT_RULE_JIT_BUFSIZ]; + unsigned int len; +}; + /** * struct nft_rule - nf_tables rule * * @list: used internally + * @jit: instructions that result from jit transformation * @handle: rule handle * @genmask: generation mask * @dlen: length of expression data @@ -808,6 +822,7 @@ static inline int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src) */ struct nft_rule { struct list_head list; + struct nft_rule_jit jit; u64 handle:42, genmask:2, dlen:12, @@ -915,6 +930,8 @@ struct nft_stats { * struct nft_base_chain - nf_tables base chain * * @ops: netfilter hook ops + * @fp: jitted filter program + * @fp_stash: jitted filter program stash (for two-commit phase protocol) * @type: chain type * @policy: default policy * @stats: per-cpu chain stats @@ -923,6 +940,8 @@ struct nft_stats { */ struct nft_base_chain { struct nf_hook_ops ops; + struct bpf_prog __rcu *fp; + struct bpf_prog *fp_stash; const struct nf_chain_type *type; u8 policy; u8 flags; diff --git a/include/net/netfilter/nf_tables_jit.h b/include/net/netfilter/nf_tables_jit.h index dff3af7ad420..5309a45a6dc2 100644 --- a/include/net/netfilter/nf_tables_jit.h +++ b/include/net/netfilter/nf_tables_jit.h @@ -120,4 +120,17 @@ struct nft_ast_xfrm_desc { int nft_ast_xfrm(const struct list_head *ast_stmt_list, const struct nft_ast_xfrm_desc *base_desc, void *data); +struct sock_fprog_kern; + +void nft_jit_emit_basechain_policy(struct sock_fprog_kern *prog, + unsigned int verdict); +int nft_jit_rule(struct nft_rule *rule, + const struct nft_ast_xfrm_desc *xfrm_desc); + +extern struct nft_ast_xfrm_desc nft_jit_bpf_xfrm_desc; + +int nft_jit_prepare(struct net *net); +void nft_jit_commit(struct net *net); +void nft_jit_release(struct net *net); + #endif diff --git a/net/ipv4/netfilter/nf_tables_ipv4.c b/net/ipv4/netfilter/nf_tables_ipv4.c index 96f955496d5f..4cb872f3e5a4 100644 --- a/net/ipv4/netfilter/nf_tables_ipv4.c +++ b/net/ipv4/netfilter/nf_tables_ipv4.c @@ -22,12 +22,9 @@ static unsigned int nft_do_chain_ipv4(void *priv, struct sk_buff *skb, const struct nf_hook_state *state) { - struct nft_pktinfo pkt; + const struct nft_chain *chain = priv; - nft_set_pktinfo(&pkt, skb, state); - nft_set_pktinfo_ipv4(&pkt, skb); - - return nft_do_chain(&pkt, priv); + return BPF_PROG_RUN(nft_base_chain(chain)->fp, skb); } static const struct nf_chain_type filter_ipv4 = { diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 8b0006e3ae6f..6bd2bcbaa74c 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -76,7 +76,7 @@ obj-$(CONFIG_NF_DUP_NETDEV) += nf_dup_netdev.o nf_tables-objs := nf_tables_core.o nf_tables_api.o nf_tables_trace.o \ nft_immediate.o nft_cmp.o nft_range.o nft_bitwise.o \ nft_byteorder.o nft_payload.o nft_lookup.o nft_dynset.o \ - nf_tables_jit.o + nf_tables_jit.o nf_tables_bpf.o obj-$(CONFIG_NF_TABLES) += nf_tables.o obj-$(CONFIG_NF_TABLES_INET) += nf_tables_inet.o diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 8b9fe30de0cd..4746631a5e9d 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -15,11 +15,13 @@ #include <linux/netlink.h> #include <linux/vmalloc.h> #include <linux/netfilter.h> +#include <linux/filter.h> #include <linux/netfilter/nfnetlink.h> #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_flow_table.h> #include <net/netfilter/nf_tables_core.h> #include <net/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables_jit.h> #include <net/net_namespace.h> #include <net/sock.h> @@ -1324,13 +1326,16 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, struct nft_stats __percpu *stats; struct net *net = ctx->net; struct nft_chain *chain; + struct bpf_prog *fp; int err; if (table->use == UINT_MAX) return -EOVERFLOW; if (nla[NFTA_CHAIN_HOOK]) { + struct sock_fprog_kern prog; struct nft_chain_hook hook; + struct sock_filter filter; struct nf_hook_ops *ops; err = nft_chain_parse_hook(net, nla, &hook, family, create); @@ -1371,6 +1376,18 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, if (basechain->type->type == NFT_CHAIN_T_NAT) ops->nat_hook = true; + prog.filter = &filter; + prog.len = 0; + nft_jit_emit_basechain_policy(&prog, NF_ACCEPT); + + if (bpf_prog_create(&fp, &prog)) { + nft_chain_release_hook(&hook); + free_percpu(basechain->stats); + kfree(basechain); + return -ENOMEM; + } + RCU_INIT_POINTER(basechain->fp, fp); + chain->flags |= NFT_BASE_CHAIN; basechain->policy = policy; } else { @@ -2324,6 +2341,10 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk, list_add_rcu(&rule->list, &chain->rules); } + err = nft_jit_rule(rule, &nft_jit_bpf_xfrm_desc); + if (err < 0) + goto err3; + if (nft_trans_rule_add(&ctx, NFT_MSG_NEWRULE, rule) == NULL) { err = -ENOMEM; goto err3; @@ -5702,6 +5723,9 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) struct nft_trans *trans, *next; struct nft_trans_elem *te; + if (nft_jit_prepare(net) < 0) + return -1; + /* Bump generation counter, invalidate any dump in progress */ while (++net->nft.base_seq == 0); @@ -5827,8 +5851,12 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) } } + nft_jit_commit(net); + synchronize_rcu(); + nft_jit_release(net); + list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) { list_del(&trans->list); nf_tables_commit_release(trans); diff --git a/net/netfilter/nf_tables_bpf.c b/net/netfilter/nf_tables_bpf.c new file mode 100644 index 000000000000..3bc5de5ab763 --- /dev/null +++ b/net/netfilter/nf_tables_bpf.c @@ -0,0 +1,92 @@ +#include <linux/types.h> +#include <linux/filter.h> +#include <net/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables_jit.h> + +#define NFT_EMIT(jit, __insn) \ + jit->insn[jit->len] = (struct sock_filter) __insn; \ + jit->len++; \ + +static int nft_jit_bpf_payload_xfrm(const struct nft_ast_expr *expr, + struct nft_ast_xfrm_state *state, + void *data) +{ + struct nft_ast_expr *right = expr->relational.right; + struct nft_ast_expr *left = expr->relational.left; + struct nft_rule_jit *jit = data; + unsigned int size; + unsigned int k; + + pr_info("> match payload at offset %u base %u len %u\n", + left->payload.offset, left->payload.base, left->len); + + switch (left->len) { + case 1: + size = BPF_B; + k = right->value.data.data[0]; + break; + case 2: + size = BPF_H; + k = ntohs(right->value.data.data[0]); + break; + case 4: + size = BPF_W; + k = ntohl(right->value.data.data[0]); + break; + default: + return -EOPNOTSUPP; + } + + NFT_EMIT(jit, BPF_STMT(BPF_LD | size | BPF_ABS, left->payload.offset)); + NFT_EMIT(jit, BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, k, 0, 1)); + NFT_EMIT(jit, BPF_STMT(BPF_RET | BPF_K, NF_DROP)); + NFT_EMIT(jit, BPF_STMT(BPF_RET | BPF_K, NF_ACCEPT)); + + return 0; +} + +static const struct nft_ast_proto_desc nft_jit_bpf_payload_desc = { + .xfrm = nft_jit_bpf_payload_xfrm, +}; + +static int nft_jit_bpf_meta_xfrm(const struct nft_ast_expr *expr, + struct nft_ast_xfrm_state *state, void *data) +{ + struct nft_ast_expr *right = expr->relational.right; + struct nft_ast_expr *left = expr->relational.left; + struct nft_rule_jit *jit = data; + unsigned int ad; + + pr_info("> generate meta match\n"); + + switch (left->meta.key) { + case NFT_META_PROTOCOL: + pr_info("meta protocol\n"); + ad = SKF_AD_PROTOCOL; + break; + break; + case NFT_META_IIF: + pr_info("meta iif\n"); + ad = SKF_AD_IFINDEX; + break; + default: + return -EOPNOTSUPP; + } + + NFT_EMIT(jit, BPF_STMT(BPF_LD | BPF_W | BPF_ABS, SKF_AD_OFF + ad)); + NFT_EMIT(jit, BPF_JUMP(BPF_JMP | BPF_JEQ, + right->value.data.data[0], 0, 1)); + NFT_EMIT(jit, BPF_STMT(BPF_RET | BPF_K, NF_DROP)); + NFT_EMIT(jit, BPF_STMT(BPF_RET | BPF_K, NF_ACCEPT)); + + return 0; +} + +static const struct nft_ast_meta_desc nft_jit_bpf_meta_desc = { + .xfrm = nft_jit_bpf_meta_xfrm, +}; + +struct nft_ast_xfrm_desc nft_jit_bpf_xfrm_desc = { + .proto_desc = &nft_jit_bpf_payload_desc, + .meta_desc = &nft_jit_bpf_meta_desc, +}; diff --git a/net/netfilter/nf_tables_jit.c b/net/netfilter/nf_tables_jit.c index 63673ea42be8..40d2b380d313 100644 --- a/net/netfilter/nf_tables_jit.c +++ b/net/netfilter/nf_tables_jit.c @@ -1,6 +1,7 @@ #include <linux/init.h> #include <linux/module.h> #include <linux/netfilter.h> +#include <linux/filter.h> #include <net/netfilter/nf_tables.h> #include <net/netfilter/nf_tables_jit.h> @@ -205,3 +206,134 @@ int nft_ast_xfrm(const struct list_head *ast_stmt_list, return err; } EXPORT_SYMBOL_GPL(nft_ast_xfrm); + +int nft_jit_rule(struct nft_rule *rule, + const struct nft_ast_xfrm_desc *xfrm_desc) +{ + LIST_HEAD(stmt_list); + int err; + + err = nft_delinearize(&stmt_list, rule); + if (err < 0) + return err; + + err = nft_ast_xfrm(&stmt_list, xfrm_desc, &rule->jit); + if (err < 0) + return err; + + /* TODO: Remove this, just here to print result of delinearization. */ + nft_ast_stmt_list_print(&stmt_list); + + nft_ast_stmt_list_release(&stmt_list); + + return err; +} + +void nft_jit_emit_basechain_policy(struct sock_fprog_kern *prog, + unsigned int verdict) +{ + struct sock_filter ret = BPF_STMT(BPF_RET | BPF_K, verdict); + + prog->filter[prog->len] = ret; + prog->len++; +} + +static void nft_jit_rule_add(struct sock_fprog_kern *prog, + const struct nft_rule_jit *jit) +{ + memcpy(&prog->filter[prog->len], jit->insn, + jit->len * sizeof(struct sock_filter)); + prog->len += jit->len; +} + +int nft_jit_prepare(struct net *net) +{ + struct nft_base_chain *basechain; + struct sock_fprog_kern prog; + struct sock_filter *filter; + struct nft_table *table; + struct nft_chain *chain; + struct nft_rule *rule; + unsigned int len = 1; + struct bpf_prog *fp; + + list_for_each_entry(table, &net->nft.tables, list) { + list_for_each_entry(chain, &table->chains, list) { + if (!nft_is_base_chain(chain)) + continue; + + basechain = nft_base_chain(chain); + + list_for_each_entry(rule, &chain->rules, list) { + if (!nft_is_active_next(net, rule)) + continue; + + len += rule->jit.len; + } + + filter = kmalloc(len * sizeof(*filter), GFP_KERNEL); + if (!filter) + return -ENOMEM; + + prog.filter = filter; + prog.len = 0; + + list_for_each_entry(rule, &chain->rules, list) { + if (!nft_is_active_next(net, rule)) + continue; + + nft_jit_rule_add(&prog, &rule->jit); + } + + nft_jit_emit_basechain_policy(&prog, basechain->policy); + + if (bpf_prog_create(&fp, &prog)) { + kfree(filter); + return -ENOMEM; + } + + kfree(filter); + nft_base_chain(chain)->fp_stash = fp; + } + } + + return 0; +} + +void nft_jit_commit(struct net *net) +{ + struct nft_base_chain *basechain; + struct nft_table *table; + struct nft_chain *chain; + struct bpf_prog *old_fp; + + list_for_each_entry(table, &net->nft.tables, list) { + list_for_each_entry(chain, &table->chains, list) { + if (!nft_is_base_chain(chain)) + continue; + + basechain = nft_base_chain(chain); + old_fp = basechain->fp; + rcu_assign_pointer(basechain->fp, basechain->fp_stash); + basechain->fp_stash = old_fp; + } + } +} + +void nft_jit_release(struct net *net) +{ + struct nft_base_chain *basechain; + struct nft_table *table; + struct nft_chain *chain; + + list_for_each_entry(table, &net->nft.tables, list) { + list_for_each_entry(chain, &table->chains, list) { + if (!nft_is_base_chain(chain)) + continue; + + basechain = nft_base_chain(chain); + bpf_prog_free(basechain->fp_stash); + basechain->fp_stash = NULL; + } + } +} -- 2.11.0 -- To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html