This allows to replace a tcp option with nop padding to selectively disable a particular tcp option. Optstrip mode is chosen when userspace passes the exthdr expression with neither a source nor a destination register attribute. This is identical to xtables TCPOPTSTRIP extension. Signed-off-by: Florian Westphal <fw@xxxxxxxxx> --- proposed userspace syntax is: nft add rule f in delete tcp option sack-perm net/netfilter/nft_exthdr.c | 88 +++++++++++++++++++++++++++++++++++++- 1 file changed, 87 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c index dbe1f2e7dd9e..37e427ca2b34 100644 --- a/net/netfilter/nft_exthdr.c +++ b/net/netfilter/nft_exthdr.c @@ -308,6 +308,55 @@ static void nft_exthdr_tcp_set_eval(const struct nft_expr *expr, regs->verdict.code = NFT_BREAK; } +static void nft_exthdr_tcp_strip_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + u8 buff[sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE]; + struct nft_exthdr *priv = nft_expr_priv(expr); + unsigned int i, tcphdr_len, optl; + struct tcphdr *tcph; + u8 *opt; + + tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff, &tcphdr_len); + if (!tcph) + goto err; + + if (skb_ensure_writable(pkt->skb, nft_thoff(pkt) + tcphdr_len)) + goto err; + + opt = (u8 *)nft_tcp_header_pointer(pkt, sizeof(buff), buff, &tcphdr_len); + if (!opt) + goto err; + for (i = sizeof(*tcph); i < tcphdr_len - 1; i += optl) { + unsigned int j; + + optl = optlen(opt, i); + if (priv->type != opt[i]) + continue; + + if (i + optl > tcphdr_len) + goto err; + + for (j = 0; j < optl; ++j) { + u16 n = TCPOPT_NOP; + u16 o = opt[i+j]; + + if ((i + j) % 2 == 0) { + o <<= 8; + n <<= 8; + } + inet_proto_csum_replace2(&tcph->check, pkt->skb, htons(o), + htons(n), false); + } + memset(opt + i, TCPOPT_NOP, optl); + return; + } + return; +err: + regs->verdict.code = NFT_BREAK; +} + static void nft_exthdr_sctp_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) @@ -457,6 +506,28 @@ static int nft_exthdr_tcp_set_init(const struct nft_ctx *ctx, priv->len); } +static int nft_exthdr_tcp_strip_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_exthdr *priv = nft_expr_priv(expr); + + if (tb[NFTA_EXTHDR_SREG] || + tb[NFTA_EXTHDR_DREG] || + tb[NFTA_EXTHDR_FLAGS] || + tb[NFTA_EXTHDR_OFFSET] || + tb[NFTA_EXTHDR_LEN]) + return -EINVAL; + + if (!tb[NFTA_EXTHDR_TYPE]) + return -EINVAL; + + priv->type = nla_get_u8(tb[NFTA_EXTHDR_TYPE]); + priv->op = NFT_EXTHDR_OP_TCPOPT; + + return 0; +} + static int nft_exthdr_ipv4_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) @@ -517,6 +588,13 @@ static int nft_exthdr_dump_set(struct sk_buff *skb, const struct nft_expr *expr) return nft_exthdr_dump_common(skb, priv); } +static int nft_exthdr_dump_strip(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_exthdr *priv = nft_expr_priv(expr); + + return nft_exthdr_dump_common(skb, priv); +} + static const struct nft_expr_ops nft_exthdr_ipv6_ops = { .type = &nft_exthdr_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)), @@ -549,6 +627,14 @@ static const struct nft_expr_ops nft_exthdr_tcp_set_ops = { .dump = nft_exthdr_dump_set, }; +static const struct nft_expr_ops nft_exthdr_tcp_strip_ops = { + .type = &nft_exthdr_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)), + .eval = nft_exthdr_tcp_strip_eval, + .init = nft_exthdr_tcp_strip_init, + .dump = nft_exthdr_dump_strip, +}; + static const struct nft_expr_ops nft_exthdr_sctp_ops = { .type = &nft_exthdr_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)), @@ -576,7 +662,7 @@ nft_exthdr_select_ops(const struct nft_ctx *ctx, return &nft_exthdr_tcp_set_ops; if (tb[NFTA_EXTHDR_DREG]) return &nft_exthdr_tcp_ops; - break; + return &nft_exthdr_tcp_strip_ops; case NFT_EXTHDR_OP_IPV6: if (tb[NFTA_EXTHDR_DREG]) return &nft_exthdr_ipv6_ops; -- 2.33.1