Hi Stephen, On Thu, May 23, 2019 at 05:38:01AM -0400, Stephen Suryaputra wrote: > This is the kernel change for the overall changes with this description: > Add capability to have rules matching IPv4 options. This is developed > mainly to support dropping of IP packets with loose and/or strict source > route route options. Nevertheless, the implementation include others and > ability to get specific fields in the option. Thanks for submitting your patch. > Signed-off-by: Stephen Suryaputra <ssuryaextr@xxxxxxxxx> > --- > include/net/inet_sock.h | 2 +- [...] > net/ipv4/ip_options.c | 2 + Please, place the update of these two files (which are not netfilter specific) in a separated (initial) patch, we'll need an Acked-by: tag from the general networking maintainer to get this in. It will make things easier if this comes in a separated (initial) patch. More comments below. > net/netfilter/nft_exthdr.c | 136 +++++++++++++++++++++++ > 4 files changed, 141 insertions(+), 1 deletion(-) > [...] > diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c > index a940c9fd9045..c4d47d274bbe 100644 > --- a/net/netfilter/nft_exthdr.c > +++ b/net/netfilter/nft_exthdr.c > @@ -62,6 +62,128 @@ static void nft_exthdr_ipv6_eval(const struct nft_expr *expr, > regs->verdict.code = NFT_BREAK; > } > > +/* find the offset to specified option or the header beyond the options > + * if target < 0. > + * > + * Note that *offset is used as input/output parameter, and if it is not zero, > + * then it must be a valid offset to an inner IPv4 header. This can be used > + * to explore inner IPv4 header, eg. ICMP error messages. In other extension headers (IPv6 and TCP options) this offset is used to match for a field inside the extension / option. So this semantics you describe here are slightly different, right? > + * If target header is found, its offset is set in *offset and return option > + * number. Otherwise, return negative error. > + * > + * If the first fragment doesn't contain the End of Options it is considered > + * invalid. > + */ > +static int ipv4_find_option(struct net *net, struct sk_buff *skb, unsigned int *offset, > + int target, unsigned short *fragoff, int *flags) static int ipv4_find_option(struct net *net, struct sk_buff *skb, unsigned int *offset, int target, unsigned short *fragoff, int *flags) ^ Align parameters to parens when breaking too long lines. Please, also break lines at 80 chars. > +{ > + unsigned char optbuf[sizeof(struct ip_options) + 41]; > + struct ip_options *opt = (struct ip_options *)optbuf; > + struct iphdr *iph, _iph; > + unsigned int start; > + __be32 info; > + int optlen; > + bool found = false; Please, define variables using reverse xmas tree, ie. unsigned char optbuf[sizeof(struct ip_options) + 41]; struct ip_options *opt = (struct ip_options *)optbuf; struct iphdr *iph, _iph; unsigned int start; bool found = false; __be32 info; int optlen; >From longest line to shortest one. > + if (fragoff) > + *fragoff = 0; > + > + if (!offset) > + return -EINVAL; > + if (!*offset) > + *offset = skb_network_offset(skb); > + > + iph = skb_header_pointer(skb, *offset, sizeof(_iph), &_iph); > + if (!iph || iph->version != 4) > + return -EBADMSG; > + start = *offset + sizeof(struct iphdr); > + > + optlen = iph->ihl * 4 - (int)sizeof(struct iphdr); > + if (optlen <= 0) > + return -ENOENT; You could just: return -1; in all these errors in ipv4_find_option() since nft_exthdr_ipv4_eval() does not use it. > + memset(opt, 0, sizeof(struct ip_options)); > + /* Copy the options since __ip_options_compile() modifies > + * the options. Get one byte beyond the option for target < 0 > + */ > + if (skb_copy_bits(skb, start, opt->__data, optlen + 1)) > + return -EBADMSG; > + opt->optlen = optlen; > + > + if (__ip_options_compile(net, opt, NULL, &info)) > + return -EBADMSG; > + > + switch (target) { > + case IPOPT_SSRR: > + case IPOPT_LSRR: > + if (opt->srr) { I'd suggest: if (!opt->srr) break; So you save one level of indentation below. > + found = target == IPOPT_SSRR ? opt->is_strictroute : > + !opt->is_strictroute; > + if (found) > + *offset = opt->srr + start; > + } > + break; > + case IPOPT_RR: > + if (opt->rr) { same here: if (!opt->rr) break; and same thing for other extensions. > + *offset = opt->rr + start; > + found = true; > + } > + break; > + case IPOPT_RA: > + if (opt->router_alert) { > + *offset = opt->router_alert + start; > + found = true; > + } > + break; > + default: > + /* Either not supported or not a specific search, treated as found */ > + found = true; > + if (target < 0) { > + if (opt->end) { > + *offset = opt->end + start; > + target = IPOPT_END; > + } else { > + /* Point to beyond the options. */ > + *offset = optlen + start; > + target = opt->__data[optlen]; > + } > + } else { > + target = -EOPNOTSUPP; > + } > + } > + if (!found) > + target = -ENOENT; > + return target; Hm, 'target' value is never used, right? > +} > + > +static void nft_exthdr_ipv4_eval(const struct nft_expr *expr, > + struct nft_regs *regs, > + const struct nft_pktinfo *pkt) > +{ > + struct nft_exthdr *priv = nft_expr_priv(expr); > + u32 *dest = ®s->data[priv->dreg]; > + struct sk_buff *skb = pkt->skb; > + unsigned int offset = 0; > + int err; > + > + err = ipv4_find_option(nft_net(pkt), skb, &offset, priv->type, NULL, NULL); > + if (priv->flags & NFT_EXTHDR_F_PRESENT) { > + *dest = (err >= 0); > + return; > + } else if (err < 0) { > + goto err; > + } > + offset += priv->offset; > + > + dest[priv->len / NFT_REG32_SIZE] = 0; > + if (skb_copy_bits(pkt->skb, offset, dest, priv->len) < 0) > + goto err; > + return; > +err: > + regs->verdict.code = NFT_BREAK; > +} > + > static void * > nft_tcp_header_pointer(const struct nft_pktinfo *pkt, > unsigned int len, void *buffer, unsigned int *tcphdr_len) > @@ -360,6 +482,14 @@ static const struct nft_expr_ops nft_exthdr_ipv6_ops = { > .dump = nft_exthdr_dump, > }; > > +static const struct nft_expr_ops nft_exthdr_ipv4_ops = { > + .type = &nft_exthdr_type, > + .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)), > + .eval = nft_exthdr_ipv4_eval, > + .init = nft_exthdr_init, > + .dump = nft_exthdr_dump, > +}; > + > static const struct nft_expr_ops nft_exthdr_tcp_ops = { > .type = &nft_exthdr_type, > .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)), > @@ -400,6 +530,12 @@ nft_exthdr_select_ops(const struct nft_ctx *ctx, > if (tb[NFTA_EXTHDR_DREG]) > return &nft_exthdr_ipv6_ops; > break; > + case NFT_EXTHDR_OP_IPV4: > + if (ctx->family == NFPROTO_IPV4) { This should also work for the NFPROTO_INET (inet tables), NFPROTO_BRIDGE and the NFPROTO_NETDEV families. I would turn this into: if (ctx->family != NFPROTO_IPV6) { > + if (tb[NFTA_EXTHDR_DREG]) > + return &nft_exthdr_ipv4_ops; > + } > + break; Then, from the _eval() path: You have to replace iph->version == 4 check abive, you could use skb->protocol instead, and check for htons(ETH_P_IP) packets. Thanks!