Re: [PATCH nf-next] netfilter: add support for matching IPv4 options

Pablo Neira Ayuso <pablo@xxxxxxxxxxxxx> · Fri, 31 May 2019 19:11:01 +0200

Hi Stephen,

On Thu, May 23, 2019 at 05:38:01AM -0400, Stephen Suryaputra wrote:
> This is the kernel change for the overall changes with this description:
> Add capability to have rules matching IPv4 options. This is developed
> mainly to support dropping of IP packets with loose and/or strict source
> route route options. Nevertheless, the implementation include others and
> ability to get specific fields in the option.

Thanks for submitting your patch.

> Signed-off-by: Stephen Suryaputra <ssuryaextr@xxxxxxxxx>
> ---
>  include/net/inet_sock.h                  |   2 +-
[...]
>  net/ipv4/ip_options.c                    |   2 +

Please, place the update of these two files (which are not netfilter
specific) in a separated (initial) patch, we'll need an Acked-by: tag
from the general networking maintainer to get this in. It will make
things easier if this comes in a separated (initial) patch.

More comments below.

>  net/netfilter/nft_exthdr.c               | 136 +++++++++++++++++++++++
>  4 files changed, 141 insertions(+), 1 deletion(-)
> 
[...]
> diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
> index a940c9fd9045..c4d47d274bbe 100644
> --- a/net/netfilter/nft_exthdr.c
> +++ b/net/netfilter/nft_exthdr.c
> @@ -62,6 +62,128 @@ static void nft_exthdr_ipv6_eval(const struct nft_expr *expr,
>  	regs->verdict.code = NFT_BREAK;
>  }
>  
> +/* find the offset to specified option or the header beyond the options
> + * if target < 0.
> + *
> + * Note that *offset is used as input/output parameter, and if it is not zero,
> + * then it must be a valid offset to an inner IPv4 header. This can be used
> + * to explore inner IPv4 header, eg. ICMP error messages.

In other extension headers (IPv6 and TCP options) this offset is used
to match for a field inside the extension / option.

So this semantics you describe here are slightly different, right?

> + * If target header is found, its offset is set in *offset and return option
> + * number. Otherwise, return negative error.
> + *
> + * If the first fragment doesn't contain the End of Options it is considered
> + * invalid.
> + */
> +static int ipv4_find_option(struct net *net, struct sk_buff *skb, unsigned int *offset,
> +		     int target, unsigned short *fragoff, int *flags)

static int ipv4_find_option(struct net *net, struct sk_buff *skb,
                            unsigned int *offset, int target,
                            unsigned short *fragoff, int *flags)
                            ^
Align parameters to parens when breaking too long lines.

Please, also break lines at 80 chars.

> +{
> +	unsigned char optbuf[sizeof(struct ip_options) + 41];
> +	struct ip_options *opt = (struct ip_options *)optbuf;
> +	struct iphdr *iph, _iph;
> +	unsigned int start;
> +	__be32 info;
> +	int optlen;
> +	bool found = false;

Please, define variables using reverse xmas tree, ie.

        unsigned char optbuf[sizeof(struct ip_options) + 41];
        struct ip_options *opt = (struct ip_options *)optbuf;
        struct iphdr *iph, _iph;
        unsigned int start;
        bool found = false;
        __be32 info;
        int optlen;

>From longest line to shortest one.

> +	if (fragoff)
> +		*fragoff = 0;
> +
> +	if (!offset)
> +		return -EINVAL;
> +	if (!*offset)
> +		*offset = skb_network_offset(skb);
> +
> +	iph = skb_header_pointer(skb, *offset, sizeof(_iph), &_iph);
> +	if (!iph || iph->version != 4)
> +		return -EBADMSG;
> +	start = *offset + sizeof(struct iphdr);
> +
> +	optlen = iph->ihl * 4 - (int)sizeof(struct iphdr);
> +	if (optlen <= 0)
> +		return -ENOENT;

You could just:

                return -1;

in all these errors in ipv4_find_option() since nft_exthdr_ipv4_eval()
does not use it.

> +	memset(opt, 0, sizeof(struct ip_options));
> +	/* Copy the options since __ip_options_compile() modifies
> +	 * the options. Get one byte beyond the option for target < 0
> +	 */
> +	if (skb_copy_bits(skb, start, opt->__data, optlen + 1))
> +		return -EBADMSG;
> +	opt->optlen = optlen;
> +
> +	if (__ip_options_compile(net, opt, NULL, &info))
> +		return -EBADMSG;
> +
> +	switch (target) {
> +	case IPOPT_SSRR:
> +	case IPOPT_LSRR:
> +		if (opt->srr) {

I'd suggest:

                if (!opt->srr)
                        break;

So you save one level of indentation below.

> +			found = target == IPOPT_SSRR ? opt->is_strictroute :
> +						       !opt->is_strictroute;
> +			if (found)
> +				*offset = opt->srr + start;
> +		}
> +		break;
> +	case IPOPT_RR:
> +		if (opt->rr) {

same here:

                if (!opt->rr)
                        break;

and same thing for other extensions.

> +			*offset = opt->rr + start;
> +			found = true;
> +		}
> +		break;
> +	case IPOPT_RA:
> +		if (opt->router_alert) {
> +			*offset = opt->router_alert + start;
> +			found = true;
> +		}
> +		break;
> +	default:
> +		/* Either not supported or not a specific search, treated as found */
> +		found = true;
> +		if (target < 0) {
> +			if (opt->end) {
> +				*offset = opt->end + start;
> +				target = IPOPT_END;
> +			} else {
> +				/* Point to beyond the options. */
> +				*offset = optlen + start;
> +				target = opt->__data[optlen];
> +			}
> +		} else {
> +			target = -EOPNOTSUPP;
> +		}
> +	}
> +	if (!found)
> +		target = -ENOENT;
> +	return target;

Hm, 'target' value is never used, right?

> +}
> +
> +static void nft_exthdr_ipv4_eval(const struct nft_expr *expr,
> +				 struct nft_regs *regs,
> +				 const struct nft_pktinfo *pkt)
> +{
> +	struct nft_exthdr *priv = nft_expr_priv(expr);
> +	u32 *dest = &regs->data[priv->dreg];
> +	struct sk_buff *skb = pkt->skb;
> +	unsigned int offset = 0;
> +	int err;
> +
> +	err = ipv4_find_option(nft_net(pkt), skb, &offset, priv->type, NULL, NULL);
> +	if (priv->flags & NFT_EXTHDR_F_PRESENT) {
> +		*dest = (err >= 0);
> +		return;
> +	} else if (err < 0) {
> +		goto err;
> +	}
> +	offset += priv->offset;
> +
> +	dest[priv->len / NFT_REG32_SIZE] = 0;
> +	if (skb_copy_bits(pkt->skb, offset, dest, priv->len) < 0)
> +		goto err;
> +	return;
> +err:
> +	regs->verdict.code = NFT_BREAK;
> +}
> +
>  static void *
>  nft_tcp_header_pointer(const struct nft_pktinfo *pkt,
>  		       unsigned int len, void *buffer, unsigned int *tcphdr_len)
> @@ -360,6 +482,14 @@ static const struct nft_expr_ops nft_exthdr_ipv6_ops = {
>  	.dump		= nft_exthdr_dump,
>  };
>  
> +static const struct nft_expr_ops nft_exthdr_ipv4_ops = {
> +	.type		= &nft_exthdr_type,
> +	.size		= NFT_EXPR_SIZE(sizeof(struct nft_exthdr)),
> +	.eval		= nft_exthdr_ipv4_eval,
> +	.init		= nft_exthdr_init,
> +	.dump		= nft_exthdr_dump,
> +};
> +
>  static const struct nft_expr_ops nft_exthdr_tcp_ops = {
>  	.type		= &nft_exthdr_type,
>  	.size		= NFT_EXPR_SIZE(sizeof(struct nft_exthdr)),
> @@ -400,6 +530,12 @@ nft_exthdr_select_ops(const struct nft_ctx *ctx,
>  		if (tb[NFTA_EXTHDR_DREG])
>  			return &nft_exthdr_ipv6_ops;
>  		break;
> +	case NFT_EXTHDR_OP_IPV4:
> +		if (ctx->family == NFPROTO_IPV4) {

This should also work for the NFPROTO_INET (inet tables), NFPROTO_BRIDGE
and the NFPROTO_NETDEV families.

I would turn this into:

		if (ctx->family != NFPROTO_IPV6) {

> +			if (tb[NFTA_EXTHDR_DREG])
> +				return &nft_exthdr_ipv4_ops;
> +		}
> +		break;

Then, from the _eval() path:

You have to replace iph->version == 4 check abive, you could use
skb->protocol instead, and check for htons(ETH_P_IP) packets.

Thanks!