Recent eBPF verifier rejects program that was accepted by 6.8 eBPF verifier

Hi all,
I'd like to report what I think is an eBPF verifier regression. I have an eBPF
program that is accepted by the 6.8 eBPF verifier but rejected by 6.9 and
later. It's my understanding that an eBPF program accepted by one version of
the verifier should be accepted by all subsequent versions.

I put together a stripped-down libbpf reproducer based on the libbpf-bootstrap
repo to showcase the issue (see below). Note that if I change RULE_LIST_SIZE
from 380 to 30, the verifier accepts the program.
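
That is, on my setup this one-line change to the reproducer below is enough to
go from rejected to accepted:

	-#define RULE_LIST_SIZE 380
	+#define RULE_LIST_SIZE 30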

I bisected the issue down to this commit:
	commit 8ecfc371d829bfed75e0ef2cab45b2290b982f64
	Author: Maxim Mikityanskiy <maxim@xxxxxxxxxxxxx>
	Date:   Mon Jan 8 22:52:02 2024 +0200

	    bpf: Assign ID to scalars on spill

Before commit 8ecfc371d829:
	$ uname -r 
	6.7.0-12291-g87e51ac6cb19

	$ sudo ./tc
	loads successfully

After commit 8ecfc371d829:
	$ uname -r
	6.7.0-12292-g8ecfc371d829

	$ sudo ./tc  2>&1 | grep -A4 "too large"
	BPF program is too large. Processed 1000001 insn
	processed 1000001 insns (limit 1000000) max_states_per_insn 26 total_states 62473 peak_states 5456 mark_read 162
	-- END PROG LOAD LOG --
	libbpf: prog 'tc_ingress': failed to load: -7
	libbpf: failed to load object 'tc_bpf'

I confirmed that the issue affects current master as well:
	$ uname -r
	6.12.0-rc7-00125-gcfaaa7d010d1

	$ sudo ./tc 2>&1 | grep -A4 "too large" 
	BPF program is too large. Processed 1000001 insn
	processed 1000001 insns (limit 1000000) max_states_per_insn 26 total_states 62473 peak_states 5456 mark_read 162
	-- END PROG LOAD LOG --
	libbpf: prog 'tc_ingress': failed to load: -7
	libbpf: failed to load object 'tc_bpf'


I'm still investigating how this commit changes the number of instructions the
verifier ends up processing, and why it leads to such a drastic change for this
particular program. I wanted to share my findings early in case someone has any
hints for me.
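
For context, essentially all of the verification work in the reproducer comes
from the rule-matching loop in apply_rules_ipv4(). The snippet below is lifted
from the full program further down, with comments added; each iteration copies
a whole pkt_info_rule_t from the map value onto the BPF stack, which I assume
is the kind of stack write the spill change affects:

	for (int i = 0; i < RULE_LIST_SIZE; i++) {
		/* Per-iteration copy of the rule from the read-only map
		 * value onto the BPF stack. */
		const pkt_info_rule_t rule = rules[i];

		bool ret;
		/* Stop at the first rule that matches the packet. */
		ret = match_ipv4(&rule, pkt, &dir_cmp, &output);
		if (ret) {
			break;
		}
	}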

To reproduce, use the following file as a drop-in replacement for
libbpf-bootstrap's examples/c/tc.bpf.c:
--->8---
#include <vmlinux.h>
#include <bpf/bpf_core_read.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>

char LICENSE[] SEC("license") = "GPL";

typedef enum : uint8_t {
	CONN_DIR_OUTBOUND = 0,
	CONN_DIR_INBOUND = 1,
	CONN_DIR_NEITHER = 2,
} connection_dir_t;

typedef enum : uint8_t {
	ACTION_ALLOW = 0,
	ACTION_DENY = 1,
} rule_action_t;


#define TC_ACT_UNSPEC   (-1)
#define TC_ACT_OK       0
#define TC_ACT_RECLASSIFY   1
#define TC_ACT_SHOT     2

#define AF_UNSPEC     0
#define AF_INET     2
#define ETH_P_IP 0x0800
#define ETH_P_IPV6 0x86DD

#define RULE_LIST_SIZE 380
#define RULE_CONTROLS 0x00000001

struct connection_tuple_v4 {
	uint32_t saddr;
	uint32_t daddr;
};

typedef struct pkt_info {
	struct connection_tuple_v4 ip4;
	uint16_t family;
	uint16_t ip_protocol;
	uint16_t eth_protocol;
	rule_action_t action;
	bool ip_proto_udp_or_tcp;
	uint8_t pad[7];
} pkt_info_t;

typedef struct pkt_info_rule {
	pkt_info_t info;
	uint32_t pad;
	struct connection_tuple_v4 ip4_mask;
} pkt_info_rule_t;

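/* Keyed by address family (see allow_pkt()); each value is a fixed-size
 * array of RULE_LIST_SIZE rules. */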
struct {
	__uint(type, BPF_MAP_TYPE_HASH);
	__uint(max_entries, 2);
	__type(key, u32);
	__type(value, pkt_info_rule_t[RULE_LIST_SIZE]);
	__uint(map_flags, BPF_F_RDONLY_PROG | BPF_F_WRONLY);
} rules_map SEC(".maps");

static __always_inline bool
match_ipaddr_with_mask_ipv4(const uint32_t *rule_addr,
			    const uint32_t *rule_addr_mask,
			    const uint32_t *data_packet_addr)
{
	if ((*rule_addr == 0) || (*data_packet_addr == 0)) {
		return true;
	}
	else if ((*rule_addr_mask == 0) && (*rule_addr == *data_packet_addr)) {
		return true;
	}
	else if ((*rule_addr_mask != 0) &&
		 ((*rule_addr & *rule_addr_mask) ==
		  (*data_packet_addr & *rule_addr_mask))) {
		return true;
	}
	return false;
}

static __always_inline bool protocol_match(int rule_protocol,
					   int packet_protocol)
{
	return (rule_protocol == 0 || rule_protocol == packet_protocol);
}

struct match_ipv4_pkt {
	uint32_t local_addr;
	uint32_t remote_addr;
};

static __always_inline bool match_ipv4(const pkt_info_rule_t *rule,
				       pkt_info_t *pkt,
				       const struct match_ipv4_pkt *cmp,
				       int *output)
{
	if (protocol_match(rule->info.ip_protocol, pkt->ip_protocol) &&
	    (match_ipaddr_with_mask_ipv4(&rule->info.ip4.saddr,
					 &rule->ip4_mask.saddr,
					 &cmp->local_addr)) &&
	    (match_ipaddr_with_mask_ipv4(&rule->info.ip4.daddr,
					 &rule->ip4_mask.daddr,
					 &cmp->remote_addr))) {
		if (pkt->ip_proto_udp_or_tcp) {
			return false;
		}

		*output = rule->info.action;
		return true;
	}

	return false;
}

static __always_inline int apply_rules_ipv4(pkt_info_t *pkt,
					    const pkt_info_rule_t *rules)
{
	if ((pkt == NULL) || (rules == NULL)) {
		return TC_ACT_UNSPEC;
	}

	int output = ACTION_ALLOW;
	struct match_ipv4_pkt dir_cmp;

	dir_cmp.local_addr = pkt->ip4.daddr;
	dir_cmp.remote_addr = pkt->ip4.saddr;

	for (int i = 0; i < RULE_LIST_SIZE; i++) {
		const pkt_info_rule_t rule = rules[i];

		bool ret;
		ret = match_ipv4(&rule, pkt, &dir_cmp, &output);
		if (ret) {
			break;
		}
	}

	if (output == ACTION_ALLOW) {
		return TC_ACT_UNSPEC;
	} else {
		return TC_ACT_SHOT;
	}
}

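/* True if hdr does not fit entirely within the packet's linear data. */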
#define HEADER_BOUNDS_CHECK(hdr, skb) \
	((unsigned long)((hdr) + 1) > (unsigned long)(skb)->data_end)

static __always_inline bool get_tuple(const struct __sk_buff *skb,
				      const struct ethhdr *eth, pkt_info_t *pkt)
{
	pkt->eth_protocol = bpf_ntohs(BPF_CORE_READ(eth, h_proto));

	if ((pkt->eth_protocol == ETH_P_IP) ||
	    ((skb->protocol == __bpf_constant_htons(ETH_P_IP)))) {
		struct iphdr *iph = (struct iphdr *)(skb->data + sizeof(*eth));

		if (HEADER_BOUNDS_CHECK(iph, skb)) {
			return false;
		}

		pkt->family = AF_INET;

		pkt->ip4.saddr = BPF_CORE_READ(iph, saddr);
		pkt->ip4.daddr = BPF_CORE_READ(iph, daddr);

		pkt->ip_protocol = BPF_CORE_READ(iph, protocol);

		pkt->ip_proto_udp_or_tcp = (pkt->ip_protocol == IPPROTO_TCP) ||
					   (pkt->ip_protocol == IPPROTO_UDP);

		return true;

	} else {
		pkt->family = AF_UNSPEC;
		return false;
	}
}

static __always_inline bool populate_pkt_info(const struct __sk_buff *skb,
					      pkt_info_t *pkt)
{
	struct ethhdr *eth = (struct ethhdr *)(unsigned long)(skb->data);

	if (HEADER_BOUNDS_CHECK(eth, skb)) {
		return false;
	}

	return get_tuple(skb, eth, pkt);
}

static __always_inline int allow_pkt(pkt_info_t *pkt)
{
	uint32_t key = pkt->family;
	const pkt_info_rule_t *rules = bpf_map_lookup_elem(&rules_map, &key);
	if (rules == NULL) {
		return TC_ACT_UNSPEC;
	}

	switch (pkt->family) {
	case AF_INET:
		return apply_rules_ipv4(pkt, rules);
	default:
		return TC_ACT_UNSPEC;
	}
}

SEC("tc")
int tc_ingress(struct __sk_buff *skb)
{
	pkt_info_t pkt = {0};

	bool ret = populate_pkt_info(skb, &pkt);
	if (ret != true) {
		return TC_ACT_UNSPEC;
	}

	return allow_pkt(&pkt);
}

--->8---
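
In case it helps, this is roughly how I build and run the reproducer from a
libbpf-bootstrap checkout (assuming the stock examples/c Makefile; adjust
paths as needed):

	$ git clone --recurse-submodules https://github.com/libbpf/libbpf-bootstrap
	$ cp tc.bpf.c libbpf-bootstrap/examples/c/tc.bpf.c
	$ cd libbpf-bootstrap/examples/c && make tc
	$ sudo ./tc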


Thank you,
Francis



