Re: [PATCH bpf-next] bpftool: Restore support for BPF offload-enabled feature probing

Quentin Monnet <quentin@xxxxxxxxxxxxx> · Tue, 8 Mar 2022 14:23:30 +0000

2022-03-08 12:30 UTC+0100 ~ Niklas Söderlund <niklas.soderlund@xxxxxxxxxxxx>
> Commit 1a56c18e6c2e4e74 ("bpftool: Stop supporting BPF offload-enabled
> feature probing") removed the support to probe for BPF offload features.
> This is still something that is useful for NFP NIC that can support
> offloading of BPF programs.
> 
> The reason for the dropped support was that libbpf starting with v1.0
> would drop support for passing the ifindex to the BPF prog/map/helper
> feature probing APIs. In order to keep this useful feature for NFP
> restore the functionality by moving it directly into bpftool.
> 
> The code restored is a simplified version of the code that existed in
> libbpf which supposed passing the ifindex. The simplification is that it
> only targets the cases where ifindex is given and call into libbpf for
> the cases where it's not.
> 
> Before restoring support for probing offload features:
> 
>   # bpftool feature probe dev ens4np0
>   Scanning system call availability...
>   bpf() syscall is available
> 
>   Scanning eBPF program types...
> 
>   Scanning eBPF map types...
> 
>   Scanning eBPF helper functions...
>   eBPF helpers supported for program type sched_cls:
>   eBPF helpers supported for program type xdp:
> 
>   Scanning miscellaneous eBPF features...
>   Large program size limit is NOT available
>   Bounded loop support is NOT available
>   ISA extension v2 is NOT available
>   ISA extension v3 is NOT available
> 
> With support for probing offload features restored:
> 
>   # bpftool feature probe dev ens4np0
>   Scanning system call availability...
>   bpf() syscall is available
> 
>   Scanning eBPF program types...
>   eBPF program_type sched_cls is available
>   eBPF program_type xdp is available
> 
>   Scanning eBPF map types...
>   eBPF map_type hash is available
>   eBPF map_type array is available
>   eBPF map_type prog_array is NOT available
>   eBPF map_type perf_event_array is NOT available
>   eBPF map_type percpu_hash is NOT available
>   eBPF map_type percpu_array is NOT available
>   eBPF map_type stack_trace is NOT available
>   eBPF map_type cgroup_array is NOT available
>   eBPF map_type lru_hash is NOT available
>   eBPF map_type lru_percpu_hash is NOT available
>   eBPF map_type lpm_trie is NOT available
>   eBPF map_type array_of_maps is NOT available
>   eBPF map_type hash_of_maps is NOT available
>   eBPF map_type devmap is NOT available
>   eBPF map_type sockmap is NOT available
>   eBPF map_type cpumap is NOT available
>   eBPF map_type xskmap is NOT available
>   eBPF map_type sockhash is NOT available
>   eBPF map_type cgroup_storage is NOT available
>   eBPF map_type reuseport_sockarray is NOT available
>   eBPF map_type percpu_cgroup_storage is NOT available
>   eBPF map_type queue is NOT available
>   eBPF map_type stack is NOT available
>   eBPF map_type sk_storage is NOT available
>   eBPF map_type devmap_hash is NOT available
>   eBPF map_type struct_ops is NOT available
>   eBPF map_type ringbuf is NOT available
>   eBPF map_type inode_storage is NOT available
>   eBPF map_type task_storage is NOT available
>   eBPF map_type bloom_filter is NOT available
> 
>   Scanning eBPF helper functions...
>   eBPF helpers supported for program type sched_cls:
>   	- bpf_map_lookup_elem
>   	- bpf_get_prandom_u32
>   	- bpf_perf_event_output
>   eBPF helpers supported for program type xdp:
>   	- bpf_map_lookup_elem
>   	- bpf_get_prandom_u32
>   	- bpf_perf_event_output
>   	- bpf_xdp_adjust_head
>   	- bpf_xdp_adjust_tail
> 
>   Scanning miscellaneous eBPF features...
>   Large program size limit is NOT available
>   Bounded loop support is NOT available
>   ISA extension v2 is NOT available
>   ISA extension v3 is NOT available
> 
> Signed-off-by: Niklas Söderlund <niklas.soderlund@xxxxxxxxxxxx>
> Signed-off-by: Simon Horman <simon.horman@xxxxxxxxxxxx>
> ---
>  tools/bpf/bpftool/feature.c | 185 +++++++++++++++++++++++++++++++++---
>  1 file changed, 170 insertions(+), 15 deletions(-)
> 
> diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c
> index 9c894b1447de8cf0..4943beb1823111c8 100644
> --- a/tools/bpf/bpftool/feature.c
> +++ b/tools/bpf/bpftool/feature.c
> @@ -3,6 +3,7 @@
>  
>  #include <ctype.h>
>  #include <errno.h>
> +#include <fcntl.h>
>  #include <string.h>
>  #include <unistd.h>
>  #include <net/if.h>
> @@ -45,6 +46,11 @@ static bool run_as_unprivileged;
>  
>  /* Miscellaneous utility functions */
>  
> +static bool grep(const char *buffer, const char *pattern)
> +{
> +	return !!strstr(buffer, pattern);
> +}
> +
>  static bool check_procfs(void)
>  {
>  	struct statfs st_fs;
> @@ -135,6 +141,32 @@ static void print_end_section(void)
>  
>  /* Probing functions */
>  
> +static int get_vendor_id(int ifindex)
> +{
> +	char ifname[IF_NAMESIZE], path[64], buf[8];
> +	ssize_t len;
> +	int fd;
> +
> +	if (!if_indextoname(ifindex, ifname))
> +		return -1;
> +
> +	snprintf(path, sizeof(path), "/sys/class/net/%s/device/vendor", ifname);
> +
> +	fd = open(path, O_RDONLY | O_CLOEXEC);
> +	if (fd < 0)
> +		return -1;
> +
> +	len = read(fd, buf, sizeof(buf));
> +	close(fd);
> +	if (len < 0)
> +		return -1;
> +	if (len >= (ssize_t)sizeof(buf))
> +		return -1;
> +	buf[len] = '\0';
> +
> +	return strtol(buf, NULL, 0);
> +}
> +
>  static int read_procfs(const char *path)
>  {
>  	char *endptr, *line = NULL;
> @@ -478,6 +510,69 @@ static bool probe_bpf_syscall(const char *define_prefix)
>  	return res;
>  }
>  
> +static int
> +probe_prog_load_ifindex(enum bpf_prog_type prog_type,
> +			const struct bpf_insn *insns, size_t insns_cnt,
> +			char *log_buf, size_t log_buf_sz,
> +			__u32 ifindex)
> +{
> +	LIBBPF_OPTS(bpf_prog_load_opts, opts,
> +		    .log_buf = log_buf,
> +		    .log_size = log_buf_sz,
> +		    .log_level = log_buf ? 1 : 0,
> +		    .prog_ifindex = ifindex,
> +		   );
> +	const char *exp_msg = NULL;
> +	int fd, err, exp_err = 0;
> +	char buf[4096];
> +
> +	switch (prog_type) {
> +	case BPF_PROG_TYPE_SCHED_CLS:
> +	case BPF_PROG_TYPE_XDP:
> +		break;
> +	default:
> +		return -EOPNOTSUPP;

This will not be caught in probe_prog_type_ifindex(), where you only
check for the errno value, will it? You should also check the return
code from probe_prog_load_ifindex()? (Same thing in probe_helper_ifindex()).

You could also get rid of this switch entirely, because the function is
never called with a program type other than TC or XDP (given that you
already check in probe_prog_type(), and helper probes are only run
against supported program tyeps).

> +	}
> +
> +	fd = bpf_prog_load(prog_type, NULL, "GPL", insns, insns_cnt, &opts);
> +	err = -errno;
> +	if (fd >= 0)
> +		close(fd);
> +	if (exp_err) {

exp_err is always 0, you don't need this part. I think this is a
leftover of the previous libbpf probes.

> +		if (fd >= 0 || err != exp_err)
> +			return 0;
> +		if (exp_msg && !strstr(buf, exp_msg))
> +			return 0;
> +		return 1;
> +	}
> +	return fd >= 0 ? 1 : 0;
> +}
> +
> +static bool probe_prog_type_ifindex(enum bpf_prog_type prog_type, __u32 ifindex)
> +{
> +	struct bpf_insn insns[2] = {
> +		BPF_MOV64_IMM(BPF_REG_0, 0),
> +		BPF_EXIT_INSN()
> +	};
> +
> +	switch (prog_type) {
> +	case BPF_PROG_TYPE_SCHED_CLS:
> +		/* nfp returns -EINVAL on exit(0) with TC offload */
> +		insns[0].imm = 2;
> +		break;
> +	case BPF_PROG_TYPE_XDP:
> +		break;
> +	default:
> +		return false;
> +	}
> +
> +	errno = 0;
> +	probe_prog_load_ifindex(prog_type, insns, ARRAY_SIZE(insns), NULL, 0,
> +				ifindex);
> +
> +	return errno != EINVAL && errno != EOPNOTSUPP;
> +}
> +
>  static void
>  probe_prog_type(enum bpf_prog_type prog_type, bool *supported_types,
>  		const char *define_prefix, __u32 ifindex)
> @@ -488,11 +583,19 @@ probe_prog_type(enum bpf_prog_type prog_type, bool *supported_types,
>  	bool res;
>  
>  	if (ifindex) {
> -		p_info("BPF offload feature probing is not supported");
> -		return;
> +		switch (prog_type) {
> +		case BPF_PROG_TYPE_SCHED_CLS:
> +		case BPF_PROG_TYPE_XDP:
> +			break;
> +		default:
> +			return;
> +		}

Here we skip the probe entirely (we don't print a result, even negative)
for types that are not supported by the SmartNICs today. But for map
types, the equivalent switch is in probe_map_type_ifindex(), and it
skips the actual bpf() syscall but it doesn't skip the part where we
print a result.

This means that the output for program types shows the result for just
TC/XDP, while the output for map types shows the result for all maps
known to bpftool, even if we “know” they are not supported for offload.
This shows in your commit description. Could we harmonise between maps
and programs? I don't mind much either way you choose, printing all or
printing few.

Thanks,
Quentin