On Wed, Apr 8, 2020 at 4:26 PM Yonghong Song <yhs@xxxxxx> wrote: > > Two bpf programs are added in this patch for netlink and ipv6_route > target. On my VM, I am able to achieve identical > results compared to /proc/net/netlink and /proc/net/ipv6_route. > > $ cat /proc/net/netlink > sk Eth Pid Groups Rmem Wmem Dump Locks Drops Inode > 000000002c42d58b 0 0 00000000 0 0 0 2 0 7 > 00000000a4e8b5e1 0 1 00000551 0 0 0 2 0 18719 > 00000000e1b1c195 4 0 00000000 0 0 0 2 0 16422 > 000000007e6b29f9 6 0 00000000 0 0 0 2 0 16424 > .... > 00000000159a170d 15 1862 00000002 0 0 0 2 0 1886 > 000000009aca4bc9 15 3918224839 00000002 0 0 0 2 0 19076 > 00000000d0ab31d2 15 1 00000002 0 0 0 2 0 18683 > 000000008398fb08 16 0 00000000 0 0 0 2 0 27 > $ cat /sys/kernel/bpfdump/netlink/my1 > sk Eth Pid Groups Rmem Wmem Dump Locks Drops Inode > 000000002c42d58b 0 0 00000000 0 0 0 2 0 7 > 00000000a4e8b5e1 0 1 00000551 0 0 0 2 0 18719 > 00000000e1b1c195 4 0 00000000 0 0 0 2 0 16422 > 000000007e6b29f9 6 0 00000000 0 0 0 2 0 16424 > .... 
> 00000000159a170d 15 1862 00000002 0 0 0 2 0 1886 > 000000009aca4bc9 15 3918224839 00000002 0 0 0 2 0 19076 > 00000000d0ab31d2 15 1 00000002 0 0 0 2 0 18683 > 000000008398fb08 16 0 00000000 0 0 0 2 0 27 > > $ cat /proc/net/ipv6_route > fe800000000000000000000000000000 40 00000000000000000000000000000000 00 00000000000000000000000000000000 00000100 00000001 00000000 00000001 eth0 > 00000000000000000000000000000000 00 00000000000000000000000000000000 00 00000000000000000000000000000000 ffffffff 00000001 00000000 00200200 lo > 00000000000000000000000000000001 80 00000000000000000000000000000000 00 00000000000000000000000000000000 00000000 00000003 00000000 80200001 lo > fe80000000000000c04b03fffe7827ce 80 00000000000000000000000000000000 00 00000000000000000000000000000000 00000000 00000002 00000000 80200001 eth0 > ff000000000000000000000000000000 08 00000000000000000000000000000000 00 00000000000000000000000000000000 00000100 00000003 00000000 00000001 eth0 > 00000000000000000000000000000000 00 00000000000000000000000000000000 00 00000000000000000000000000000000 ffffffff 00000001 00000000 00200200 lo > $ cat /sys/kernel/bpfdump/ipv6_route/my1 > fe800000000000000000000000000000 40 00000000000000000000000000000000 00 00000000000000000000000000000000 00000100 00000001 00000000 00000001 eth0 > 00000000000000000000000000000000 00 00000000000000000000000000000000 00 00000000000000000000000000000000 ffffffff 00000001 00000000 00200200 lo > 00000000000000000000000000000001 80 00000000000000000000000000000000 00 00000000000000000000000000000000 00000000 00000003 00000000 80200001 lo > fe80000000000000c04b03fffe7827ce 80 00000000000000000000000000000000 00 00000000000000000000000000000000 00000000 00000002 00000000 80200001 eth0 > ff000000000000000000000000000000 08 00000000000000000000000000000000 00 00000000000000000000000000000000 00000100 00000003 00000000 00000001 eth0 > 00000000000000000000000000000000 00 00000000000000000000000000000000 00 
00000000000000000000000000000000 ffffffff 00000001 00000000 00200200 lo > > Signed-off-by: Yonghong Song <yhs@xxxxxx> > --- > .../selftests/bpf/progs/bpfdump_ipv6_route.c | 63 ++++++++++++++++ > .../selftests/bpf/progs/bpfdump_netlink.c | 74 +++++++++++++++++++ > 2 files changed, 137 insertions(+) > create mode 100644 tools/testing/selftests/bpf/progs/bpfdump_ipv6_route.c > create mode 100644 tools/testing/selftests/bpf/progs/bpfdump_netlink.c > > diff --git a/tools/testing/selftests/bpf/progs/bpfdump_ipv6_route.c b/tools/testing/selftests/bpf/progs/bpfdump_ipv6_route.c > new file mode 100644 > index 000000000000..590e56791052 > --- /dev/null > +++ b/tools/testing/selftests/bpf/progs/bpfdump_ipv6_route.c > @@ -0,0 +1,63 @@ > +// SPDX-License-Identifier: GPL-2.0 > +/* Copyright (c) 2020 Facebook */ > +#include "vmlinux.h" > +#include <bpf/bpf_helpers.h> > +#include <bpf/bpf_tracing.h> > +#include <bpf/bpf_endian.h> > + > +char _license[] SEC("license") = "GPL"; > + > +extern bool CONFIG_IPV6_SUBTREES __kconfig __weak; > + > +#define RTF_GATEWAY 0x0002 > +#define IFNAMSIZ 16 > +#define fib_nh_gw_family nh_common.nhc_gw_family > +#define fib_nh_gw6 nh_common.nhc_gw.ipv6 > +#define fib_nh_dev nh_common.nhc_dev > + > +SEC("dump//sys/kernel/bpfdump/ipv6_route") > +int BPF_PROG(dump_ipv6_route, struct fib6_info *rt, struct seq_file *seq, u64 seq_num) > +{ > + struct fib6_nh *fib6_nh = &rt->fib6_nh[0]; > + unsigned int flags = rt->fib6_flags; > + const struct net_device *dev; > + struct nexthop *nh; > + static const char fmt1[] = "%pi6 %02x "; > + static const char fmt2[] = "%pi6 "; > + static const char fmt3[] = "00000000000000000000000000000000 "; > + static const char fmt4[] = "%08x %08x "; > + static const char fmt5[] = "%8s\n"; > + static const char fmt6[] = "\n"; > + static const char fmt7[] = "00000000000000000000000000000000 00 "; > + > + /* FIXME: nexthop_is_multipath is not handled here. 
*/ > + nh = rt->nh; > + if (rt->nh) > + fib6_nh = &nh->nh_info->fib6_nh; > + > + bpf_seq_printf(seq, fmt1, sizeof(fmt1), &rt->fib6_dst.addr, > + rt->fib6_dst.plen); > + > + if (CONFIG_IPV6_SUBTREES) > + bpf_seq_printf(seq, fmt1, sizeof(fmt1), &rt->fib6_src.addr, > + rt->fib6_src.plen); > + else > + bpf_seq_printf(seq, fmt7, sizeof(fmt7)); > + > + if (fib6_nh->fib_nh_gw_family) { > + flags |= RTF_GATEWAY; > + bpf_seq_printf(seq, fmt2, sizeof(fmt2), &fib6_nh->fib_nh_gw6); > + } else { > + bpf_seq_printf(seq, fmt3, sizeof(fmt3)); > + } > + > + dev = fib6_nh->fib_nh_dev; > + bpf_seq_printf(seq, fmt4, sizeof(fmt4), rt->fib6_metric, rt->fib6_ref.refs.counter); > + bpf_seq_printf(seq, fmt4, sizeof(fmt4), 0, flags); > + if (dev) > + bpf_seq_printf(seq, fmt5, sizeof(fmt5), dev->name); > + else > + bpf_seq_printf(seq, fmt6, sizeof(fmt6)); > + > + return 0; > +} > diff --git a/tools/testing/selftests/bpf/progs/bpfdump_netlink.c b/tools/testing/selftests/bpf/progs/bpfdump_netlink.c > new file mode 100644 > index 000000000000..37c9be546b99 > --- /dev/null > +++ b/tools/testing/selftests/bpf/progs/bpfdump_netlink.c > @@ -0,0 +1,74 @@ > +// SPDX-License-Identifier: GPL-2.0 > +/* Copyright (c) 2020 Facebook */ > +#include "vmlinux.h" > +#include <bpf/bpf_helpers.h> > +#include <bpf/bpf_tracing.h> > +#include <bpf/bpf_endian.h> > + > +char _license[] SEC("license") = "GPL"; > + > +#define sk_rmem_alloc sk_backlog.rmem_alloc > +#define sk_refcnt __sk_common.skc_refcnt > + > +#define offsetof(TYPE, MEMBER) ((size_t)&((TYPE *)0)->MEMBER) > +#define container_of(ptr, type, member) ({ \ > + void *__mptr = (void *)(ptr); \ > + ((type *)(__mptr - offsetof(type, member))); }) > + > +static inline struct inode *SOCK_INODE(struct socket *socket) > +{ > + return &container_of(socket, struct socket_alloc, socket)->vfs_inode; > +} > + > +SEC("dump//sys/kernel/bpfdump/netlink") We already discussed this on the previous patch, but just to put it into a visual comparison: SEC("dump/netlink") looks so much 
nicer :) > +int BPF_PROG(dump_netlink, struct netlink_sock *nlk, struct seq_file *seq, u64 seq_num) > +{ > + static const char banner[] = > + "sk Eth Pid Groups " > + "Rmem Wmem Dump Locks Drops Inode\n"; > + static const char fmt1[] = "%pK %-3d "; > + static const char fmt2[] = "%-10u %08x "; > + static const char fmt3[] = "%-8d %-8d "; > + static const char fmt4[] = "%-5d %-8d "; > + static const char fmt5[] = "%-8u %-8lu\n"; > + struct sock *s = &nlk->sk; > + unsigned long group, ino; > + struct inode *inode; > + struct socket *sk; > + > + if (seq_num == 0) > + bpf_seq_printf(seq, banner, sizeof(banner)); > + > + bpf_seq_printf(seq, fmt1, sizeof(fmt1), s, s->sk_protocol); > + > + if (!nlk->groups) { > + group = 0; > + } else { > + /* FIXME: temporary use bpf_probe_read here, needs > + * verifier support to do direct access. > + */ > + bpf_probe_read(&group, sizeof(group), &nlk->groups[0]); Is this what's being fixed by patch #10? > + } > + bpf_seq_printf(seq, fmt2, sizeof(fmt2), nlk->portid, (u32)group); > + > + > + bpf_seq_printf(seq, fmt3, sizeof(fmt3), s->sk_rmem_alloc.counter, > + s->sk_wmem_alloc.refs.counter - 1); > + bpf_seq_printf(seq, fmt4, sizeof(fmt4), nlk->cb_running, > + s->sk_refcnt.refs.counter); > + > + sk = s->sk_socket; > + if (!sk) { > + ino = 0; > + } else { > + /* FIXME: container_of inside SOCK_INODE has a forced > + * type conversion, and direct access cannot be used > + * with current verifier. > + */ > + inode = SOCK_INODE(sk); > + bpf_probe_read(&ino, sizeof(ino), &inode->i_ino); > + } > + bpf_seq_printf(seq, fmt5, sizeof(fmt5), s->sk_drops.counter, ino); > + > + return 0; > +} > -- > 2.24.1 >