Re: [PATCH bpf-next 2/2] selftest/bpf: Implement sample UNIX domain socket iterator program.

Yonghong Song <yhs@xxxxxx> · Thu, 29 Jul 2021 23:54:26 -0700

On 7/29/21 4:36 PM, Kuniyuki Iwashima wrote:
If there are no abstract sockets, this prog can output the same result
compared to /proc/net/unix.

   # cat /sys/fs/bpf/unix | head -n 2
   Num       RefCount Protocol Flags    Type St Inode Path
   ffff9ab7122db000: 00000002 00000000 00010000 0001 01 10623 private/defer

   # cat /proc/net/unix | head -n 2
   Num       RefCount Protocol Flags    Type St Inode Path
   ffff9ab7122db000: 00000002 00000000 00010000 0001 01 10623 private/defer

Signed-off-by: Kuniyuki Iwashima <kuniyu@xxxxxxxxxxxx>
---
  .../selftests/bpf/prog_tests/bpf_iter.c       | 17 +++++
  .../selftests/bpf/progs/bpf_iter_unix.c       | 75 +++++++++++++++++++
  2 files changed, 92 insertions(+)
  create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_unix.c

diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index 1f1aade56504..4746bac68d36 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -13,6 +13,7 @@
  #include "bpf_iter_tcp6.skel.h"
  #include "bpf_iter_udp4.skel.h"
  #include "bpf_iter_udp6.skel.h"
+#include "bpf_iter_unix.skel.h"
  #include "bpf_iter_test_kern1.skel.h"
  #include "bpf_iter_test_kern2.skel.h"
  #include "bpf_iter_test_kern3.skel.h"
@@ -313,6 +314,20 @@ static void test_udp6(void)
  	bpf_iter_udp6__destroy(skel);
  }
  
+static void test_unix(void)
+{
+	struct bpf_iter_unix *skel;
+
+	skel = bpf_iter_unix__open_and_load();
+	if (CHECK(!skel, "bpf_iter_unix__open_and_load",
+		  "skeleton open_and_load failed\n"))
+		return;
+
+	do_dummy_read(skel->progs.dump_unix);
+
+	bpf_iter_unix__destroy(skel);
+}
+
  /* The expected string is less than 16 bytes */
  static int do_read_with_fd(int iter_fd, const char *expected,
  			   bool read_one_char)
@@ -1255,6 +1270,8 @@ void test_bpf_iter(void)
  		test_udp4();
  	if (test__start_subtest("udp6"))
  		test_udp6();
+	if (test__start_subtest("unix"))
+		test_unix();
  	if (test__start_subtest("anon"))
  		test_anon_iter(false);
  	if (test__start_subtest("anon-read-one-char"))
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_unix.c b/tools/testing/selftests/bpf/progs/bpf_iter_unix.c
new file mode 100644
index 000000000000..285ec2f7944d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_unix.c
@@ -0,0 +1,75 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Amazon.com Inc. or its affiliates. */
+#include "bpf_iter.h"

Could you add bpf_iter__unix to bpf_iter.h, similar to bpf_iter__sockmap?
The main purpose is to make the test tolerate an old vmlinux.h.

+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+char _license[] SEC("license") = "GPL";
+
+#define __SO_ACCEPTCON		(1 << 16)
+#define UNIX_HASH_SIZE		256
+#define UNIX_ABSTRACT(unix_sk)	(unix_sk->addr->hash < UNIX_HASH_SIZE)

Could you add the above three defines to bpf_tracing_net.h?
We try to keep all such common defines in a common header for
potential reusability.

+
+static long sock_i_ino(const struct sock *sk)
+{
+	const struct socket *sk_socket = sk->sk_socket;
+	const struct inode *inode;
+	unsigned long ino;
+
+	if (!sk_socket)
+		return 0;
+
+	inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode;
+	bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino);
+	return ino;
+}
+
+SEC("iter/unix")
+int dump_unix(struct bpf_iter__unix *ctx)
+{
+	struct unix_sock *unix_sk = ctx->unix_sk;
+	struct sock *sk = (struct sock *)unix_sk;
+	struct seq_file *seq;
+	__u32 seq_num;
+
+	if (!unix_sk)
+		return 0;
+
+	seq = ctx->meta->seq;
+	seq_num = ctx->meta->seq_num;
+	if (seq_num == 0)
+		BPF_SEQ_PRINTF(seq, "Num       RefCount Protocol Flags    "
+			       "Type St Inode Path\n");
+
+	BPF_SEQ_PRINTF(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
+		       unix_sk,
+		       sk->sk_refcnt.refs.counter,
+		       0,
+		       sk->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
+		       sk->sk_type,
+		       sk->sk_socket ?
+		       (sk->sk_state == TCP_ESTABLISHED ?
+			SS_CONNECTED : SS_UNCONNECTED) :
+		       (sk->sk_state == TCP_ESTABLISHED ?
+			SS_CONNECTING : SS_DISCONNECTING),
+		       sock_i_ino(sk));
+
+	if (unix_sk->addr) {
+		if (UNIX_ABSTRACT(unix_sk))
+			/* Abstract UNIX domain socket can contain '\0' in
+			 * the path, and it should be escaped.  However, it
+			 * requires loops and the BPF verifier rejects it.
+			 * So here, print only the escaped first byte to
+			 * indicate it is an abstract UNIX domain socket.
+			 * (See: unix_seq_show() and commit e7947ea770d0d)
+			 */
+			BPF_SEQ_PRINTF(seq, " @");
+		else
+			BPF_SEQ_PRINTF(seq, " %s", unix_sk->addr->name->sun_path);
+	}

I looked at af_unix.c. For the above "if (unix_sk->addr) { ... }" code,
the corresponding kernel source code is the following:

                if (u->addr) {  // under unix_table_lock here
                        int i, len;
                        seq_putc(seq, ' ');

                        i = 0;
                        len = u->addr->len - sizeof(short);
                        if (!UNIX_ABSTRACT(s))
                                len--;
                        else {
                                seq_putc(seq, '@');
                                i++;
                        }
                        for ( ; i < len; i++)
                                seq_putc(seq, u->addr->name->sun_path[i] ?:
                                         '@');
                }

It does not seem to match the non-UNIX_ABSTRACT case in the bpf program.
I am not familiar with unix sockets, so it would be good if you could
explain a little more.

Regarding the verifier issue with loops, do we have a maximum upper bound
for u->addr->len? If yes, would a bounded loop work?

+
+	BPF_SEQ_PRINTF(seq, "\n");
+
+	return 0;
+}