Re: [PATCH bpf-next 05/13] bpf: add bpf_skc_to_tcp6_sock() helper

Yonghong Song <yhs@xxxxxx> · Thu, 18 Jun 2020 16:31:33 -0700

On 6/18/20 1:54 PM, Martin KaFai Lau wrote:
On Wed, Jun 17, 2020 at 02:15:42PM -0700, Yonghong Song wrote:
The helper is used in tracing programs to cast a socket
pointer to a tcp6_sock pointer.
The return value could be NULL if the casting is illegal.

A new helper return type RET_PTR_TO_BTF_ID_OR_NULL is added
so the verifier is able to deduce proper return types for the helper.

Different from the previous BTF_ID based helpers,
the bpf_skc_to_tcp6_sock() argument can be several possible
btf_ids. More specifically, all possible socket data structures
with sock_common appearing first in the memory layout.
This patch only added socket types related to tcp and udp.

All possible argument btf_id and return value btf_id
for helper bpf_skc_to_tcp6_sock() are pre-calculated and
cached. In the future, it is even possible to precompute
these btf_id's at kernel build time.

[ ... ]

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 07052d44bca1..e455aa09039b 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -261,6 +261,7 @@ enum bpf_return_type {
  	RET_PTR_TO_TCP_SOCK_OR_NULL,	/* returns a pointer to a tcp_sock or NULL */
  	RET_PTR_TO_SOCK_COMMON_OR_NULL,	/* returns a pointer to a sock_common or NULL */
  	RET_PTR_TO_ALLOC_MEM_OR_NULL,	/* returns a pointer to dynamically allocated memory or NULL */
+	RET_PTR_TO_BTF_ID_OR_NULL,	/* returns a pointer to a btf_id or NULL */
  };
  
  /* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs
@@ -283,6 +284,10 @@ struct bpf_func_proto {
  		enum bpf_arg_type arg_type[5];
  	};
  	int *btf_id; /* BTF ids of arguments */
+	bool (*check_btf_id)(u32 btf_id, u32 arg); /* If the argument could match
+						    * more than one btf id's.
+						    */
+	int *ret_btf_id; /* return value btf_id */
  };
  
  /* bpf_context is intentionally undefined structure. Pointer to bpf_context is
@@ -1196,6 +1201,10 @@ bool bpf_link_is_iter(struct bpf_link *link);
  struct bpf_prog *bpf_iter_get_info(struct bpf_iter_meta *meta, bool in_stop);
  int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx);
  
+void init_sock_cast_types(struct btf *btf);
CONFIG_NET may not be set.

Good catch, will add proper config guard in the next revision.



[ ... ]

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 34cde841ab68..22d90d47befa 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -3735,10 +3735,12 @@ static int int_ptr_type_to_size(enum bpf_arg_type type)
  	return -EINVAL;
  }
  
-static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
+static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
  			  enum bpf_arg_type arg_type,
-			  struct bpf_call_arg_meta *meta)
+			  struct bpf_call_arg_meta *meta,
+			  const struct bpf_func_proto *fn)
  {
+	u32 regno = BPF_REG_1 + arg;
  	struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
  	enum bpf_reg_type expected_type, type = reg->type;
  	int err = 0;
@@ -3820,9 +3822,16 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
  		expected_type = PTR_TO_BTF_ID;
  		if (type != expected_type)
  			goto err_type;
-		if (reg->btf_id != meta->btf_id) {
-			verbose(env, "Helper has type %s got %s in R%d\n",
-				kernel_type_name(meta->btf_id),
+		if (!fn->check_btf_id) {
+			if (reg->btf_id != meta->btf_id) {
+				verbose(env, "Helper has type %s got %s in R%d\n",
+					kernel_type_name(meta->btf_id),
+					kernel_type_name(reg->btf_id), regno);
+
+				return -EACCES;
+			}
+		} else if (!fn->check_btf_id(reg->btf_id, arg + 1)) {
Why arg "+ 1"?

In verifier, arg starts from 0 (arguments 0 - 4). In func_proto, we have 
ARG1 - ARG5.

That is why I add one here. I think I can just use 0-4 range for arg 
parameter, it should be fine.


+			verbose(env, "Helper does not support %s in R%d\n",
  				kernel_type_name(reg->btf_id), regno);
  
  			return -EACCES;
@@ -4600,7 +4609,7 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
  	struct bpf_reg_state *regs;
  	struct bpf_call_arg_meta meta;
  	bool changes_data;
-	int i, err;
+	int i, err, ret_btf_id;
  
  	/* find function prototype */
  	if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) {
@@ -4644,10 +4653,12 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
  	meta.func_id = func_id;
  	/* check args */
  	for (i = 0; i < 5; i++) {
-		err = btf_resolve_helper_id(&env->log, fn, i);
-		if (err > 0)
-			meta.btf_id = err;
-		err = check_func_arg(env, BPF_REG_1 + i, fn->arg_type[i], &meta);
+		if (!fn->check_btf_id) {
+			err = btf_resolve_helper_id(&env->log, fn, i);
+			if (err > 0)
+				meta.btf_id = err;
+		}
+		err = check_func_arg(env, i, fn->arg_type[i], &meta, fn);
Nit. Since it is passing fn and i, maybe skip passing
fn->arg_type[i] altogether?
Makes sense, will do.

  		if (err)
  			return err;
  	}
@@ -4750,6 +4761,16 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
  		regs[BPF_REG_0].type = PTR_TO_MEM_OR_NULL;
  		regs[BPF_REG_0].id = ++env->id_gen;
  		regs[BPF_REG_0].mem_size = meta.mem_size;
+	} else if (fn->ret_type == RET_PTR_TO_BTF_ID_OR_NULL) {
+		mark_reg_known_zero(env, regs, BPF_REG_0);
+		regs[BPF_REG_0].type = PTR_TO_BTF_ID_OR_NULL;
+		ret_btf_id = *fn->ret_btf_id;
+		if (ret_btf_id < 0) {
If btf_vmlinux is not available, is ret_btf_id == 0?

Yes, it is a global variable. Will change it to <= 0.


+			verbose(env, "invalid return type %d of func %s#%d\n",
+				fn->ret_type, func_id_name(func_id), func_id);
+			return err;
Is err correctly set at this point?

Typo, I mean return ret_btf_id. In Jiri's d_path patch, the btf_id are 
all non-negative values.

I may adopt the same convention in the next revision to make future 
conversion easier.


+		}
+		regs[BPF_REG_0].btf_id = ret_btf_id;
  	} else {
  		verbose(env, "unknown return type %d of func %s#%d\n",
  			fn->ret_type, func_id_name(func_id), func_id);
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index afaec7e082d9..478c10d1ec33 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1515,6 +1515,8 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
  		return &bpf_skb_output_proto;
  	case BPF_FUNC_xdp_output:
  		return &bpf_xdp_output_proto;
+	case BPF_FUNC_skc_to_tcp6_sock:
+		return &bpf_skc_to_tcp6_sock_proto;
  #endif
  	case BPF_FUNC_seq_printf:
  		return prog->expected_attach_type == BPF_TRACE_ITER ?
diff --git a/net/core/filter.c b/net/core/filter.c
index 73395384afe2..faf6feedd78e 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -9191,3 +9191,72 @@ void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog)
  {
  	bpf_dispatcher_change_prog(BPF_DISPATCHER_PTR(xdp), prev_prog, prog);
  }
+
+/* Define a list of socket types which can be the argument for
+ * skc_to_*_sock() helpers. All these sockets should have
+ * sock_common as the first member in their memory layout.
+ */
+static const char *sock_cast_types[] = {
+	"inet_connection_sock",
+	"inet_request_sock",
+	"inet_sock",
+	"inet_timewait_sock",
+	"request_sock",
+	"sock",
+	"sock_common",
+	"tcp_sock",
+	"tcp_request_sock",
+	"tcp_timewait_sock",
+	"tcp6_sock",
+	"udp_sock",
+	"udp6_sock",
+};
+
+static int sock_cast_btf_ids[ARRAY_SIZE(sock_cast_types)];
+
+static bool check_arg_btf_id(u32 btf_id, u32 arg)
+{
+	int i;
+
+	/* only one argument, no need to check arg */
+	for (i = 0; i < ARRAY_SIZE(sock_cast_btf_ids); i++)
+		if (sock_cast_btf_ids[i] == btf_id)
+			return true;
+	return false;
+}
+
+BPF_CALL_1(bpf_skc_to_tcp6_sock, struct sock *, sk)
+{
+	/* add an explicit cast to struct tcp6_sock to force
+	 * debug_info type generation for it.
+	 */
+	if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP &&
+	    sk->sk_family == AF_INET6)
+		return (unsigned long)(struct tcp6_sock *)sk;
+
+	return (unsigned long)NULL;
+}
+
+static int bpf_skc_to_tcp6_sock_ret_btf_id;
+const struct bpf_func_proto bpf_skc_to_tcp6_sock_proto = {
+	.func			= bpf_skc_to_tcp6_sock,
+	.gpl_only		= true,
+	.ret_type		= RET_PTR_TO_BTF_ID_OR_NULL,
+	.arg1_type		= ARG_PTR_TO_BTF_ID,
+	.check_btf_id		= check_arg_btf_id,
+	.ret_btf_id		= &bpf_skc_to_tcp6_sock_ret_btf_id,
+};
+
+void init_sock_cast_types(struct btf *btf)
+{
+	char *ret_type_name;
+
+	/* find all possible argument btf_id's for socket cast helpers */
+	find_array_of_btf_ids(btf, sock_cast_types, sock_cast_btf_ids,
+			      ARRAY_SIZE(sock_cast_types));
+
+	/* find return btf_id */
+	ret_type_name = "tcp6_sock";
+	find_array_of_btf_ids(btf, &ret_type_name,
+			      &bpf_skc_to_tcp6_sock_ret_btf_id, 1);
Instead of re-finding tcp6_sock/tcp_sock/request_sock...etc,
can the sock_cast_btf_ids[] be reused?
Actually, yes, we can. Will do.