[PATCH bpf-next v1 3/5] bpf: Introduce bpf_packet_pointer helper to do DPA

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Introduce a new helper, 'bpf_packet_pointer', that returns a packet
pointer to a linear area in a possibly multi-buffer XDP buff. Earlier,
the user had to use bpf_xdp_load_bytes and bpf_xdp_store_bytes to read
from and write to a multi-buffer XDP buff, but this led to a memcpy even
in the ideal case (where we detect a linear area in the initial frame or
frags).
Instead, we can expose the bpf_packet_pointer function, and return a
packet pointer with a fixed range, so that user can do direct packet
access in the contiguous region.

The name bpf_packet_pointer is chosen so this helper can also be
implemented for TC programs in the future, using skb as ctx.

The helper either returns a pointer to a linear contiguous area, or NULL
if it fails to find one. In that case, the user can resort to the
existing helpers to access data across frame or frag boundaries. The
case of offset + len > xdp_get_buff_len is still rejected, but since the
user can already check for that beforehand, the error code is dropped
for it and NULL is returned.

We use the support for ARG_SCALAR, ARG_CONSTANT, and pkt_uid for
PTR_TO_PACKET in this commit. First, it is enforced that offset is only
in range [0, 0xffff], and that len is a constant, with value in range
[1, 0xffff]. Then, we introduce ret_pkt_len member in bpf_call_arg_meta
to remember the length to set for the returned packet pointer. A fresh
ID is assigned to pkt_uid on each call, so that comparisons of these
PTR_TO_PACKET pointers with existing packet pointers obtained from ctx
or from other calls to bpf_packet_pointer are rejected, to prevent range
manipulation. The existing bpf_xdp_load_bytes/bpf_xdp_store_bytes now
call bpf_xdp_copy_buf directly. The intended usage is that the user
first calls bpf_packet_pointer, and on receiving NULL from the call,
invokes these 'slow path' helpers that handle the access across the
head/frag boundary.

Note that the reason we choose PTR_TO_PACKET as the return value, and
not PTR_TO_MEM with a fixed mem_size, is because these pointers need
to be invalidated (by clear_all_pkt_pointers) when a helper that changes
the packet is invoked. Instead of special casing PTR_TO_MEM for that
purpose, it is better to adjust PTR_TO_PACKET to work for this mode with
minimal additions on the verifier side (from previous commit). Also, the
verifier errors related to bad access mention pkt pointer and not
pointer to memory, which is more meaningful to the BPF programmer.

Signed-off-by: Kumar Kartikeya Dwivedi <memxor@xxxxxxxxx>
---
 include/linux/bpf.h            |  2 ++
 include/uapi/linux/bpf.h       | 12 +++++++++
 kernel/bpf/verifier.c          | 37 ++++++++++++++++++++++++++
 net/core/filter.c              | 48 +++++++++++++++++-----------------
 tools/include/uapi/linux/bpf.h | 12 +++++++++
 5 files changed, 87 insertions(+), 24 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 7841d90b83df..981e87c64e47 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -421,6 +421,7 @@ enum bpf_return_type {
 	RET_PTR_TO_ALLOC_MEM,		/* returns a pointer to dynamically allocated memory */
 	RET_PTR_TO_MEM_OR_BTF_ID,	/* returns a pointer to a valid memory or a btf_id */
 	RET_PTR_TO_BTF_ID,		/* returns a pointer to a btf_id */
+	RET_PTR_TO_PACKET,		/* returns a pointer to a packet */
 	__BPF_RET_TYPE_MAX,
 
 	/* Extended ret_types. */
@@ -430,6 +431,7 @@ enum bpf_return_type {
 	RET_PTR_TO_SOCK_COMMON_OR_NULL	= PTR_MAYBE_NULL | RET_PTR_TO_SOCK_COMMON,
 	RET_PTR_TO_ALLOC_MEM_OR_NULL	= PTR_MAYBE_NULL | MEM_ALLOC | RET_PTR_TO_ALLOC_MEM,
 	RET_PTR_TO_BTF_ID_OR_NULL	= PTR_MAYBE_NULL | RET_PTR_TO_BTF_ID,
+	RET_PTR_TO_PACKET_OR_NULL	= PTR_MAYBE_NULL | RET_PTR_TO_PACKET,
 
 	/* This must be the last entry. Its purpose is to ensure the enum is
 	 * wide enough to hold the higher bits reserved for bpf_type_flag.
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 4eebea830613..3736cfbb325e 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5117,6 +5117,17 @@ union bpf_attr {
  *		0 on success.
  *		**-EINVAL** for invalid input
  *		**-EOPNOTSUPP** for unsupported delivery_time_type and protocol
+ *
+ * void *bpf_packet_pointer(void *ctx, u32 offset, u32 len)
+ *	Description
+ *		Return a pointer to linear area in packet at *offset* of length
+ *		*len*. The returned packet pointer cannot be compared to any
+ *		other packet pointers.
+ *
+ *		This helper is only available to XDP programs.
+ *	Return
+ *		Pointer to packet on success that can be accessed for *len*
+ *		bytes, or NULL when it fails.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5312,6 +5323,7 @@ union bpf_attr {
 	FN(xdp_store_bytes),		\
 	FN(copy_from_user_task),	\
 	FN(skb_set_delivery_time),      \
+	FN(packet_pointer),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 88ac2c833bed..e6e494e07f4c 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -257,6 +257,7 @@ struct bpf_call_arg_meta {
 	struct btf *ret_btf;
 	u32 ret_btf_id;
 	u32 subprogno;
+	int ret_pkt_len;
 };
 
 struct btf *btf_vmlinux;
@@ -5654,6 +5655,32 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
 			verbose(env, "R%d is not a known constant\n", regno);
 			return -EACCES;
 		}
+
+		if (meta->func_id == BPF_FUNC_packet_pointer) {
+			struct tnum range;
+
+			switch (arg + 1) {
+			case 2:
+				/* arg2 = offset, enforce that the range is [0, 0xffff] */
+				range = tnum_range(0, 0xffff);
+				if (!tnum_in(range, reg->var_off)) {
+					verbose(env, "R%d must be in range [0, 0xffff]\n", regno);
+					return -EINVAL;
+				}
+				break;
+			case 3:
+				/* arg3 = len, already checked to be constant */
+				if (!reg->var_off.value || reg->var_off.value > 0xffff) {
+					verbose(env, "R%d must be in range [1, 0xffff]\n", regno);
+					return -EINVAL;
+				}
+				meta->ret_pkt_len = reg->var_off.value;
+				break;
+			default:
+				verbose(env, "verifier internal error: bpf_xdp_pointer unknown arg\n");
+				return -EFAULT;
+			}
+		}
 	}
 
 	return err;
@@ -6873,6 +6900,16 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
 		 */
 		regs[BPF_REG_0].btf = btf_vmlinux;
 		regs[BPF_REG_0].btf_id = ret_btf_id;
+	} else if (base_type(ret_type) == RET_PTR_TO_PACKET) {
+		mark_reg_known_zero(env, regs, BPF_REG_0);
+		regs[BPF_REG_0].type = PTR_TO_PACKET | ret_flag;
+		regs[BPF_REG_0].pkt_uid = ++env->id_gen;
+		if (!meta.ret_pkt_len) {
+			verbose(env, "verifier internal error: ret_pkt_len unset\n");
+			return -EFAULT;
+		}
+		/* Already checked to be in range [1, 0xffff] */
+		regs[BPF_REG_0].range = meta.ret_pkt_len;
 	} else {
 		verbose(env, "unknown return type %u of func %s#%d\n",
 			base_type(ret_type), func_id_name(func_id), func_id);
diff --git a/net/core/filter.c b/net/core/filter.c
index 88767f7da150..4fc19b9e64c7 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3889,18 +3889,15 @@ static void bpf_xdp_copy_buf(struct xdp_buff *xdp, unsigned long off,
 	}
 }
 
-static void *bpf_xdp_pointer(struct xdp_buff *xdp, u32 offset, u32 len)
+BPF_CALL_3(bpf_xdp_pointer, struct xdp_buff *, xdp, u32, offset, u32, len)
 {
 	struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
 	u32 size = xdp->data_end - xdp->data;
 	void *addr = xdp->data;
 	int i;
 
-	if (unlikely(offset > 0xffff || len > 0xffff))
-		return ERR_PTR(-EFAULT);
-
 	if (offset + len > xdp_get_buff_len(xdp))
-		return ERR_PTR(-EINVAL);
+		return (unsigned long)NULL;
 
 	if (offset < size) /* linear area */
 		goto out;
@@ -3917,23 +3914,28 @@ static void *bpf_xdp_pointer(struct xdp_buff *xdp, u32 offset, u32 len)
 		offset -= frag_size;
 	}
 out:
-	return offset + len < size ? addr + offset : NULL;
+	return offset + len < size ? (unsigned long)addr + offset : (unsigned long)NULL;
 }
 
+static const struct bpf_func_proto bpf_xdp_pointer_proto = {
+	.func		= bpf_xdp_pointer,
+	.gpl_only	= false,
+	.ret_type	= RET_PTR_TO_PACKET_OR_NULL,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_SCALAR,
+	.arg3_type	= ARG_CONSTANT,
+};
+
 BPF_CALL_4(bpf_xdp_load_bytes, struct xdp_buff *, xdp, u32, offset,
 	   void *, buf, u32, len)
 {
-	void *ptr;
-
-	ptr = bpf_xdp_pointer(xdp, offset, len);
-	if (IS_ERR(ptr))
-		return PTR_ERR(ptr);
+	if (unlikely(offset > 0xffff || len > 0xffff))
+		return -EFAULT;
 
-	if (!ptr)
-		bpf_xdp_copy_buf(xdp, offset, buf, len, false);
-	else
-		memcpy(buf, ptr, len);
+	if (offset + len > xdp_get_buff_len(xdp))
+		return -EINVAL;
 
+	bpf_xdp_copy_buf(xdp, offset, buf, len, false);
 	return 0;
 }
 
@@ -3950,17 +3952,13 @@ static const struct bpf_func_proto bpf_xdp_load_bytes_proto = {
 BPF_CALL_4(bpf_xdp_store_bytes, struct xdp_buff *, xdp, u32, offset,
 	   void *, buf, u32, len)
 {
-	void *ptr;
-
-	ptr = bpf_xdp_pointer(xdp, offset, len);
-	if (IS_ERR(ptr))
-		return PTR_ERR(ptr);
+	if (unlikely(offset > 0xffff || len > 0xffff))
+		return -EFAULT;
 
-	if (!ptr)
-		bpf_xdp_copy_buf(xdp, offset, buf, len, true);
-	else
-		memcpy(ptr, buf, len);
+	if (offset + len > xdp_get_buff_len(xdp))
+		return -EINVAL;
 
+	bpf_xdp_copy_buf(xdp, offset, buf, len, true);
 	return 0;
 }
 
@@ -7820,6 +7818,8 @@ xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_xdp_load_bytes_proto;
 	case BPF_FUNC_xdp_store_bytes:
 		return &bpf_xdp_store_bytes_proto;
+	case BPF_FUNC_packet_pointer:
+		return &bpf_xdp_pointer_proto;
 	case BPF_FUNC_fib_lookup:
 		return &bpf_xdp_fib_lookup_proto;
 	case BPF_FUNC_check_mtu:
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 4eebea830613..3736cfbb325e 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -5117,6 +5117,17 @@ union bpf_attr {
  *		0 on success.
  *		**-EINVAL** for invalid input
  *		**-EOPNOTSUPP** for unsupported delivery_time_type and protocol
+ *
+ * void *bpf_packet_pointer(void *ctx, u32 offset, u32 len)
+ *	Description
+ *		Return a pointer to linear area in packet at *offset* of length
+ *		*len*. The returned packet pointer cannot be compared to any
+ *		other packet pointers.
+ *
+ *		This helper is only available to XDP programs.
+ *	Return
+ *		Pointer to packet on success that can be accessed for *len*
+ *		bytes, or NULL when it fails.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5312,6 +5323,7 @@ union bpf_attr {
 	FN(xdp_store_bytes),		\
 	FN(copy_from_user_task),	\
 	FN(skb_set_delivery_time),      \
+	FN(packet_pointer),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
2.35.1




[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux