[PATCH bpf-next v1 3/7] bpf: Add bpf_dynptr_from_mem, bpf_malloc, bpf_free

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Joanne Koong <joannelkoong@xxxxxxxxx>

This patch adds 3 new APIs and the bulk of the verifier work for
supporting dynamic pointers in bpf.

There are different types of dynptrs. This patch starts with the most
basic ones, ones that reference a program's local memory
(eg a stack variable) and ones that reference memory that is dynamically
allocated on behalf of the program. If the memory is dynamically
allocated by the program, the program *must* free it before the program
exits. This is enforced by the verifier.

The added APIs are:

long bpf_dynptr_from_mem(void *data, u32 size, struct bpf_dynptr *ptr);
long bpf_malloc(u32 size, struct bpf_dynptr *ptr);
void bpf_free(struct bpf_dynptr *ptr);

This patch sets up the verifier to support dynptrs. Dynptrs will always
reside on the program's stack frame. As such, their state is tracked
in their corresponding stack slot, which includes the type of dynptr
(DYNPTR_LOCAL vs. DYNPTR_MALLOC).

When the program passes in an uninitialized dynptr (ARG_PTR_TO_DYNPTR |
MEM_UNINIT), the stack slots corresponding to the frame pointer
where the dynptr resides at is marked as STACK_DYNPTR. For helper functions
that take in iniitalized dynptrs (such as the next patch in this series
which supports dynptr reads/writes), the verifier enforces that the
dynptr has been initialized by checking that their corresponding stack
slots have been marked as STACK_DYNPTR. Dynptr release functions
(eg bpf_free) will clear the stack slots. The verifier enforces at program
exit that there are no dynptr stack slots that need to be released.

There are other constraints that are enforced by the verifier as
well, such as that the dynptr cannot be written to directly by the bpf
program or by non-dynptr helper functions. The last patch in this series
contains tests that trigger different cases that the verifier needs to
successfully reject.

Signed-off-by: Joanne Koong <joannelkoong@xxxxxxxxx>
---
 include/linux/bpf.h            |  74 ++++++++-
 include/linux/bpf_verifier.h   |  18 +++
 include/uapi/linux/bpf.h       |  40 +++++
 kernel/bpf/helpers.c           |  88 +++++++++++
 kernel/bpf/verifier.c          | 266 ++++++++++++++++++++++++++++++++-
 scripts/bpf_doc.py             |   2 +
 tools/include/uapi/linux/bpf.h |  40 +++++
 7 files changed, 521 insertions(+), 7 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index cb9f42866cde..e0fcff9f2aee 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -346,7 +346,13 @@ enum bpf_type_flag {
 
 	MEM_RELEASE		= BIT(6 + BPF_BASE_TYPE_BITS),
 
-	__BPF_TYPE_LAST_FLAG	= MEM_RELEASE,
+	/* DYNPTR points to a program's local memory (eg stack variable). */
+	DYNPTR_TYPE_LOCAL	= BIT(7 + BPF_BASE_TYPE_BITS),
+
+	/* DYNPTR points to dynamically allocated memory. */
+	DYNPTR_TYPE_MALLOC	= BIT(8 + BPF_BASE_TYPE_BITS),
+
+	__BPF_TYPE_LAST_FLAG	= DYNPTR_TYPE_MALLOC,
 };
 
 /* Max number of base types. */
@@ -390,6 +396,7 @@ enum bpf_arg_type {
 	ARG_PTR_TO_STACK,	/* pointer to stack */
 	ARG_PTR_TO_CONST_STR,	/* pointer to a null terminated read-only string */
 	ARG_PTR_TO_TIMER,	/* pointer to bpf_timer */
+	ARG_PTR_TO_DYNPTR,      /* pointer to bpf_dynptr. See bpf_type_flag for dynptr type */
 	__BPF_ARG_TYPE_MAX,
 
 	/* Extended arg_types. */
@@ -2396,4 +2403,69 @@ int bpf_bprintf_prepare(char *fmt, u32 fmt_size, const u64 *raw_args,
 			u32 **bin_buf, u32 num_args);
 void bpf_bprintf_cleanup(void);
 
+/* the implementation of the opaque uapi struct bpf_dynptr */
+struct bpf_dynptr_kern {
+	u8 *data;
+	/* The upper 4 bits are reserved. Bit 29 denotes whether the
+	 * dynptr is read-only. Bits 30 - 32 denote the dynptr type.
+	 */
+	u32 size;
+	u32 offset;
+} __aligned(8);
+
+enum bpf_dynptr_type {
+	/* Local memory used by the bpf program (eg stack variable) */
+	BPF_DYNPTR_TYPE_LOCAL,
+	/* Memory allocated dynamically by the kernel for the dynptr */
+	BPF_DYNPTR_TYPE_MALLOC,
+};
+
+/* The upper 4 bits of dynptr->size are reserved. Consequently, the
+ * maximum supported size is 2^28 - 1.
+ */
+#define DYNPTR_MAX_SIZE	((1UL << 28) - 1)
+#define DYNPTR_SIZE_MASK	0xFFFFFFF
+#define DYNPTR_TYPE_SHIFT	29
+
+static inline enum bpf_dynptr_type bpf_dynptr_get_type(struct bpf_dynptr_kern *ptr)
+{
+	return ptr->size >> DYNPTR_TYPE_SHIFT;
+}
+
+static inline void bpf_dynptr_set_type(struct bpf_dynptr_kern *ptr, enum bpf_dynptr_type type)
+{
+	ptr->size |= type << DYNPTR_TYPE_SHIFT;
+}
+
+static inline u32 bpf_dynptr_get_size(struct bpf_dynptr_kern *ptr)
+{
+	return ptr->size & DYNPTR_SIZE_MASK;
+}
+
+static inline int bpf_dynptr_check_size(u32 size)
+{
+	if (size == 0)
+		return -EINVAL;
+
+	if (size > DYNPTR_MAX_SIZE)
+		return -E2BIG;
+
+	return 0;
+}
+
+static inline int bpf_dynptr_check_off_len(struct bpf_dynptr_kern *ptr, u32 offset, u32 len)
+{
+	u32 capacity = bpf_dynptr_get_size(ptr) - ptr->offset;
+
+	if (len > capacity || offset > capacity - len)
+		return -EINVAL;
+
+	return 0;
+}
+
+void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data, enum bpf_dynptr_type type,
+		     u32 offset, u32 size);
+
+void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr);
+
 #endif /* _LINUX_BPF_H */
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 7a01adc9e13f..bc0f105148f9 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -72,6 +72,18 @@ struct bpf_reg_state {
 
 		u32 mem_size; /* for PTR_TO_MEM | PTR_TO_MEM_OR_NULL */
 
+		/* for dynptr stack slots */
+		struct {
+			enum bpf_dynptr_type dynptr_type;
+			/* A dynptr is 16 bytes so it takes up 2 stack slots.
+			 * We need to track which slot is the first slot
+			 * to protect against cases where the user may try to
+			 * pass in an address starting at the second slot of the
+			 * dynptr.
+			 */
+			bool dynptr_first_slot;
+		};
+
 		/* Max size from any of the above. */
 		struct {
 			unsigned long raw1;
@@ -174,9 +186,15 @@ enum bpf_stack_slot_type {
 	STACK_SPILL,      /* register spilled into stack */
 	STACK_MISC,	  /* BPF program wrote some data into this slot */
 	STACK_ZERO,	  /* BPF program wrote constant zero */
+	/* A dynptr is stored in this stack slot. The type of dynptr
+	 * is stored in bpf_stack_state->spilled_ptr.type
+	 */
+	STACK_DYNPTR,
 };
 
 #define BPF_REG_SIZE 8	/* size of eBPF register in bytes */
+#define BPF_DYNPTR_SIZE 16 /* size of a struct bpf_dynptr in bytes */
+#define BPF_DYNPTR_NR_SLOTS 2
 
 struct bpf_stack_state {
 	struct bpf_reg_state spilled_ptr;
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index d14b10b85e51..6a57d8a1b882 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5143,6 +5143,38 @@ union bpf_attr {
  *		The **hash_algo** is returned on success,
  *		**-EOPNOTSUP** if the hash calculation failed or **-EINVAL** if
  *		invalid arguments are passed.
+ *
+ * long bpf_dynptr_from_mem(void *data, u32 size, struct bpf_dynptr *ptr)
+ *	Description
+ *		Get a dynptr to local memory *data*.
+ *
+ *		For a dynptr to a dynamic memory allocation, please use bpf_malloc
+ *		instead.
+ *
+ *		The maximum *size* supported is DYNPTR_MAX_SIZE.
+ *	Return
+ *		0 on success or -EINVAL if the size is 0 or exceeds DYNPTR_MAX_SIZE.
+ *
+ * long bpf_malloc(u32 size, struct bpf_dynptr *ptr)
+ *	Description
+ *		Dynamically allocate memory of *size* bytes.
+ *
+ *		Every call to bpf_malloc must have a corresponding
+ *		bpf_free, regardless of whether the bpf_malloc
+ *		succeeded.
+ *
+ *		The maximum *size* supported is DYNPTR_MAX_SIZE.
+ *	Return
+ *		0 on success, -ENOMEM if there is not enough memory for the
+ *		allocation, -EINVAL if the size is 0 or exceeds DYNPTR_MAX_SIZE.
+ *
+ * void bpf_free(struct bpf_dynptr *ptr)
+ *	Description
+ *		Free memory allocated by bpf_malloc.
+ *
+ *		After this operation, *ptr* will be an invalidated dynptr.
+ *	Return
+ *		Void.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5339,6 +5371,9 @@ union bpf_attr {
 	FN(copy_from_user_task),	\
 	FN(skb_set_tstamp),		\
 	FN(ima_file_hash),		\
+	FN(dynptr_from_mem),		\
+	FN(malloc),			\
+	FN(free),			\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
@@ -6486,6 +6521,11 @@ struct bpf_timer {
 	__u64 :64;
 } __attribute__((aligned(8)));
 
+struct bpf_dynptr {
+	__u64 :64;
+	__u64 :64;
+} __attribute__((aligned(8)));
+
 struct bpf_sysctl {
 	__u32	write;		/* Sysctl is being read (= 0) or written (= 1).
 				 * Allows 1,2,4-byte read, but no write.
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index cc6d480c5c23..ed5a7d9d0a18 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -1374,6 +1374,88 @@ void bpf_timer_cancel_and_free(void *val)
 	kfree(t);
 }
 
+void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data, enum bpf_dynptr_type type,
+		     u32 offset, u32 size)
+{
+	ptr->data = data;
+	ptr->offset = offset;
+	ptr->size = size;
+	bpf_dynptr_set_type(ptr, type);
+}
+
+void bpf_dynptr_set_null(struct bpf_dynptr_kern *ptr)
+{
+	memset(ptr, 0, sizeof(*ptr));
+}
+
+BPF_CALL_3(bpf_dynptr_from_mem, void *, data, u32, size, struct bpf_dynptr_kern *, ptr)
+{
+	int err;
+
+	err = bpf_dynptr_check_size(size);
+	if (err) {
+		bpf_dynptr_set_null(ptr);
+		return err;
+	}
+
+	bpf_dynptr_init(ptr, data, BPF_DYNPTR_TYPE_LOCAL, 0, size);
+
+	return 0;
+}
+
+const struct bpf_func_proto bpf_dynptr_from_mem_proto = {
+	.func		= bpf_dynptr_from_mem,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_MEM,
+	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
+	.arg3_type	= ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL | MEM_UNINIT,
+};
+
+BPF_CALL_2(bpf_malloc, u32, size, struct bpf_dynptr_kern *, ptr)
+{
+	void *data;
+	int err;
+
+	err = bpf_dynptr_check_size(size);
+	if (err) {
+		bpf_dynptr_set_null(ptr);
+		return err;
+	}
+
+	data = kmalloc(size, GFP_ATOMIC);
+	if (!data) {
+		bpf_dynptr_set_null(ptr);
+		return -ENOMEM;
+	}
+
+	bpf_dynptr_init(ptr, data, BPF_DYNPTR_TYPE_MALLOC, 0, size);
+
+	return 0;
+}
+
+const struct bpf_func_proto bpf_malloc_proto = {
+	.func		= bpf_malloc,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_ANYTHING,
+	.arg2_type	= ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_MALLOC | MEM_UNINIT,
+};
+
+BPF_CALL_1(bpf_free, struct bpf_dynptr_kern *, dynptr)
+{
+	kfree(dynptr->data);
+	bpf_dynptr_set_null(dynptr);
+	return 0;
+}
+
+const struct bpf_func_proto bpf_free_proto = {
+	.func		= bpf_free,
+	.gpl_only	= false,
+	.ret_type	= RET_VOID,
+	.arg1_type	= ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_MALLOC | MEM_RELEASE,
+};
+
 const struct bpf_func_proto bpf_get_current_task_proto __weak;
 const struct bpf_func_proto bpf_get_current_task_btf_proto __weak;
 const struct bpf_func_proto bpf_probe_read_user_proto __weak;
@@ -1426,6 +1508,12 @@ bpf_base_func_proto(enum bpf_func_id func_id)
 		return &bpf_loop_proto;
 	case BPF_FUNC_strncmp:
 		return &bpf_strncmp_proto;
+	case BPF_FUNC_dynptr_from_mem:
+		return &bpf_dynptr_from_mem_proto;
+	case BPF_FUNC_malloc:
+		return &bpf_malloc_proto;
+	case BPF_FUNC_free:
+		return &bpf_free_proto;
 	default:
 		break;
 	}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 80e53303713e..cb3bcb54d4b4 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -479,6 +479,11 @@ static bool type_is_release_mem(u32 type)
 	return type & MEM_RELEASE;
 }
 
+static bool type_is_uninit_mem(u32 type)
+{
+	return type & MEM_UNINIT;
+}
+
 static bool may_be_acquire_function(enum bpf_func_id func_id)
 {
 	return func_id == BPF_FUNC_sk_lookup_tcp ||
@@ -583,6 +588,7 @@ static char slot_type_char[] = {
 	[STACK_SPILL]	= 'r',
 	[STACK_MISC]	= 'm',
 	[STACK_ZERO]	= '0',
+	[STACK_DYNPTR]	= 'd',
 };
 
 static void print_liveness(struct bpf_verifier_env *env,
@@ -598,6 +604,18 @@ static void print_liveness(struct bpf_verifier_env *env,
 		verbose(env, "D");
 }
 
+static inline int get_spi(s32 off)
+{
+	return (-off - 1) / BPF_REG_SIZE;
+}
+
+static bool check_spi_bounds(struct bpf_func_state *state, int spi, u32 nr_slots)
+{
+	int allocated_slots = state->allocated_stack / BPF_REG_SIZE;
+
+	return allocated_slots > spi && nr_slots - 1 <= spi;
+}
+
 static struct bpf_func_state *func(struct bpf_verifier_env *env,
 				   const struct bpf_reg_state *reg)
 {
@@ -649,6 +667,133 @@ static void mark_verifier_state_scratched(struct bpf_verifier_env *env)
 	env->scratched_stack_slots = ~0ULL;
 }
 
+static int arg_to_dynptr_type(enum bpf_arg_type arg_type, enum bpf_dynptr_type *dynptr_type)
+{
+	int type = arg_type & (DYNPTR_TYPE_LOCAL | DYNPTR_TYPE_MALLOC);
+
+	switch (type) {
+	case DYNPTR_TYPE_LOCAL:
+		*dynptr_type = BPF_DYNPTR_TYPE_LOCAL;
+		break;
+	case DYNPTR_TYPE_MALLOC:
+		*dynptr_type = BPF_DYNPTR_TYPE_MALLOC;
+		break;
+	default:
+		/* Can't have more than one type set and can't have no
+		 * type set
+		 */
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static bool dynptr_type_refcounted(struct bpf_func_state *state, int spi)
+{
+	enum bpf_dynptr_type type = state->stack[spi].spilled_ptr.dynptr_type;
+
+	return type == BPF_DYNPTR_TYPE_MALLOC;
+}
+
+static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
+				   enum bpf_arg_type arg_type)
+{
+	struct bpf_func_state *state = cur_func(env);
+	enum bpf_dynptr_type type;
+	int spi, i, err;
+
+	spi = get_spi(reg->off);
+
+	if (!check_spi_bounds(state, spi, BPF_DYNPTR_NR_SLOTS))
+		return -EINVAL;
+
+	err = arg_to_dynptr_type(arg_type, &type);
+	if (unlikely(err))
+		return err;
+
+	for (i = 0; i < BPF_REG_SIZE; i++) {
+		state->stack[spi].slot_type[i] = STACK_DYNPTR;
+		state->stack[spi - 1].slot_type[i] = STACK_DYNPTR;
+	}
+
+	state->stack[spi].spilled_ptr.dynptr_type = type;
+	state->stack[spi - 1].spilled_ptr.dynptr_type = type;
+
+	state->stack[spi].spilled_ptr.dynptr_first_slot = true;
+
+	return 0;
+}
+
+static int unmark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
+{
+	struct bpf_func_state *state = func(env, reg);
+	int spi, i;
+
+	spi = get_spi(reg->off);
+
+	if (!check_spi_bounds(state, spi, BPF_DYNPTR_NR_SLOTS))
+		return -EINVAL;
+
+	for (i = 0; i < BPF_REG_SIZE; i++) {
+		state->stack[spi].slot_type[i] = STACK_INVALID;
+		state->stack[spi - 1].slot_type[i] = STACK_INVALID;
+	}
+
+	state->stack[spi].spilled_ptr.dynptr_type = 0;
+	state->stack[spi].spilled_ptr.dynptr_first_slot = 0;
+	state->stack[spi - 1].spilled_ptr.dynptr_type = 0;
+
+	return 0;
+}
+
+/* Check if the dynptr argument is a proper initialized dynptr */
+static bool check_dynptr_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
+			      enum bpf_arg_type arg_type)
+{
+	struct bpf_func_state *state = func(env, reg);
+	enum bpf_dynptr_type expected_type;
+	int spi, err;
+
+	/* Can't pass in a dynptr at a weird offset */
+	if (reg->off % BPF_REG_SIZE)
+		return false;
+
+	spi = get_spi(reg->off);
+
+	if (!check_spi_bounds(state, spi, BPF_DYNPTR_NR_SLOTS))
+		return false;
+
+	if (!state->stack[spi].spilled_ptr.dynptr_first_slot)
+		return false;
+
+	if (state->stack[spi].slot_type[0] != STACK_DYNPTR)
+		return false;
+
+	/* ARG_PTR_TO_DYNPTR takes any type of dynptr */
+	if (arg_type == ARG_PTR_TO_DYNPTR)
+		return true;
+
+	err = arg_to_dynptr_type(arg_type, &expected_type);
+	if (unlikely(err))
+		return err;
+
+	return state->stack[spi].spilled_ptr.dynptr_type == expected_type;
+}
+
+static bool stack_access_into_dynptr(struct bpf_func_state *state, int spi, int size)
+{
+	int nr_slots, i;
+
+	nr_slots = min(roundup(size, BPF_REG_SIZE) / BPF_REG_SIZE, spi + 1);
+
+	for (i = 0; i < nr_slots; i++) {
+		if (state->stack[spi - i].slot_type[0] == STACK_DYNPTR)
+			return true;
+	}
+
+	return false;
+}
+
 /* The reg state of a pointer or a bounded scalar was saved when
  * it was spilled to the stack.
  */
@@ -2885,6 +3030,12 @@ static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
 	}
 
 	mark_stack_slot_scratched(env, spi);
+
+	if (stack_access_into_dynptr(state, spi, size)) {
+		verbose(env, "direct write into dynptr is not permitted\n");
+		return -EINVAL;
+	}
+
 	if (reg && !(off % BPF_REG_SIZE) && register_is_bounded(reg) &&
 	    !register_is_null(reg) && env->bpf_capable) {
 		if (dst_reg != BPF_REG_FP) {
@@ -3006,6 +3157,12 @@ static int check_stack_write_var_off(struct bpf_verifier_env *env,
 		slot = -i - 1;
 		spi = slot / BPF_REG_SIZE;
 		stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
+
+		if (*stype == STACK_DYNPTR) {
+			verbose(env, "direct write into dynptr is not permitted\n");
+			return -EINVAL;
+		}
+
 		mark_stack_slot_scratched(env, spi);
 
 		if (!env->allow_ptr_leaks
@@ -5153,6 +5310,16 @@ static bool arg_type_is_int_ptr(enum bpf_arg_type type)
 	       type == ARG_PTR_TO_LONG;
 }
 
+static inline bool arg_type_is_dynptr(enum bpf_arg_type type)
+{
+	return base_type(type) == ARG_PTR_TO_DYNPTR;
+}
+
+static inline bool arg_type_is_dynptr_uninit(enum bpf_arg_type type)
+{
+	return arg_type_is_dynptr(type) && type & MEM_UNINIT;
+}
+
 static int int_ptr_type_to_size(enum bpf_arg_type type)
 {
 	if (type == ARG_PTR_TO_INT)
@@ -5290,6 +5457,7 @@ static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
 	[ARG_PTR_TO_STACK]		= &stack_ptr_types,
 	[ARG_PTR_TO_CONST_STR]		= &const_str_ptr_types,
 	[ARG_PTR_TO_TIMER]		= &timer_types,
+	[ARG_PTR_TO_DYNPTR]		= &stack_ptr_types,
 };
 
 static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
@@ -5408,6 +5576,15 @@ int check_func_arg_reg_off(struct bpf_verifier_env *env,
 	return __check_ptr_off_reg(env, reg, regno, fixed_off_ok);
 }
 
+/*
+ * Determines whether the id used for reference tracking is held in a stack slot
+ * or in a register
+ */
+static bool id_in_stack_slot(enum bpf_arg_type arg_type)
+{
+	return arg_type_is_dynptr(arg_type);
+}
+
 static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
 			  struct bpf_call_arg_meta *meta,
 			  const struct bpf_func_proto *fn)
@@ -5458,10 +5635,19 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
 		return err;
 
 	arg_release = type_is_release_mem(arg_type);
-	if (arg_release && !reg->ref_obj_id) {
-		verbose(env, "R%d arg #%d is an unacquired reference and hence cannot be released\n",
-			regno, arg + 1);
-		return -EINVAL;
+	if (arg_release) {
+		if (id_in_stack_slot(arg_type)) {
+			struct bpf_func_state *state = func(env, reg);
+			int spi = get_spi(reg->off);
+
+			if (!state->stack[spi].spilled_ptr.id)
+				goto unacquired_ref_err;
+		} else if (!reg->ref_obj_id)  {
+unacquired_ref_err:
+			verbose(env, "R%d arg #%d is an unacquired reference and hence cannot be released\n",
+				regno, arg + 1);
+			return -EINVAL;
+		}
 	}
 
 	err = check_func_arg_reg_off(env, reg, regno, arg_type, arg_release);
@@ -5572,6 +5758,40 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
 		bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO);
 
 		err = check_mem_size_reg(env, reg, regno, zero_size_allowed, meta);
+	} else if (arg_type_is_dynptr(arg_type)) {
+		bool initialized = check_dynptr_init(env, reg, arg_type);
+
+		if (type_is_uninit_mem(arg_type)) {
+			if (initialized) {
+				verbose(env, "Arg #%d dynptr cannot be an initialized dynptr\n",
+					arg + 1);
+				return -EINVAL;
+			}
+			meta->raw_mode = true;
+			err = check_helper_mem_access(env, regno, BPF_DYNPTR_SIZE, false, meta);
+			/* For now, we do not allow dynptrs to point to existing
+			 * refcounted memory
+			 */
+			if (reg_type_may_be_refcounted_or_null(regs[BPF_REG_1].type)) {
+				verbose(env, "Arg #%d dynptr memory cannot be potentially refcounted\n",
+					arg + 1);
+				return -EINVAL;
+			}
+		} else {
+			if (!initialized) {
+				char *err_extra = "";
+
+				if (arg_type & DYNPTR_TYPE_LOCAL)
+					err_extra = "local ";
+				else if (arg_type & DYNPTR_TYPE_MALLOC)
+					err_extra = "malloc ";
+				verbose(env, "Expected an initialized %sdynptr as arg #%d\n",
+					err_extra, arg + 1);
+				return -EINVAL;
+			}
+			if (type_is_release_mem(arg_type))
+				err = unmark_stack_slots_dynptr(env, reg);
+		}
 	} else if (arg_type_is_alloc_size(arg_type)) {
 		if (!tnum_is_const(reg->var_off)) {
 			verbose(env, "R%d is not a known constant'\n",
@@ -6552,6 +6772,25 @@ static int check_reference_leak(struct bpf_verifier_env *env)
 	return state->acquired_refs ? -EINVAL : 0;
 }
 
+/* Called at BPF_EXIT to detect if there are any reference-tracked dynptrs that have
+ * not been released. Dynptrs to local memory do not need to be released.
+ */
+static int check_dynptr_unreleased(struct bpf_verifier_env *env)
+{
+	struct bpf_func_state *state = cur_func(env);
+	int i;
+
+	for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
+		if (state->stack[i].slot_type[0] == STACK_DYNPTR &&
+		    dynptr_type_refcounted(state, i)) {
+			verbose(env, "spi=%d is an unreleased dynptr\n", i);
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
 static int check_bpf_snprintf_call(struct bpf_verifier_env *env,
 				   struct bpf_reg_state *regs)
 {
@@ -6693,6 +6932,14 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
 			return err;
 	}
 
+	regs = cur_regs(env);
+
+	for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) {
+		if (arg_type_is_dynptr_uninit(fn->arg_type[i]))
+			err = mark_stack_slots_dynptr(env, &regs[BPF_REG_1 + i],
+						      fn->arg_type[i]);
+	}
+
 	if (meta.ref_obj_id) {
 		err = release_reference(env, meta.ref_obj_id);
 		if (err) {
@@ -6702,8 +6949,6 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
 		}
 	}
 
-	regs = cur_regs(env);
-
 	switch (func_id) {
 	case BPF_FUNC_tail_call:
 		err = check_reference_leak(env);
@@ -6711,6 +6956,11 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
 			verbose(env, "tail_call would lead to reference leak\n");
 			return err;
 		}
+		err = check_dynptr_unreleased(env);
+		if (err) {
+			verbose(env, "tail_call would lead to dynptr memory leak\n");
+			return err;
+		}
 		break;
 	case BPF_FUNC_get_local_storage:
 		/* check that flags argument in get_local_storage(map, flags) is 0,
@@ -11703,6 +11953,10 @@ static int do_check(struct bpf_verifier_env *env)
 					return -EINVAL;
 				}
 
+				err = check_dynptr_unreleased(env);
+				if (err)
+					return err;
+
 				if (state->curframe) {
 					/* exit from nested function */
 					err = prepare_func_exit(env, &env->insn_idx);
diff --git a/scripts/bpf_doc.py b/scripts/bpf_doc.py
index 096625242475..766dcbc73897 100755
--- a/scripts/bpf_doc.py
+++ b/scripts/bpf_doc.py
@@ -633,6 +633,7 @@ class PrinterHelpers(Printer):
             'struct socket',
             'struct file',
             'struct bpf_timer',
+            'struct bpf_dynptr',
     ]
     known_types = {
             '...',
@@ -682,6 +683,7 @@ class PrinterHelpers(Printer):
             'struct socket',
             'struct file',
             'struct bpf_timer',
+            'struct bpf_dynptr',
     }
     mapped_types = {
             'u8': '__u8',
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index d14b10b85e51..6a57d8a1b882 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -5143,6 +5143,38 @@ union bpf_attr {
  *		The **hash_algo** is returned on success,
  *		**-EOPNOTSUP** if the hash calculation failed or **-EINVAL** if
  *		invalid arguments are passed.
+ *
+ * long bpf_dynptr_from_mem(void *data, u32 size, struct bpf_dynptr *ptr)
+ *	Description
+ *		Get a dynptr to local memory *data*.
+ *
+ *		For a dynptr to a dynamic memory allocation, please use bpf_malloc
+ *		instead.
+ *
+ *		The maximum *size* supported is DYNPTR_MAX_SIZE.
+ *	Return
+ *		0 on success or -EINVAL if the size is 0 or exceeds DYNPTR_MAX_SIZE.
+ *
+ * long bpf_malloc(u32 size, struct bpf_dynptr *ptr)
+ *	Description
+ *		Dynamically allocate memory of *size* bytes.
+ *
+ *		Every call to bpf_malloc must have a corresponding
+ *		bpf_free, regardless of whether the bpf_malloc
+ *		succeeded.
+ *
+ *		The maximum *size* supported is DYNPTR_MAX_SIZE.
+ *	Return
+ *		0 on success, -ENOMEM if there is not enough memory for the
+ *		allocation, -EINVAL if the size is 0 or exceeds DYNPTR_MAX_SIZE.
+ *
+ * void bpf_free(struct bpf_dynptr *ptr)
+ *	Description
+ *		Free memory allocated by bpf_malloc.
+ *
+ *		After this operation, *ptr* will be an invalidated dynptr.
+ *	Return
+ *		Void.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5339,6 +5371,9 @@ union bpf_attr {
 	FN(copy_from_user_task),	\
 	FN(skb_set_tstamp),		\
 	FN(ima_file_hash),		\
+	FN(dynptr_from_mem),		\
+	FN(malloc),			\
+	FN(free),			\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
@@ -6486,6 +6521,11 @@ struct bpf_timer {
 	__u64 :64;
 } __attribute__((aligned(8)));
 
+struct bpf_dynptr {
+	__u64 :64;
+	__u64 :64;
+} __attribute__((aligned(8)));
+
 struct bpf_sysctl {
 	__u32	write;		/* Sysctl is being read (= 0) or written (= 1).
 				 * Allows 1,2,4-byte read, but no write.
-- 
2.30.2





[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux