Re: [PATCH v1 bpf-next 4/6] bpf: Support __kptr to local kptrs

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, Mar 09, 2023 at 10:01:09AM -0800, Dave Marchevsky wrote:
> If a PTR_TO_BTF_ID type comes from program BTF - not vmlinux or module
> BTF - it must have been allocated by bpf_obj_new and therefore must be
> free'd with bpf_obj_drop. Such a PTR_TO_BTF_ID is considered a "local
> kptr" and is tagged with MEM_ALLOC type tag by bpf_obj_new.
> 
> This patch adds support for treating __kptr-tagged pointers to "local
> kptrs" as having an implicit bpf_obj_drop destructor for referenced kptr
> acquire / release semantics. Consider the following example:
> 
>   struct node_data {
>           long key;
>           long data;
>           struct bpf_rb_node node;
>   };
> 
>   struct map_value {
>           struct node_data __kptr *node;
>   };
> 
>   struct {
>           __uint(type, BPF_MAP_TYPE_ARRAY);
>           __type(key, int);
>           __type(value, struct map_value);
>           __uint(max_entries, 1);
>   } some_nodes SEC(".maps");
> 
> If struct node_data had a matching definition in kernel BTF, the verifier would
> expect a destructor for the type to be registered. Since struct node_data does
> not match any type in kernel BTF, the verifier knows that there is no kfunc
> that provides a PTR_TO_BTF_ID to this type, and that such a PTR_TO_BTF_ID can
> only come from bpf_obj_new. So instead of searching for a registered dtor,
> a bpf_obj_drop dtor can be assumed.
> 
> This allows the runtime to properly destruct such kptrs in
> bpf_obj_free_fields, which enables maps to clean up map_vals w/ such
> kptrs when going away.
> 
> Implementation notes:
>   * "kernel_btf" variable is renamed to "kptr_btf" in btf_parse_kptr.
>     Before this patch, the variable would only ever point to vmlinux or
>     module BTFs, but now it can point to some program BTF for local kptr
>     type. It's later used to populate the (btf, btf_id) pair in kptr btf
>     field.
>   * It's necessary to btf_get the program BTF when populating btf_field
>     for local kptr. btf_record_free later does a btf_put.
>   * Behavior for non-local referenced kptrs is not modified, as
>     bpf_find_btf_id helper only searches vmlinux and module BTFs for
>     matching BTF type. If such a type is found, btf_field_kptr's btf will
>     pass btf_is_kernel check, and the associated release function is
>     some one-argument dtor. If btf_is_kernel check fails, associated
>     release function is two-arg bpf_obj_drop_impl. Before this patch
>     only btf_field_kptr's w/ kernel or module BTFs were created.
> 
> Signed-off-by: Dave Marchevsky <davemarchevsky@xxxxxx>
> ---
>  include/linux/bpf.h  | 11 ++++++++++-
>  include/linux/btf.h  |  2 --
>  kernel/bpf/btf.c     | 34 +++++++++++++++++++++++++---------
>  kernel/bpf/helpers.c | 11 ++++++++---
>  kernel/bpf/syscall.c | 14 +++++++++++++-
>  5 files changed, 56 insertions(+), 16 deletions(-)
> 
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 3a38db315f7f..756b85f0d0d3 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -189,10 +189,19 @@ enum btf_field_type {
>  				 BPF_RB_NODE | BPF_RB_ROOT,
>  };
>  
> +typedef void (*btf_dtor_kfunc_t)(void *);
> +typedef void (*btf_dtor_obj_drop)(void *, const struct btf_record *);
> +
>  struct btf_field_kptr {
>  	struct btf *btf;
>  	struct module *module;
> -	btf_dtor_kfunc_t dtor;
> +	union {
> +		/* dtor used if btf_is_kernel(btf), otherwise the type
> +		 * is program-allocated and obj_drop is used
> +		 */
> +		btf_dtor_kfunc_t dtor;
> +		btf_dtor_obj_drop obj_drop;
> +	};
>  	u32 btf_id;
>  };
>  
> diff --git a/include/linux/btf.h b/include/linux/btf.h
> index 1bba0827e8c4..d53b10cc55f2 100644
> --- a/include/linux/btf.h
> +++ b/include/linux/btf.h
> @@ -121,8 +121,6 @@ struct btf_struct_metas {
>  	struct btf_struct_meta types[];
>  };
>  
> -typedef void (*btf_dtor_kfunc_t)(void *);
> -
>  extern const struct file_operations btf_fops;
>  
>  void btf_get(struct btf *btf);
> diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
> index 37779ceefd09..165a8ef038f5 100644
> --- a/kernel/bpf/btf.c
> +++ b/kernel/bpf/btf.c
> @@ -3551,12 +3551,17 @@ static int btf_find_field(const struct btf *btf, const struct btf_type *t,
>  	return -EINVAL;
>  }
>  
> +extern void __bpf_obj_drop_impl(void *p, const struct btf_record *rec);
> +
>  static int btf_parse_kptr(const struct btf *btf, struct btf_field *field,
>  			  struct btf_field_info *info)
>  {
>  	struct module *mod = NULL;
>  	const struct btf_type *t;
> -	struct btf *kernel_btf;
> +	/* If a matching btf type is found in kernel or module BTFs, kptr_ref
> +	 * is that BTF, otherwise it's program BTF
> +	 */
> +	struct btf *kptr_btf;
>  	int ret;
>  	s32 id;
>  
> @@ -3565,7 +3570,17 @@ static int btf_parse_kptr(const struct btf *btf, struct btf_field *field,
>  	 */
>  	t = btf_type_by_id(btf, info->kptr.type_id);
>  	id = bpf_find_btf_id(__btf_name_by_offset(btf, t->name_off), BTF_INFO_KIND(t->info),
> -			     &kernel_btf);
> +			     &kptr_btf);
> +	if (id == -ENOENT && !btf_is_kernel(btf)) {

btf_is_kernel(btf) is confusing.
btf_parse()->btf_parse_struct_metas()->btf_parse_fields() is only called for user loaded BTFs.
We can do WARN_ON_ONCE(btf_is_kernel(btf)); here as a way to document it,
but checking it looks wrong.

> +		/* Type exists only in program BTF. Assume that it's a MEM_ALLOC
> +		 * kptr allocated via bpf_obj_new
> +		 */
> +		field->kptr.dtor = (void *)&__bpf_obj_drop_impl;
> +		id = info->kptr.type_id;
> +		kptr_btf = (struct btf *)btf;
> +		btf_get(kptr_btf);
> +		goto found_dtor;
> +	}
>  	if (id < 0)
>  		return id;
>  
> @@ -3582,20 +3597,20 @@ static int btf_parse_kptr(const struct btf *btf, struct btf_field *field,
>  		 * can be used as a referenced pointer and be stored in a map at
>  		 * the same time.
>  		 */
> -		dtor_btf_id = btf_find_dtor_kfunc(kernel_btf, id);
> +		dtor_btf_id = btf_find_dtor_kfunc(kptr_btf, id);
>  		if (dtor_btf_id < 0) {
>  			ret = dtor_btf_id;
>  			goto end_btf;
>  		}
>  
> -		dtor_func = btf_type_by_id(kernel_btf, dtor_btf_id);
> +		dtor_func = btf_type_by_id(kptr_btf, dtor_btf_id);
>  		if (!dtor_func) {
>  			ret = -ENOENT;
>  			goto end_btf;
>  		}
>  
> -		if (btf_is_module(kernel_btf)) {
> -			mod = btf_try_get_module(kernel_btf);
> +		if (btf_is_module(kptr_btf)) {
> +			mod = btf_try_get_module(kptr_btf);
>  			if (!mod) {
>  				ret = -ENXIO;
>  				goto end_btf;
> @@ -3605,7 +3620,7 @@ static int btf_parse_kptr(const struct btf *btf, struct btf_field *field,
>  		/* We already verified dtor_func to be btf_type_is_func
>  		 * in register_btf_id_dtor_kfuncs.
>  		 */
> -		dtor_func_name = __btf_name_by_offset(kernel_btf, dtor_func->name_off);
> +		dtor_func_name = __btf_name_by_offset(kptr_btf, dtor_func->name_off);
>  		addr = kallsyms_lookup_name(dtor_func_name);
>  		if (!addr) {
>  			ret = -EINVAL;
> @@ -3614,14 +3629,15 @@ static int btf_parse_kptr(const struct btf *btf, struct btf_field *field,
>  		field->kptr.dtor = (void *)addr;
>  	}
>  
> +found_dtor:
>  	field->kptr.btf_id = id;
> -	field->kptr.btf = kernel_btf;
> +	field->kptr.btf = kptr_btf;
>  	field->kptr.module = mod;
>  	return 0;
>  end_mod:
>  	module_put(mod);
>  end_btf:
> -	btf_put(kernel_btf);
> +	btf_put(kptr_btf);
>  	return ret;
>  }
>  
> diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
> index f9b7eeedce08..77d64b6951b9 100644
> --- a/kernel/bpf/helpers.c
> +++ b/kernel/bpf/helpers.c
> @@ -1896,14 +1896,19 @@ __bpf_kfunc void *bpf_obj_new_impl(u64 local_type_id__k, void *meta__ign)
>  	return p;
>  }
>  
> +void __bpf_obj_drop_impl(void *p, const struct btf_record *rec)
> +{
> +	if (rec)
> +		bpf_obj_free_fields(rec, p);
> +	bpf_mem_free(&bpf_global_ma, p);
> +}
> +
>  __bpf_kfunc void bpf_obj_drop_impl(void *p__alloc, void *meta__ign)
>  {
>  	struct btf_struct_meta *meta = meta__ign;
>  	void *p = p__alloc;
>  
> -	if (meta)
> -		bpf_obj_free_fields(meta->record, p);
> -	bpf_mem_free(&bpf_global_ma, p);
> +	__bpf_obj_drop_impl(p, meta ? meta->record : NULL);
>  }
>  
>  static void __bpf_list_add(struct bpf_list_node *node, struct bpf_list_head *head, bool tail)
> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> index cc4b7684910c..0684febc447a 100644
> --- a/kernel/bpf/syscall.c
> +++ b/kernel/bpf/syscall.c
> @@ -659,8 +659,10 @@ void bpf_obj_free_fields(const struct btf_record *rec, void *obj)
>  		return;
>  	fields = rec->fields;
>  	for (i = 0; i < rec->cnt; i++) {
> +		struct btf_struct_meta *pointee_struct_meta;
>  		const struct btf_field *field = &fields[i];
>  		void *field_ptr = obj + field->offset;
> +		void *xchgd_field;
>  
>  		switch (fields[i].type) {
>  		case BPF_SPIN_LOCK:
> @@ -672,7 +674,17 @@ void bpf_obj_free_fields(const struct btf_record *rec, void *obj)
>  			WRITE_ONCE(*(u64 *)field_ptr, 0);
>  			break;
>  		case BPF_KPTR_REF:
> -			field->kptr.dtor((void *)xchg((unsigned long *)field_ptr, 0));
> +			xchgd_field = (void *)xchg((unsigned long *)field_ptr, 0);
> +			if (!btf_is_kernel(field->kptr.btf)) {
> +				pointee_struct_meta = btf_find_struct_meta(field->kptr.btf,
> +									   field->kptr.btf_id);
> +				WARN_ON_ONCE(!pointee_struct_meta);
> +				field->kptr.obj_drop(xchgd_field, pointee_struct_meta ?
> +								  pointee_struct_meta->record :
> +								  NULL);
> +			} else {
> +				field->kptr.dtor(xchgd_field);
> +			}
>  			break;
>  		case BPF_LIST_HEAD:
>  			if (WARN_ON_ONCE(rec->spin_lock_off < 0))
> -- 
> 2.34.1
> 



[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux