Re: [PATCH bpf-next v1 07/19] bpf: create anonymous bpf iterator

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Mon, Apr 27, 2020 at 01:12:42PM -0700, Yonghong Song wrote:
> A new bpf command BPF_ITER_CREATE is added.
> 
> The anonymous bpf iterator is seq_file based.
> The seq_file private data are referenced by targets.
> The bpf_iter infrastructure allocated additional space
> at seq_file->private after the space used by targets
> to store some meta data, e.g.,
>   prog:       prog to run
>   session_id: an unique id for each opened seq_file
>   seq_num:    how many times bpf programs are queried in this session
>   has_last:   indicate whether or not bpf_prog has been called after
>               all valid objects have been processed
> 
> A map between file and prog/link is established to help
> fops->release(). When fops->release() is called, just based on
> inode and file, bpf program cannot be located since target
> seq_priv_size not available. This map helps retrieve the prog
> whose reference count needs to be decremented.
How about putting "struct extra_priv_data" at the beginning of
the seq_file's private store instead since its size is known.
seq->private can point to an aligned byte after
+sizeof(struct extra_priv_data).

[ ... ]

> diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
> index fc1ce5ee5c3f..1f4e778d1814 100644
> --- a/kernel/bpf/bpf_iter.c
> +++ b/kernel/bpf/bpf_iter.c

[ ... ]

> @@ -26,6 +40,50 @@ static bool bpf_iter_inited = false;
>  /* protect bpf_iter_link.link->prog upddate */
>  static struct mutex bpf_iter_mutex;
>  
> +/* Since at anon seq_file release function, the prog cannot
> + * be retrieved since target seq_priv_size is not available.
> + * Keep a list of <anon_file, prog> mapping, so that
> + * at file release stage, the prog can be released properly.
> + */
> +static struct list_head anon_iter_info;
> +static struct mutex anon_iter_info_mutex;
> +
> +/* incremented on every opened seq_file */
> +static atomic64_t session_id;
> +
> +static u32 get_total_priv_dsize(u32 old_size)
> +{
> +	return roundup(old_size, 8) + sizeof(struct extra_priv_data);
> +}
> +
> +static void *get_extra_priv_dptr(void *old_ptr, u32 old_size)
> +{
> +	return old_ptr + roundup(old_size, 8);
> +}
> +
> +static int anon_iter_release(struct inode *inode, struct file *file)
> +{
> +	struct anon_file_prog_assoc *finfo;
> +
> +	mutex_lock(&anon_iter_info_mutex);
> +	list_for_each_entry(finfo, &anon_iter_info, list) {
> +		if (finfo->file == file) {
> +			bpf_prog_put(finfo->prog);
> +			list_del(&finfo->list);
> +			kfree(finfo);
> +			break;
> +		}
> +	}
> +	mutex_unlock(&anon_iter_info_mutex);
> +
> +	return seq_release_private(inode, file);
> +}
> +
> +static const struct file_operations anon_bpf_iter_fops = {
> +	.read		= seq_read,
> +	.release	= anon_iter_release,
> +};
> +
>  int bpf_iter_reg_target(struct bpf_iter_reg *reg_info)
>  {
>  	struct bpf_iter_target_info *tinfo;

[ ... ]

> @@ -150,3 +223,90 @@ int bpf_iter_link_replace(struct bpf_link *link, struct bpf_prog *old_prog,
>  	mutex_unlock(&bpf_iter_mutex);
>  	return ret;
>  }
> +
> +static void init_seq_file(void *priv_data, struct bpf_iter_target_info *tinfo,
> +			  struct bpf_prog *prog)
> +{
> +	struct extra_priv_data *extra_data;
> +
> +	if (tinfo->target_feature & BPF_DUMP_SEQ_NET_PRIVATE)
> +		set_seq_net_private((struct seq_net_private *)priv_data,
> +				    current->nsproxy->net_ns);
> +
> +	extra_data = get_extra_priv_dptr(priv_data, tinfo->seq_priv_size);
> +	extra_data->session_id = atomic64_add_return(1, &session_id);
> +	extra_data->prog = prog;
> +	extra_data->seq_num = 0;
> +	extra_data->has_last = false;
> +}
> +
> +static int prepare_seq_file(struct file *file, struct bpf_iter_link *link)
> +{
> +	struct anon_file_prog_assoc *finfo;
> +	struct bpf_iter_target_info *tinfo;
> +	struct bpf_prog *prog;
> +	u32 total_priv_dsize;
> +	void *priv_data;
> +
> +	finfo = kmalloc(sizeof(*finfo), GFP_USER | __GFP_NOWARN);
> +	if (!finfo)
> +		return -ENOMEM;
> +
> +	mutex_lock(&bpf_iter_mutex);
> +	prog = link->link.prog;
> +	bpf_prog_inc(prog);
> +	mutex_unlock(&bpf_iter_mutex);
> +
> +	tinfo = link->tinfo;
> +	total_priv_dsize = get_total_priv_dsize(tinfo->seq_priv_size);
> +	priv_data = __seq_open_private(file, tinfo->seq_ops, total_priv_dsize);
> +	if (!priv_data) {
> +		bpf_prog_sub(prog, 1);
Could prog's refcnt go 0 here?  If yes, bpf_prog_put() should be used.

> +		kfree(finfo);
> +		return -ENOMEM;
> +	}
> +
> +	init_seq_file(priv_data, tinfo, prog);
> +
> +	finfo->file = file;
> +	finfo->prog = prog;
> +
> +	mutex_lock(&anon_iter_info_mutex);
> +	list_add(&finfo->list, &anon_iter_info);
> +	mutex_unlock(&anon_iter_info_mutex);
> +	return 0;
> +}
> +



[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux