Re: [PATCH bpf-next 1/3] bpf: implement relay map basis

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi,

On 12/22/2023 8:21 PM, Philo Lu wrote:
> BPF_MAP_TYPE_RELAY is implemented based on relay interface, which
> creates per-cpu buffer to transfer data. Each buffer is essentially a
> list of fix-sized sub-buffers, and is exposed to user space as files in
> debugfs. attr->max_entries is used as subbuf size and attr->map_extra is
> used as subbuf num. Currently, the default value of subbuf num is 8.
>
> The data can be accessed by read or mmap via these files. For example,
> if there are 2 cpus, files could be `/sys/kernel/debug/mydir/my_rmap0`
> and `/sys/kernel/debug/mydir/my_rmap1`.
>
> Buffer-only mode is used to create the relay map, which just allocates
> the buffer without creating user-space files. Then user can setup the
> files with map_update_elem, thus allowing user to define the directory
> name in debugfs. map_update_elem is implemented in the following patch.
>
> A new map flag named BPF_F_OVERWRITE is introduced to set overwrite mode
> of relay map.

Beside adding a new map type, could we consider only use kfuncs to
support the creation of rchan and the write of rchan ? I think
bpf_cpumask will be a good reference.
>
> Signed-off-by: Philo Lu <lulie@xxxxxxxxxxxxxxxxx>
> ---
>  include/linux/bpf_types.h |   3 +
>  include/uapi/linux/bpf.h  |   7 ++
>  kernel/bpf/Makefile       |   3 +
>  kernel/bpf/relaymap.c     | 157 ++++++++++++++++++++++++++++++++++++++
>  kernel/bpf/syscall.c      |   1 +
>  5 files changed, 171 insertions(+)
>  create mode 100644 kernel/bpf/relaymap.c
>

SNIP
> diff --git a/kernel/bpf/relaymap.c b/kernel/bpf/relaymap.c
> new file mode 100644
> index 000000000000..d0adc7f67758
> --- /dev/null
> +++ b/kernel/bpf/relaymap.c
> @@ -0,0 +1,157 @@
> +#include <linux/cpumask.h>
> +#include <linux/debugfs.h>
> +#include <linux/filter.h>
> +#include <linux/relay.h>
> +#include <linux/slab.h>
> +#include <linux/bpf.h>
> +#include <linux/err.h>
> +
> +#define RELAY_CREATE_FLAG_MASK (BPF_F_OVERWRITE)
> +
> +struct bpf_relay_map {
> +	struct bpf_map map;
> +	struct rchan *relay_chan;
> +	struct rchan_callbacks relay_cb;
> +};

It seems that there is no need to add relay_cb in bpf_relay_map. We
could define two kinds of rchan_callbacks: one for non-overwrite mode
and another one for overwrite mode.
> +
> +static struct dentry *create_buf_file_handler(const char *filename,
> +							   struct dentry *parent, umode_t mode,
> +							   struct rchan_buf *buf, int *is_global)
> +{
> +	/* Because we do relay_late_setup_files(), create_buf_file(NULL, NULL, ...)
> +	 * will be called by relay_open.
> +	 */
> +	if (!filename)
> +		return NULL;
> +
> +	return debugfs_create_file(filename, mode, parent, buf,
> +				   &relay_file_operations);
> +}
> +
> +static int remove_buf_file_handler(struct dentry *dentry)
> +{
> +	debugfs_remove(dentry);
> +	return 0;
> +}
> +
> +/* For non-overwrite, use default subbuf_start cb */
> +static int subbuf_start_overwrite(struct rchan_buf *buf, void *subbuf,
> +						   void *prev_subbuf, size_t prev_padding)
> +{
> +	return 1;
> +}
> +
> +/* bpf_attr is used as follows:
> + * - key size: must be 0
> + * - value size: value will be used as directory name by map_update_elem
> + *   (to create relay files). If passed as 0, it will be set to NAME_MAX as
> + *   default
> + *
> + * - max_entries: subbuf size
> + * - map_extra: subbuf num, default as 8
> + *
> + * When alloc, we do not set up relay files considering dir_name conflicts.
> + * Instead we use relay_late_setup_files() in map_update_elem(), and thus the
> + * value is used as dir_name, and map->name is used as base_filename.
> + */
> +static struct bpf_map *relay_map_alloc(union bpf_attr *attr)
> +{
> +	struct bpf_relay_map *rmap;
> +
> +	if (unlikely(attr->map_flags & ~RELAY_CREATE_FLAG_MASK))
> +		return ERR_PTR(-EINVAL);
> +
> +	/* key size must be 0 in relay map */
> +	if (unlikely(attr->key_size))
> +		return ERR_PTR(-EINVAL);
> +
> +	if (unlikely(attr->value_size > NAME_MAX)) {
> +		pr_warn("value_size should be no more than %d\n", NAME_MAX);
> +		return ERR_PTR(-EINVAL);
> +	} else if (attr->value_size == 0)
> +		attr->value_size = NAME_MAX;
> +
> +	/* set default subbuf num */
> +	attr->map_extra = attr->map_extra & UINT_MAX;

Should we reject invalid map_extra and return -EINVAL instead ?
> +	if (!attr->map_extra)
> +		attr->map_extra = 8;
> +
> +	if (!attr->map_name || strlen(attr->map_name) == 0)
> +		return ERR_PTR(-EINVAL);
> +
> +	rmap = bpf_map_area_alloc(sizeof(*rmap), NUMA_NO_NODE);
> +	if (!rmap)
> +		return ERR_PTR(-ENOMEM);
> +
> +	bpf_map_init_from_attr(&rmap->map, attr);
> +
> +	rmap->relay_cb.create_buf_file = create_buf_file_handler;
> +	rmap->relay_cb.remove_buf_file = remove_buf_file_handler;
> +	if (attr->map_flags & BPF_F_OVERWRITE)
> +		rmap->relay_cb.subbuf_start = subbuf_start_overwrite;
> +
> +	rmap->relay_chan = relay_open(NULL, NULL,
> +							attr->max_entries, attr->map_extra,
> +							&rmap->relay_cb, NULL);
> +	if (!rmap->relay_chan)
> +		return ERR_PTR(-EINVAL);

Need to free rmap.
> +
> +	return &rmap->map;
> +}
> +
> +static void relay_map_free(struct bpf_map *map)
> +{
> +	struct bpf_relay_map *rmap;
> +
> +	rmap = container_of(map, struct bpf_relay_map, map);
> +	relay_close(rmap->relay_chan);
> +	debugfs_remove_recursive(rmap->relay_chan->parent);

rmap->relay_chan may be freed by relay_close(), so it is not safe to
dereference relay_chan->parent here. And is debugfs_remove_recursive()
necessary here ?
> +	kfree(rmap);
> +}
> +
> +static void *relay_map_lookup_elem(struct bpf_map *map, void *key)
> +{
> +	return ERR_PTR(-EOPNOTSUPP);
> +}
> +
> +static long relay_map_update_elem(struct bpf_map *map, void *key, void *value,
> +				   u64 flags)
> +{
> +	return -EOPNOTSUPP;
> +}
> +
> +static long relay_map_delete_elem(struct bpf_map *map, void *key)
> +{
> +	return -EOPNOTSUPP;
> +}
> +
> +static int relay_map_get_next_key(struct bpf_map *map, void *key,
> +				    void *next_key)
> +{
> +	return -EOPNOTSUPP;
> +}
> +
> +static u64 relay_map_mem_usage(const struct bpf_map *map)
> +{
> +	struct bpf_relay_map *rmap;
> +	u64 usage = sizeof(struct bpf_relay_map);
> +
> +	rmap = container_of(map, struct bpf_relay_map, map);
> +	usage += sizeof(struct rchan);
> +	usage += (sizeof(struct rchan_buf) + rmap->relay_chan->alloc_size)
> +			 * num_online_cpus();
> +	return usage;
> +}
> +
> +BTF_ID_LIST_SINGLE(relay_map_btf_ids, struct, bpf_relay_map)
> +const struct bpf_map_ops relay_map_ops = {
> +	.map_meta_equal = bpf_map_meta_equal,
> +	.map_alloc = relay_map_alloc,
> +	.map_free = relay_map_free,
> +	.map_lookup_elem = relay_map_lookup_elem,
> +	.map_update_elem = relay_map_update_elem,
> +	.map_delete_elem = relay_map_delete_elem,
> +	.map_get_next_key = relay_map_get_next_key,
> +	.map_mem_usage = relay_map_mem_usage,
> +	.map_btf_id = &relay_map_btf_ids[0],
> +};
>





[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux