Re: [PATCH v5 bpf-next 1/5] bpf: Add bloom filter map implementation

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Fri, Oct 22, 2021 at 03:02:45PM -0700, Joanne Koong wrote:
>  union bpf_attr {
>  	struct { /* anonymous struct used by BPF_MAP_CREATE command */
>  		__u32	map_type;	/* one of enum bpf_map_type */
> @@ -1274,6 +1281,7 @@ union bpf_attr {
>  						   * struct stored as the
>  						   * map value
>  						   */
> +		__u64	map_extra;	/* any per-map-type extra fields */
It needs a check to ensure map_extra is 0 for other maps.

>  	};
>  
>  	struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
> diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
> index 7f33098ca63f..cf6ca339f3cd 100644
> --- a/kernel/bpf/Makefile
> +++ b/kernel/bpf/Makefile
> @@ -7,7 +7,7 @@ endif
>  CFLAGS_core.o += $(call cc-disable-warning, override-init) $(cflags-nogcse-yy)
>  
>  obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_iter.o map_iter.o task_iter.o prog_iter.o
> -obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
> +obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o bloom_filter.o
>  obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o
>  obj-$(CONFIG_BPF_SYSCALL) += bpf_local_storage.o bpf_task_storage.o
>  obj-${CONFIG_BPF_LSM}	  += bpf_inode_storage.o
> diff --git a/kernel/bpf/bloom_filter.c b/kernel/bpf/bloom_filter.c
> new file mode 100644
> index 000000000000..0887f768ca6d
> --- /dev/null
> +++ b/kernel/bpf/bloom_filter.c
> @@ -0,0 +1,198 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/* Copyright (c) 2021 Facebook */
> +
> +#include <linux/bitmap.h>
> +#include <linux/bpf.h>
> +#include <linux/btf.h>
> +#include <linux/err.h>
> +#include <linux/jhash.h>
> +#include <linux/random.h>
> +
> +#define BLOOM_CREATE_FLAG_MASK \
> +	(BPF_F_NUMA_NODE | BPF_F_ZERO_SEED | BPF_F_ACCESS_MASK)
> +
> +struct bpf_bloom_filter {
> +	struct bpf_map map;
> +	u32 bitset_mask;
> +	u32 hash_seed;
> +	/* If the size of the values in the bloom filter is u32 aligned,
> +	 * then it is more performant to use jhash2 as the underlying hash
> +	 * function, else we use jhash. This tracks the number of u32s
> +	 * in an u32-aligned value size. If the value size is not u32 aligned,
> +	 * this will be 0.
> +	 */
> +	u32 aligned_u32_count;
> +	u32 nr_hash_funcs;
> +	unsigned long bitset[];
> +};
> +
> +static inline u32 hash(struct bpf_bloom_filter *bloom, void *value,
> +		u64 value_size, u32 index)
inline is not needed.

Alignment is off also.

./scripts/checkpatch.pl --strict ...
 
CHECK: Alignment should match open parenthesis
#174: FILE: kernel/bpf/bloom_filter.c:30:
+static inline u32 hash(struct bpf_bloom_filter *bloom, void *value,
+               u64 value_size, u32 index)

Same for a few other places.

> +{
> +	u32 h;
> +
> +	if (bloom->aligned_u32_count)
> +		h = jhash2(value, bloom->aligned_u32_count,
> +			   bloom->hash_seed + index);
> +	else
> +		h = jhash(value, value_size, bloom->hash_seed + index);
> +
> +	return h & bloom->bitset_mask;
> +}

[ ... ]

> +static struct bpf_map *map_alloc(union bpf_attr *attr)
> +{
> +	u32 bitset_bytes, bitset_mask, nr_hash_funcs, nr_bits;
> +	int numa_node = bpf_map_attr_numa_node(attr);
> +	struct bpf_bloom_filter *bloom;
> +
> +	if (!bpf_capable())
> +		return ERR_PTR(-EPERM);
> +
> +	if (attr->key_size != 0 || attr->value_size == 0 ||
> +	    attr->max_entries == 0 ||
> +	    attr->map_flags & ~BLOOM_CREATE_FLAG_MASK ||
> +	    !bpf_map_flags_access_ok(attr->map_flags) ||
> +	    (attr->map_extra & ~0xF))
> +		return ERR_PTR(-EINVAL);
> +
> +	/* The lower 4 bits of map_extra specify the number of hash functions */
> +	nr_hash_funcs = attr->map_extra & 0xF;
> +	if (nr_hash_funcs == 0)
> +		/* Default to using 5 hash functions if unspecified */
> +		nr_hash_funcs = 5;
> +
> +	/* For the bloom filter, the optimal bit array size that minimizes the
> +	 * false positive probability is n * k / ln(2) where n is the number of
> +	 * expected entries in the bloom filter and k is the number of hash
> +	 * functions. We use 7 / 5 to approximate 1 / ln(2).
> +	 *
> +	 * We round this up to the nearest power of two to enable more efficient
> +	 * hashing using bitmasks. The bitmask will be the bit array size - 1.
> +	 *
> +	 * If this overflows a u32, the bit array size will have 2^32 (4
> +	 * GB) bits.
> +	 */
> +	if (check_mul_overflow(attr->max_entries, nr_hash_funcs, &nr_bits) ||
Comparing with v4, it is using max_entries to mean number
of values instead of bits and also not exposing
BPF_BLOOM_FILTER_BITSET_SZ macro to calculate the number of bits.
just want to ensure it is the intention in v5 since I don't see it
in the change log.

> +	    check_mul_overflow(nr_bits / 5, (u32)7, &nr_bits) ||
> +	    nr_bits > (1UL << 31)) {
> +		/* The bit array size is 2^32 bits but to avoid overflowing the
> +		 * u32, we use U32_MAX, which will round up to the equivalent
> +		 * number of bytes
> +		 */
> +		bitset_bytes = BITS_TO_BYTES(U32_MAX);
> +		bitset_mask = U32_MAX;
> +	} else {
> +		if (nr_bits <= BITS_PER_LONG)
> +			nr_bits = BITS_PER_LONG;
> +		else
> +			nr_bits = roundup_pow_of_two(nr_bits);
> +		bitset_bytes = BITS_TO_BYTES(nr_bits);
> +		bitset_mask = nr_bits - 1;
> +	}
> +
> +	bitset_bytes = roundup(bitset_bytes, sizeof(unsigned long));
> +	bloom = bpf_map_area_alloc(sizeof(*bloom) + bitset_bytes,
> +					  numa_node);
> +
> +	if (!bloom)
> +		return ERR_PTR(-ENOMEM);
> +
> +	bpf_map_init_from_attr(&bloom->map, attr);
> +
> +	bloom->nr_hash_funcs = nr_hash_funcs;
> +	bloom->bitset_mask = bitset_mask;
> +
> +	/* Check whether the value size is u32-aligned */
> +	if ((attr->value_size & (sizeof(u32) - 1)) == 0)
> +		bloom->aligned_u32_count =
> +			attr->value_size / sizeof(u32);
> +
> +	if (!(attr->map_flags & BPF_F_ZERO_SEED))
> +		bloom->hash_seed = get_random_int();
> +
> +	return &bloom->map;
> +}



[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux