On Fri, Oct 22, 2021 at 03:02:45PM -0700, Joanne Koong wrote:
>  union bpf_attr {
>  	struct { /* anonymous struct used by BPF_MAP_CREATE command */
>  		__u32	map_type;	/* one of enum bpf_map_type */
> @@ -1274,6 +1281,7 @@ union bpf_attr {
>  					 * struct stored as the
>  					 * map value
>  					 */
> +	__u64	map_extra;	/* any per-map-type extra fields */

It needs a check to ensure map_extra is 0 for other maps (a rough
sketch of what I mean is below, after the quoted hash() function).

>  	};
> 
>  	struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
> diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
> index 7f33098ca63f..cf6ca339f3cd 100644
> --- a/kernel/bpf/Makefile
> +++ b/kernel/bpf/Makefile
> @@ -7,7 +7,7 @@ endif
>  CFLAGS_core.o += $(call cc-disable-warning, override-init) $(cflags-nogcse-yy)
> 
>  obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_iter.o map_iter.o task_iter.o prog_iter.o
> -obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
> +obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o bloom_filter.o
>  obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o
>  obj-$(CONFIG_BPF_SYSCALL) += bpf_local_storage.o bpf_task_storage.o
>  obj-${CONFIG_BPF_LSM} += bpf_inode_storage.o
> diff --git a/kernel/bpf/bloom_filter.c b/kernel/bpf/bloom_filter.c
> new file mode 100644
> index 000000000000..0887f768ca6d
> --- /dev/null
> +++ b/kernel/bpf/bloom_filter.c
> @@ -0,0 +1,198 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/* Copyright (c) 2021 Facebook */
> +
> +#include <linux/bitmap.h>
> +#include <linux/bpf.h>
> +#include <linux/btf.h>
> +#include <linux/err.h>
> +#include <linux/jhash.h>
> +#include <linux/random.h>
> +
> +#define BLOOM_CREATE_FLAG_MASK \
> +	(BPF_F_NUMA_NODE | BPF_F_ZERO_SEED | BPF_F_ACCESS_MASK)
> +
> +struct bpf_bloom_filter {
> +	struct bpf_map map;
> +	u32 bitset_mask;
> +	u32 hash_seed;
> +	/* If the size of the values in the bloom filter is u32 aligned,
> +	 * then it is more performant to use jhash2 as the underlying hash
> +	 * function, else we use jhash. This tracks the number of u32s
> +	 * in an u32-aligned value size. If the value size is not u32 aligned,
> +	 * this will be 0.
> +	 */
> +	u32 aligned_u32_count;
> +	u32 nr_hash_funcs;
> +	unsigned long bitset[];
> +};
> +
> +static inline u32 hash(struct bpf_bloom_filter *bloom, void *value,
> +	u64 value_size, u32 index)

inline is not needed. Alignment is off also:

./scripts/checkpatch.pl --strict
...
CHECK: Alignment should match open parenthesis
#174: FILE: kernel/bpf/bloom_filter.c:30:
+static inline u32 hash(struct bpf_bloom_filter *bloom, void *value,
+	u64 value_size, u32 index)

Same for a few other places.

> +{
> +	u32 h;
> +
> +	if (bloom->aligned_u32_count)
> +		h = jhash2(value, bloom->aligned_u32_count,
> +			   bloom->hash_seed + index);
> +	else
> +		h = jhash(value, value_size, bloom->hash_seed + index);
> +
> +	return h & bloom->bitset_mask;
> +}

[ ... ]
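Regarding the map_extra comment above, here is a rough sketch of the
check I have in mind. Untested, and it assumes the check lives in
map_create() in kernel/bpf/syscall.c and that the new map type is named
BPF_MAP_TYPE_BLOOM_FILTER as elsewhere in this patch:

	/* map_extra is only meaningful for the bloom filter map so far,
	 * so reject it for every other map type (sketch only).
	 */
	if (attr->map_type != BPF_MAP_TYPE_BLOOM_FILTER &&
	    attr->map_extra != 0)
		return -EINVAL;

A later map type that starts using map_extra would then only need to
relax this check instead of having maps that silently ignored it.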
> +static struct bpf_map *map_alloc(union bpf_attr *attr)
> +{
> +	u32 bitset_bytes, bitset_mask, nr_hash_funcs, nr_bits;
> +	int numa_node = bpf_map_attr_numa_node(attr);
> +	struct bpf_bloom_filter *bloom;
> +
> +	if (!bpf_capable())
> +		return ERR_PTR(-EPERM);
> +
> +	if (attr->key_size != 0 || attr->value_size == 0 ||
> +	    attr->max_entries == 0 ||
> +	    attr->map_flags & ~BLOOM_CREATE_FLAG_MASK ||
> +	    !bpf_map_flags_access_ok(attr->map_flags) ||
> +	    (attr->map_extra & ~0xF))
> +		return ERR_PTR(-EINVAL);
> +
> +	/* The lower 4 bits of map_extra specify the number of hash functions */
> +	nr_hash_funcs = attr->map_extra & 0xF;
> +	if (nr_hash_funcs == 0)
> +		/* Default to using 5 hash functions if unspecified */
> +		nr_hash_funcs = 5;
> +
> +	/* For the bloom filter, the optimal bit array size that minimizes the
> +	 * false positive probability is n * k / ln(2) where n is the number of
> +	 * expected entries in the bloom filter and k is the number of hash
> +	 * functions. We use 7 / 5 to approximate 1 / ln(2).
> +	 *
> +	 * We round this up to the nearest power of two to enable more efficient
> +	 * hashing using bitmasks. The bitmask will be the bit array size - 1.
> +	 *
> +	 * If this overflows a u32, the bit array size will have 2^32 (4
> +	 * GB) bits.
> +	 */
> +	if (check_mul_overflow(attr->max_entries, nr_hash_funcs, &nr_bits) ||

Comparing with v4, this version uses max_entries to mean the number of
values instead of the number of bits, and it also no longer exposes a
BPF_BLOOM_FILTER_BITSET_SZ macro to calculate the number of bits. Just
want to make sure this is the intention in v5, since I don't see it in
the change log.

> +	    check_mul_overflow(nr_bits / 5, (u32)7, &nr_bits) ||
> +	    nr_bits > (1UL << 31)) {
> +		/* The bit array size is 2^32 bits but to avoid overflowing the
> +		 * u32, we use U32_MAX, which will round up to the equivalent
> +		 * number of bytes
> +		 */
> +		bitset_bytes = BITS_TO_BYTES(U32_MAX);
> +		bitset_mask = U32_MAX;
> +	} else {
> +		if (nr_bits <= BITS_PER_LONG)
> +			nr_bits = BITS_PER_LONG;
> +		else
> +			nr_bits = roundup_pow_of_two(nr_bits);
> +		bitset_bytes = BITS_TO_BYTES(nr_bits);
> +		bitset_mask = nr_bits - 1;
> +	}
> +
> +	bitset_bytes = roundup(bitset_bytes, sizeof(unsigned long));
> +	bloom = bpf_map_area_alloc(sizeof(*bloom) + bitset_bytes,
> +			numa_node);
> +
> +	if (!bloom)
> +		return ERR_PTR(-ENOMEM);
> +
> +	bpf_map_init_from_attr(&bloom->map, attr);
> +
> +	bloom->nr_hash_funcs = nr_hash_funcs;
> +	bloom->bitset_mask = bitset_mask;
> +
> +	/* Check whether the value size is u32-aligned */
> +	if ((attr->value_size & (sizeof(u32) - 1)) == 0)
> +		bloom->aligned_u32_count =
> +			attr->value_size / sizeof(u32);
> +
> +	if (!(attr->map_flags & BPF_F_ZERO_SEED))
> +		bloom->hash_seed = get_random_int();
> +
> +	return &bloom->map;
> +}
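Just to double check my reading of the sizing math with a concrete
example (my own arithmetic, not from the patch): with max_entries = 1000
and the default nr_hash_funcs = 5,

	nr_bits = 1000 * 5 = 5000
	nr_bits = 5000 / 5 * 7 = 7000
	roundup_pow_of_two(7000) = 8192 bits

so bitset_bytes ends up as 1024 and bitset_mask as 8191. That is in the
same ballpark as the n * k / ln(2) target (~7213 bits), so the 7 / 5
approximation looks reasonable to me.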