On Wed, Oct 27, 2021 at 04:45:00PM -0700, Joanne Koong wrote:
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 31421c74ba08..50105e0b8fcc 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -169,6 +169,7 @@ struct bpf_map {

The earlier context is copied here:

	struct bpf_map *inner_map_meta;
#ifdef CONFIG_SECURITY
	void *security;
#endif

>	u32 value_size;
>	u32 max_entries;
>	u32 map_flags;
> +	u64 map_extra; /* any per-map-type extra fields */

There is a 4 byte hole before the new 'u64 map_extra'.  Try to move it
before map_flags.

Later in this struct, this existing comment needs to be updated also:

	/* 22 bytes hole */

>	int spin_lock_off; /* >=0 valid offset, <0 error */
>	int timer_off; /* >=0 valid offset, <0 error */
>	u32 id;
> diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
> index 9c81724e4b98..c4424ac2fa02 100644
> --- a/include/linux/bpf_types.h
> +++ b/include/linux/bpf_types.h
> @@ -125,6 +125,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_STACK, stack_map_ops)
>  BPF_MAP_TYPE(BPF_MAP_TYPE_STRUCT_OPS, bpf_struct_ops_map_ops)
>  #endif
>  BPF_MAP_TYPE(BPF_MAP_TYPE_RINGBUF, ringbuf_map_ops)
> +BPF_MAP_TYPE(BPF_MAP_TYPE_BLOOM_FILTER, bloom_filter_map_ops)
>
>  BPF_LINK_TYPE(BPF_LINK_TYPE_RAW_TRACEPOINT, raw_tracepoint)
>  BPF_LINK_TYPE(BPF_LINK_TYPE_TRACING, tracing)
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index c10820037883..8bead4aa3ad0 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -906,6 +906,7 @@ enum bpf_map_type {
>  	BPF_MAP_TYPE_RINGBUF,
>  	BPF_MAP_TYPE_INODE_STORAGE,
>  	BPF_MAP_TYPE_TASK_STORAGE,
> +	BPF_MAP_TYPE_BLOOM_FILTER,
>  };
>
>  /* Note that tracing related programs such as
> @@ -1274,6 +1275,13 @@ union bpf_attr {
> 				 * struct stored as the
> 				 * map value
> 				 */
> +	/* Any per-map-type extra fields
> +	 *
> +	 * BPF_MAP_TYPE_BLOOM_FILTER - the lowest 4 bits indicate the
> +	 * number of hash functions (if 0, the bloom filter will default
> +	 * to using 5 hash functions).
> +	 */
> +	__u64	map_extra;
> 	};
>
> 	struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
> @@ -5638,6 +5646,7 @@ struct bpf_map_info {
>  	__u32 btf_id;
>  	__u32 btf_key_type_id;
>  	__u32 btf_value_type_id;

There is also a 4 byte hole here.  A "__u32 :32" is needed.  You can
find details in 36f9814a494a ("bpf: fix uapi hole for 32 bit compat
applications").

> +	__u64 map_extra;
>  } __attribute__((aligned(8)));
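
Something like this would close the hole (my sketch, not tested):

	struct bpf_map_info {
		...
		__u32 btf_id;
		__u32 btf_key_type_id;
		__u32 btf_value_type_id;
		__u32 :32;	/* alignment pad */
		__u64 map_extra;
	} __attribute__((aligned(8)));

The unnamed "__u32 :32" bitfield keeps map_extra at the same offset
whether userspace compiles with 4 byte or 8 byte alignment for __u64,
so 32 bit compat applications see the same layout as 64 bit ones.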
[ ... ]

> +static int peek_elem(struct bpf_map *map, void *value)

These generic map-ops names could be confusing in tracing and in
perf-report.  There was a 'bloom_filter_map_' prefix in the earlier
version.  I could have missed something in the earlier discussion
threads.  What was the reason in dropping the prefix?

> +{
> +	struct bpf_bloom_filter *bloom =
> +		container_of(map, struct bpf_bloom_filter, map);
> +	u32 i, h;
> +
> +	for (i = 0; i < bloom->nr_hash_funcs; i++) {
> +		h = hash(bloom, value, map->value_size, i);
> +		if (!test_bit(h, bloom->bitset))
> +			return -ENOENT;
> +	}
> +
> +	return 0;
> +}
> +
> +static int push_elem(struct bpf_map *map, void *value, u64 flags)
> +{
> +	struct bpf_bloom_filter *bloom =
> +		container_of(map, struct bpf_bloom_filter, map);
> +	u32 i, h;
> +
> +	if (flags != BPF_ANY)
> +		return -EINVAL;
> +
> +	for (i = 0; i < bloom->nr_hash_funcs; i++) {
> +		h = hash(bloom, value, map->value_size, i);
> +		set_bit(h, bloom->bitset);
> +	}
> +
> +	return 0;
> +}
> +
> +static int pop_elem(struct bpf_map *map, void *value)
> +{
> +	return -EOPNOTSUPP;
> +}
> +
> +static struct bpf_map *map_alloc(union bpf_attr *attr)
> +{
> +	u32 bitset_bytes, bitset_mask, nr_hash_funcs, nr_bits;
> +	int numa_node = bpf_map_attr_numa_node(attr);
> +	struct bpf_bloom_filter *bloom;
> +
> +	if (!bpf_capable())
> +		return ERR_PTR(-EPERM);
> +
> +	if (attr->key_size != 0 || attr->value_size == 0 ||
> +	    attr->max_entries == 0 ||
> +	    attr->map_flags & ~BLOOM_CREATE_FLAG_MASK ||
> +	    !bpf_map_flags_access_ok(attr->map_flags) ||
> +	    (attr->map_extra & ~0xF))
> +		return ERR_PTR(-EINVAL);
> +
> +	/* The lower 4 bits of map_extra specify the number of hash functions */
> +	nr_hash_funcs = attr->map_extra & 0xF;

nit. "& 0xF" is unnecessary.  It has just been tested immediately
above.

> +	if (nr_hash_funcs == 0)
> +		/* Default to using 5 hash functions if unspecified */
> +		nr_hash_funcs = 5;
> +
> +	/* For the bloom filter, the optimal bit array size that minimizes the
> +	 * false positive probability is n * k / ln(2) where n is the number of
> +	 * expected entries in the bloom filter and k is the number of hash
> +	 * functions. We use 7 / 5 to approximate 1 / ln(2).
> +	 *
> +	 * We round this up to the nearest power of two to enable more efficient
> +	 * hashing using bitmasks. The bitmask will be the bit array size - 1.
> +	 *
> +	 * If this overflows a u32, the bit array size will have 2^32 (4
> +	 * GB) bits.
> +	 */
> +	if (check_mul_overflow(attr->max_entries, nr_hash_funcs, &nr_bits) ||
> +	    check_mul_overflow(nr_bits / 5, (u32)7, &nr_bits) ||
> +	    nr_bits > (1UL << 31)) {
> +		/* The bit array size is 2^32 bits but to avoid overflowing the
> +		 * u32, we use U32_MAX, which will round up to the equivalent
> +		 * number of bytes
> +		 */
> +		bitset_bytes = BITS_TO_BYTES(U32_MAX);
> +		bitset_mask = U32_MAX;
> +	} else {
> +		if (nr_bits <= BITS_PER_LONG)
> +			nr_bits = BITS_PER_LONG;
> +		else
> +			nr_bits = roundup_pow_of_two(nr_bits);
> +		bitset_bytes = BITS_TO_BYTES(nr_bits);
> +		bitset_mask = nr_bits - 1;
> +	}
> +
> +	bitset_bytes = roundup(bitset_bytes, sizeof(unsigned long));
> +	bloom = bpf_map_area_alloc(sizeof(*bloom) + bitset_bytes, numa_node);
> +
> +	if (!bloom)
> +		return ERR_PTR(-ENOMEM);
> +
> +	bpf_map_init_from_attr(&bloom->map, attr);
> +
> +	bloom->nr_hash_funcs = nr_hash_funcs;
> +	bloom->bitset_mask = bitset_mask;
> +
> +	/* Check whether the value size is u32-aligned */
> +	if ((attr->value_size & (sizeof(u32) - 1)) == 0)
> +		bloom->aligned_u32_count =
> +			attr->value_size / sizeof(u32);
> +
> +	if (!(attr->map_flags & BPF_F_ZERO_SEED))
> +		bloom->hash_seed = get_random_int();
> +
> +	return &bloom->map;
> +}
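
To double check the sizing here, a quick worked example (my numbers,
not from the patch): with attr->max_entries = 10000 and the default
nr_hash_funcs = 5, the else branch works out to

	nr_bits      = 10000 * 5                 =  50000	/* n * k */
	nr_bits      = 50000 / 5 * 7             =  70000	/* ~ n * k / ln(2) */
	nr_bits      = roundup_pow_of_two(70000) = 131072	/* 2^17 */
	bitset_bytes = BITS_TO_BYTES(131072)     =  16384	/* 16KB */
	bitset_mask  = 131072 - 1                = 0x1ffff

so a 10k-entry filter costs 16KB of bitset, which looks sane.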