Move most macros and helpers to a header file, so that they can be conveniently used by related implementations. No functional changes are intended here. Signed-off-by: Stefano Brivio <sbrivio@xxxxxxxxxx> --- net/netfilter/nft_set_pipapo.c | 218 ++--------------------------- net/netfilter/nft_set_pipapo.h | 242 +++++++++++++++++++++++++++++++++ 2 files changed, 250 insertions(+), 210 deletions(-) create mode 100644 net/netfilter/nft_set_pipapo.h diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c index 63d54f9fcac0..7fc3ec7232b7 100644 --- a/net/netfilter/nft_set_pipapo.c +++ b/net/netfilter/nft_set_pipapo.c @@ -330,179 +330,20 @@ #include <linux/kernel.h> #include <linux/init.h> -#include <linux/log2.h> #include <linux/module.h> #include <linux/netlink.h> #include <linux/netfilter.h> #include <linux/netfilter/nf_tables.h> #include <net/netfilter/nf_tables_core.h> #include <uapi/linux/netfilter/nf_tables.h> -#include <net/ipv6.h> /* For the maximum length of a field */ #include <linux/bitmap.h> #include <linux/bitops.h> -/* Count of concatenated fields depends on count of 32-bit nftables registers */ -#define NFT_PIPAPO_MAX_FIELDS NFT_REG32_COUNT - -/* Largest supported field size */ -#define NFT_PIPAPO_MAX_BYTES (sizeof(struct in6_addr)) -#define NFT_PIPAPO_MAX_BITS (NFT_PIPAPO_MAX_BYTES * BITS_PER_BYTE) - -/* Number of bits to be grouped together in lookup table buckets, arbitrary */ -#define NFT_PIPAPO_GROUP_BITS 4 -#define NFT_PIPAPO_GROUPS_PER_BYTE (BITS_PER_BYTE / NFT_PIPAPO_GROUP_BITS) - -/* Fields are padded to 32 bits in input registers */ -#define NFT_PIPAPO_GROUPS_PADDED_SIZE(x) \ - (round_up((x) / NFT_PIPAPO_GROUPS_PER_BYTE, sizeof(u32))) -#define NFT_PIPAPO_GROUPS_PADDING(x) \ - (NFT_PIPAPO_GROUPS_PADDED_SIZE((x)) - (x) / NFT_PIPAPO_GROUPS_PER_BYTE) - -/* Number of buckets, given by 2 ^ n, with n grouped bits */ -#define NFT_PIPAPO_BUCKETS (1 << NFT_PIPAPO_GROUP_BITS) - -/* Each n-bit range maps to up to n * 2 rules */ -#define NFT_PIPAPO_MAP_NBITS (const_ilog2(NFT_PIPAPO_MAX_BITS * 2)) - -/* Use the rest of mapping table buckets for rule indices, but it makes no sense - * to exceed 32 bits - */ -#if BITS_PER_LONG == 64 -#define NFT_PIPAPO_MAP_TOBITS 32 -#else -#define NFT_PIPAPO_MAP_TOBITS (BITS_PER_LONG - NFT_PIPAPO_MAP_NBITS) -#endif - -/* ...which gives us the highest allowed index for a rule */ -#define NFT_PIPAPO_RULE0_MAX ((1UL << (NFT_PIPAPO_MAP_TOBITS - 1)) \ - - (1UL << NFT_PIPAPO_MAP_NBITS)) - -/* Definitions for vectorised implementations */ -#ifdef NFT_PIPAPO_ALIGN -#define NFT_PIPAPO_ALIGN_HEADROOM \ - (NFT_PIPAPO_ALIGN - ARCH_KMALLOC_MINALIGN) -#define NFT_PIPAPO_LT_ALIGN(lt) (PTR_ALIGN((lt), NFT_PIPAPO_ALIGN)) -#define NFT_PIPAPO_LT_ASSIGN(field, x) \ - do { \ - (field)->lt_aligned = NFT_PIPAPO_LT_ALIGN(x); \ - (field)->lt = (x); \ - } while (0) -#else -#define NFT_PIPAPO_ALIGN_HEADROOM 0 -#define NFT_PIPAPO_LT_ALIGN(lt) (lt) -#define NFT_PIPAPO_LT_ASSIGN(field, x) ((field)->lt = (x)) -#endif /* NFT_PIPAPO_ALIGN */ - -#define nft_pipapo_for_each_field(field, index, match) \ - for ((field) = (match)->f, (index) = 0; \ - (index) < (match)->field_count; \ - (index)++, (field)++) - -/** - * union nft_pipapo_map_bucket - Bucket of mapping table - * @to: First rule number (in next field) this rule maps to - * @n: Number of rules (in next field) this rule maps to - * @e: If there's no next field, pointer to element this rule maps to - */ -union nft_pipapo_map_bucket { - struct { -#if BITS_PER_LONG == 64 - static_assert(NFT_PIPAPO_MAP_TOBITS <= 32); - u32 to; - - static_assert(NFT_PIPAPO_MAP_NBITS <= 32); - u32 n; -#else - unsigned long to:NFT_PIPAPO_MAP_TOBITS; - unsigned long n:NFT_PIPAPO_MAP_NBITS; -#endif - }; - struct nft_pipapo_elem *e; -}; - -/** - * struct nft_pipapo_field - Lookup, mapping tables and related data for a field - * @groups: Amount of 4-bit groups - * @rules: Number of inserted rules - * @bsize: Size of each bucket in lookup table, in longs - * @lt: Lookup table: 'groups' rows of NFT_PIPAPO_BUCKETS buckets - * @lt_aligned: Version of @lt aligned to NFT_PIPAPO_ALIGN bytes - * @mt: Mapping table: one bucket per rule - */ -struct nft_pipapo_field { - int groups; - unsigned long rules; - size_t bsize; -#ifdef NFT_PIPAPO_ALIGN - unsigned long *lt_aligned; -#endif - unsigned long *lt; - union nft_pipapo_map_bucket *mt; -}; - -/** - * struct nft_pipapo_match - Data used for lookup and matching - * @field_count Amount of fields in set - * @scratch: Preallocated per-CPU maps for partial matching results - * @scratch_aligned: Version of @scratch aligned to NFT_PIPAPO_ALIGN bytes - * @bsize_max: Maximum lookup table bucket size of all fields, in longs - * @rcu Matching data is swapped on commits - * @f: Fields, with lookup and mapping tables - */ -struct nft_pipapo_match { - int field_count; -#ifdef NFT_PIPAPO_ALIGN - unsigned long * __percpu *scratch_aligned; -#endif - unsigned long * __percpu *scratch; - size_t bsize_max; - struct rcu_head rcu; - struct nft_pipapo_field f[0]; -}; +#include "nft_set_pipapo.h" /* Current working bitmap index, toggled between field matches */ static DEFINE_PER_CPU(bool, nft_pipapo_scratch_index); -/** - * struct nft_pipapo - Representation of a set - * @match: Currently in-use matching data - * @clone: Copy where pending insertions and deletions are kept - * @groups: Total amount of 4-bit groups for fields in this set - * @width: Total bytes to be matched for one packet, including padding - * @dirty: Working copy has pending insertions or deletions - * @last_gc: Timestamp of last garbage collection run, jiffies - * @start_data: Key data of start element for insertion - * @start_elem: Start element for insertion - * - * nft_pipapo_insert() is called separately with start and end elements to be - * inserted. While a tree implementation (see nft_set_rbtree) can insert nodes - * in the tree right away, we need to cache the start element, and perform the - * actual insertion once the end element is also seen. This is fine as insertion - * is serialised by the nftables API. - */ -struct nft_pipapo { - struct nft_pipapo_match __rcu *match; - struct nft_pipapo_match *clone; - int groups; - int width; - bool dirty; - unsigned long last_gc; - u8 start_data[NFT_DATA_VALUE_MAXLEN * sizeof(u32)]; - struct nft_pipapo_elem *start_elem; -}; - -struct nft_pipapo_elem; - -/** - * struct nft_pipapo_elem - API-facing representation of single set element - * @start: Pointer to element that represents start of interval - * @ext: nftables API extensions - */ -struct nft_pipapo_elem { - struct nft_pipapo_elem *start; - struct nft_set_ext ext; -}; - /** * pipapo_refill() - For each set bit, set bits from selected mapping table item * @map: Bitmap to be scanned for set bits @@ -520,9 +361,8 @@ struct nft_pipapo_elem { * * Return: -1 on no match, bit position on 'match_only', 0 otherwise. */ -static int pipapo_refill(unsigned long *map, int len, int rules, - unsigned long *dst, union nft_pipapo_map_bucket *mt, - bool match_only) +int pipapo_refill(unsigned long *map, int len, int rules, unsigned long *dst, + union nft_pipapo_map_bucket *mt, bool match_only) { unsigned long bitset; int k, ret = -1; @@ -640,7 +480,7 @@ static bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set, nft_pipapo_for_each_field(f, i, m) { bool last = i == m->field_count - 1; unsigned long *lt = f->lt; - int b, group, j; + int b, j; /* For each 4-bit group: select lookup table bucket depending on * packet bytes value, then AND bucket value. Unroll loops for @@ -658,21 +498,8 @@ static bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set, } else if (f->groups == 32) { NFT_PIPAPO_MATCH_32(res_map, lt, f->bsize, rp, j); } else { - for (group = 0; group < f->groups; group++) { - u8 v; - - if (group % 2) { - v = *rp & 0x0f; - rp++; - } else { - v = *rp >> 4; - } - __bitmap_and(res_map, res_map, - lt + v * f->bsize, - f->bsize * BITS_PER_LONG); - - lt += f->bsize * NFT_PIPAPO_BUCKETS; - } + pipapo_and_field_buckets(f, res_map, rp); + rp += f->groups / NFT_PIPAPO_GROUPS_PER_BYTE; } /* Now populate the bitmap for the next field, unless this is @@ -2052,21 +1879,11 @@ static u64 nft_pipapo_privsize(const struct nlattr * const nla[], } /** - * nft_pipapo_estimate() - Estimate set size, space and lookup complexity + * nft_pipapo_estimate() - Set size, space and lookup complexity * @desc: Set description, initial element count used here * @features: Flags: NFT_SET_SUBKEY needs to be there * @est: Storage for estimation data * - * The size for this set type can vary dramatically, as it depends on the number - * of rules (composing netmasks) the entries expand to. We compute the worst - * case here, in order to ensure that other types are used if concatenation of - * ranges is not needed. - * - * In general, for a non-ranged entry or a single composing netmask, we need - * one bit in each of the sixteen NFT_PIPAPO_BUCKETS, for each 4-bit group (that - * is, each input bit needs four bits of matching data), plus a bucket in the - * mapping table for each field. - * * Return: true */ static bool nft_pipapo_estimate(const struct nft_set_desc *desc, u32 features, @@ -2075,26 +1892,7 @@ static bool nft_pipapo_estimate(const struct nft_set_desc *desc, u32 features, if (!(features & NFT_SET_SUBKEY)) return false; - est->size = sizeof(struct nft_pipapo) + sizeof(struct nft_pipapo_match); - - /* Worst-case with current amount of 32-bit VM registers (16 of them): - * - 2 IPv6 addresses 8 registers - * - 2 interface names 8 registers - * that is, four 128-bit fields: - */ - est->size += sizeof(struct nft_pipapo_field) * 4; - - /* expanding to worst-case ranges, 128 * 2 rules each, resulting in: - * - 128 4-bit groups - * - each set entry taking 256 bits in each bucket - */ - est->size += desc->size * NFT_PIPAPO_MAX_BITS / NFT_PIPAPO_GROUP_BITS * - NFT_PIPAPO_BUCKETS * NFT_PIPAPO_MAX_BITS * 2 / - BITS_PER_BYTE; - - /* and we need mapping buckets, too */ - est->size += desc->size * NFT_PIPAPO_MAP_NBITS * - sizeof(union nft_pipapo_map_bucket); + est->size = pipapo_estimate_size(desc->size); est->lookup = NFT_SET_CLASS_O_LOG_N; diff --git a/net/netfilter/nft_set_pipapo.h b/net/netfilter/nft_set_pipapo.h new file mode 100644 index 000000000000..027ca1a1a9d3 --- /dev/null +++ b/net/netfilter/nft_set_pipapo.h @@ -0,0 +1,242 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#ifndef _NFT_SET_PIPAPO_H + +#include <linux/log2.h> +#include <net/ipv6.h> /* For the maximum length of a field */ + +/* Count of concatenated fields depends on count of 32-bit nftables registers */ +#define NFT_PIPAPO_MAX_FIELDS NFT_REG32_COUNT + +/* Largest supported field size */ +#define NFT_PIPAPO_MAX_BYTES (sizeof(struct in6_addr)) +#define NFT_PIPAPO_MAX_BITS (NFT_PIPAPO_MAX_BYTES * BITS_PER_BYTE) + +/* Number of bits to be grouped together in lookup table buckets, arbitrary */ +#define NFT_PIPAPO_GROUP_BITS 4 +#define NFT_PIPAPO_GROUPS_PER_BYTE (BITS_PER_BYTE / NFT_PIPAPO_GROUP_BITS) + +/* Fields are padded to 32 bits in input registers */ +#define NFT_PIPAPO_GROUPS_PADDED_SIZE(x) \ + (round_up((x) / NFT_PIPAPO_GROUPS_PER_BYTE, sizeof(u32))) +#define NFT_PIPAPO_GROUPS_PADDING(x) \ + (NFT_PIPAPO_GROUPS_PADDED_SIZE((x)) - (x) / NFT_PIPAPO_GROUPS_PER_BYTE) + +/* Number of buckets, given by 2 ^ n, with n grouped bits */ +#define NFT_PIPAPO_BUCKETS (1 << NFT_PIPAPO_GROUP_BITS) + +/* Each n-bit range maps to up to n * 2 rules */ +#define NFT_PIPAPO_MAP_NBITS (const_ilog2(NFT_PIPAPO_MAX_BITS * 2)) + +/* Use the rest of mapping table buckets for rule indices, but it makes no sense + * to exceed 32 bits + */ +#if BITS_PER_LONG == 64 +#define NFT_PIPAPO_MAP_TOBITS 32 +#else +#define NFT_PIPAPO_MAP_TOBITS (BITS_PER_LONG - NFT_PIPAPO_MAP_NBITS) +#endif + +/* ...which gives us the highest allowed index for a rule */ +#define NFT_PIPAPO_RULE0_MAX ((1UL << (NFT_PIPAPO_MAP_TOBITS - 1)) \ + - (1UL << NFT_PIPAPO_MAP_NBITS)) + +/* Definitions for vectorised implementations */ +#ifdef NFT_PIPAPO_ALIGN +#define NFT_PIPAPO_ALIGN_HEADROOM \ + (NFT_PIPAPO_ALIGN - ARCH_KMALLOC_MINALIGN) +#define NFT_PIPAPO_LT_ALIGN(lt) (PTR_ALIGN((lt), NFT_PIPAPO_ALIGN)) +#define NFT_PIPAPO_LT_ASSIGN(field, x) \ + do { \ + (field)->lt_aligned = NFT_PIPAPO_LT_ALIGN(x); \ + (field)->lt = (x); \ + } while (0); +#else +#define NFT_PIPAPO_ALIGN_HEADROOM 0 +#define NFT_PIPAPO_LT_ALIGN(lt) (lt) +#define NFT_PIPAPO_LT_ASSIGN(field, x) \ + do { \ + (field)->lt = (x); \ + } while (0); +#endif /* NFT_PIPAPO_ALIGN */ + +#define nft_pipapo_for_each_field(field, index, match) \ + for ((field) = (match)->f, (index) = 0; \ + (index) < (match)->field_count; \ + (index)++, (field)++) + +/** + * union nft_pipapo_map_bucket - Bucket of mapping table + * @to: First rule number (in next field) this rule maps to + * @n: Number of rules (in next field) this rule maps to + * @e: If there's no next field, pointer to element this rule maps to + */ +union nft_pipapo_map_bucket { + struct { +#if BITS_PER_LONG == 64 + static_assert(NFT_PIPAPO_MAP_TOBITS <= 32); + u32 to; + + static_assert(NFT_PIPAPO_MAP_NBITS <= 32); + u32 n; +#else + unsigned long to:NFT_PIPAPO_MAP_TOBITS; + unsigned long n:NFT_PIPAPO_MAP_NBITS; +#endif + }; + struct nft_pipapo_elem *e; +}; + +/** + * struct nft_pipapo_field - Lookup, mapping tables and related data for a field + * @groups: Amount of 4-bit groups + * @rules: Number of inserted rules + * @bsize: Size of each bucket in lookup table, in longs + * @lt: Lookup table: 'groups' rows of NFT_PIPAPO_BUCKETS buckets + * @lt_aligned: Version of @lt aligned to NFT_PIPAPO_ALIGN bytes + * @mt: Mapping table: one bucket per rule + */ +struct nft_pipapo_field { + int groups; + unsigned long rules; + size_t bsize; + unsigned long *lt; +#ifdef NFT_PIPAPO_ALIGN + unsigned long *lt_aligned; +#endif + union nft_pipapo_map_bucket *mt; +}; + +/** + * struct nft_pipapo_match - Data used for lookup and matching + * @field_count Amount of fields in set + * @scratch: Preallocated per-CPU maps for partial matching results + * @scratch_aligned: Version of @scratch aligned to NFT_PIPAPO_ALIGN bytes + * @bsize_max: Maximum lookup table bucket size of all fields, in longs + * @rcu Matching data is swapped on commits + * @f: Fields, with lookup and mapping tables + */ +struct nft_pipapo_match { + int field_count; +#ifdef NFT_PIPAPO_ALIGN + unsigned long * __percpu *scratch_aligned; +#endif + unsigned long * __percpu *scratch; + size_t bsize_max; + struct rcu_head rcu; + struct nft_pipapo_field f[0]; +}; + +/** + * struct nft_pipapo - Representation of a set + * @match: Currently in-use matching data + * @clone: Copy where pending insertions and deletions are kept + * @groups: Total amount of 4-bit groups for fields in this set + * @width: Total bytes to be matched for one packet, including padding + * @dirty: Working copy has pending insertions or deletions + * @last_gc: Timestamp of last garbage collection run, jiffies + * @start_data: Key data of start element for insertion + * @start_elem: Start element for insertion + * + * nft_pipapo_insert() is called separately with start and end elements to be + * inserted. While a tree implementation (see nft_set_rbtree) can insert nodes + * in the tree right away, we need to cache the start element, and perform the + * actual insertion once the end element is also seen. This is fine as insertion + * is serialised by the nftables API. + */ +struct nft_pipapo { + struct nft_pipapo_match __rcu *match; + struct nft_pipapo_match *clone; + int groups; + int width; + bool dirty; + unsigned long last_gc; + u8 start_data[NFT_DATA_VALUE_MAXLEN * sizeof(u32)]; + struct nft_pipapo_elem *start_elem; +}; + +struct nft_pipapo_elem; + +/** + * struct nft_pipapo_elem - API-facing representation of single set element + * @start: Pointer to element that represents start of interval + * @ext: nftables API extensions + */ +struct nft_pipapo_elem { + struct nft_pipapo_elem *start; + struct nft_set_ext ext; +}; + +int pipapo_refill(unsigned long *map, int len, int rules, unsigned long *dst, + union nft_pipapo_map_bucket *mt, bool match_only); + +/** + * pipapo_and_field_buckets() - Select buckets from packet data and intersect + * @f: Field including lookup table + * @dst: Scratch map for partial matching result + * @rp: Packet data register pointer + */ +static inline void pipapo_and_field_buckets(struct nft_pipapo_field *f, + unsigned long *dst, const u8 *rp) +{ + unsigned long *lt = NFT_PIPAPO_LT_ALIGN(f->lt); + int group; + + for (group = 0; group < f->groups; group++) { + u8 v; + + if (group % 2) { + v = *rp & 0x0f; + rp++; + } else { + v = *rp >> 4; + } + __bitmap_and(dst, dst, lt + v * f->bsize, + f->bsize * BITS_PER_LONG); + + lt += f->bsize * NFT_PIPAPO_BUCKETS; + } +} + +/** + * pipapo_estimate_size() - Estimate worst-case for set size + * @elem_count: Count of initial set elements + * + * The size for this set type can vary dramatically, as it depends on the number + * of rules (composing netmasks) the entries expand to. We compute the worst + * case here, in order to ensure that other types are used if concatenation of + * ranges is not needed. + * + * In general, for a non-ranged entry or a single composing netmask, we need + * one bit in each of the sixteen NFT_PIPAPO_BUCKETS, for each 4-bit group (that + * is, each input bit needs four bits of matching data), plus a bucket in the + * mapping table for each field. + * + * Return: estimated worst-case set size, in bytes. + */ +static int pipapo_estimate_size(int elem_count) +{ + int size = sizeof(struct nft_pipapo) + sizeof(struct nft_pipapo_match); + + /* Worst-case with current amount of 32-bit VM registers (16 of them): + * - 2 IPv6 addresses 8 registers + * - 2 interface names 8 registers + * that is, four 128-bit fields: + */ + size += sizeof(struct nft_pipapo_field) * 4; + + /* expanding to worst-case ranges, 128 * 2 rules each, resulting in: + * - 128 4-bit groups + * - each set entry taking 256 bits in each bucket + */ + size += elem_count * NFT_PIPAPO_MAX_BITS / NFT_PIPAPO_GROUP_BITS * + NFT_PIPAPO_BUCKETS * NFT_PIPAPO_MAX_BITS * 2 / BITS_PER_BYTE; + + /* and we need mapping buckets, too */ + size += elem_count * NFT_PIPAPO_MAP_NBITS * + sizeof(union nft_pipapo_map_bucket); + + return size; +} + +#endif /* _NFT_SET_PIPAPO_H */ -- 2.20.1