/*
 * Patch description carried with these definitions (unified diff against
 * net/netfilter/nft_set_pipapo.c, hunk @@ -526,6 +526,51 @@, placed right
 * after pipapo_refill()):
 *
 *   For non-vectorised lookup implementations, this increases matching
 *   rates by 20 to 30% for most set types.
 *
 *   Signed-off-by: Stefano Brivio <sbrivio@xxxxxxxxxx>
 *   ---
 *   v2: No changes
 *
 *   net/netfilter/nft_set_pipapo.c | 86 +++++++++++++++++++++++++++++-----
 *   1 file changed, 73 insertions(+), 13 deletions(-)
 *
 *   diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
 *   index 3cad9aedc168..0596dbd11319 100644
 *
 * Review note: every macro parameter is parenthesised at its point of use,
 * so compound arguments (e.g. a "bsize" of "a + b") expand with the intended
 * precedence. Argument names, order and side effects are unchanged.
 */

/*
 * NFT_PIPAPO_AND_BUCKET() - AND one lookup table bucket into a result bitmap
 * @map:	Bitmap to be updated in place, @bsize longs wide
 * @bucket:	Pointer to the first long of the bucket to AND into @map
 * @bsize:	Number of longs in @map and in the bucket
 * @idx:	Caller-provided integer lvalue, used as loop counter and
 *		clobbered
 *
 * @bucket is evaluated once per iteration, so it must not have side effects.
 */
#define NFT_PIPAPO_AND_BUCKET(map, bucket, bsize, idx) \
	do { \
		for ((idx) = 0; (idx) < (bsize); (idx)++) \
			(map)[(idx)] &= *((bucket) + (idx)); \
	} while (0)

/*
 * NFT_PIPAPO_MATCH_2() - Match two 4-bit groups, i.e. one packet byte
 * @map:	Result bitmap, @bsize longs wide
 * @lt:		Base of the lookup table for the field
 * @bsize:	Number of longs per bucket
 * @pkt:	Pointer lvalue to the current packet byte, advanced by one
 * @offset:	Index, in buckets, of the first bucket of the first group
 * @idx:	Scratch integer lvalue for the inner loop
 *
 * The high nibble of *@pkt selects the bucket within the group starting at
 * @offset, the low nibble selects the bucket within the following group,
 * 16 buckets further on (a 4-bit value has 16 possible buckets).
 */
#define NFT_PIPAPO_MATCH_2(map, lt, bsize, pkt, offset, idx) \
	do { \
		NFT_PIPAPO_AND_BUCKET(map, \
				      (lt) + \
				      ((offset) + 0 + (*(pkt) >> 4)) * (bsize), \
				      bsize, idx); \
		NFT_PIPAPO_AND_BUCKET(map, \
				      (lt) + \
				      ((offset) + 16 + (*(pkt) & 0x0f)) * (bsize), \
				      bsize, idx); \
		(pkt)++; \
	} while (0)

/* Match four groups (two packet bytes), starting at bucket index @offset. */
#define NFT_PIPAPO_MATCH_4(map, lt, bsize, pkt, offset, idx) \
	do { \
		NFT_PIPAPO_MATCH_2(map, lt, bsize, pkt, (offset), idx); \
		NFT_PIPAPO_MATCH_2(map, lt, bsize, pkt, (offset) + 2 * 16, idx); \
	} while (0)

/* Match eight groups (four packet bytes), starting at bucket index @offset. */
#define NFT_PIPAPO_MATCH_8(map, lt, bsize, pkt, offset, idx) \
	do { \
		NFT_PIPAPO_MATCH_4(map, lt, bsize, pkt, (offset), idx); \
		NFT_PIPAPO_MATCH_4(map, lt, bsize, pkt, (offset) + 4 * 16, idx); \
	} while (0)

/* Match a whole 12-group field (six packet bytes) from the table start. */
#define NFT_PIPAPO_MATCH_12(map, lt, bsize, pkt, idx) \
	do { \
		NFT_PIPAPO_MATCH_8(map, lt, bsize, pkt, 0, idx); \
		NFT_PIPAPO_MATCH_4(map, lt, bsize, pkt, 8 * 16, idx); \
	} while (0)

/* Match a whole 32-group field (sixteen packet bytes) from the table start. */
#define NFT_PIPAPO_MATCH_32(map, lt, bsize, pkt, idx) \
	do { \
		NFT_PIPAPO_MATCH_8(map, lt, bsize, pkt, 0, idx); \
		NFT_PIPAPO_MATCH_8(map, lt, bsize, pkt, 8 * 16, idx); \
		NFT_PIPAPO_MATCH_8(map, lt, bsize, pkt, 16 * 16, idx); \
		NFT_PIPAPO_MATCH_8(map, lt, bsize, pkt, 24 * 16, idx); \
	} while (0)

/* The patch context continues with the kernel-doc comment of nft_pipapo_lookup(). */
nft_pipapo_lookup() - Lookup function * @net: Network namespace @@ -566,24 +611,39 @@ static bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set, nft_pipapo_for_each_field(f, i, m) { bool last = i == m->field_count - 1; unsigned long *lt = f->lt; - int b, group; + int b, group, j; /* For each 4-bit group: select lookup table bucket depending on - * packet bytes value, then AND bucket value + * packet bytes value, then AND bucket value. Unroll loops for + * the most common cases (protocol, port, IPv4 address, MAC + * address, IPv6 address). */ - for (group = 0; group < f->groups; group++) { - u8 v; + if (f->groups == 2) { + NFT_PIPAPO_MATCH_2(res_map, lt, f->bsize, rp, 0, j); + } else if (f->groups == 4) { + NFT_PIPAPO_MATCH_4(res_map, lt, f->bsize, rp, 0, j); + } else if (f->groups == 8) { + NFT_PIPAPO_MATCH_8(res_map, lt, f->bsize, rp, 0, j); + } else if (f->groups == 12) { + NFT_PIPAPO_MATCH_12(res_map, lt, f->bsize, rp, j); + } else if (f->groups == 32) { + NFT_PIPAPO_MATCH_32(res_map, lt, f->bsize, rp, j); + } else { + for (group = 0; group < f->groups; group++) { + u8 v; + + if (group % 2) { + v = *rp & 0x0f; + rp++; + } else { + v = *rp >> 4; + } + __bitmap_and(res_map, res_map, + lt + v * f->bsize, + f->bsize * BITS_PER_LONG); - if (group % 2) { - v = *rp & 0x0f; - rp++; - } else { - v = *rp >> 4; + lt += f->bsize * NFT_PIPAPO_BUCKETS; } - __bitmap_and(res_map, res_map, lt + v * f->bsize, - f->bsize * BITS_PER_LONG); - - lt += f->bsize * NFT_PIPAPO_BUCKETS; } /* Now populate the bitmap for the next field, unless this is -- 2.20.1