This function is used for every packet, siphash_4u64 is noticeably faster
than using local buffer + siphash:

Before:
  1.23%  kpktgend_0  [kernel.vmlinux]  [k] __siphash_unaligned
  0.14%  kpktgend_0  [nf_conntrack]    [k] hash_conntrack_raw
After:
  0.79%  kpktgend_0  [kernel.vmlinux]  [k] siphash_4u64
  0.15%  kpktgend_0  [nf_conntrack]    [k] hash_conntrack_raw

In the pktgen test this gives about ~2.4% performance improvement.

Signed-off-by: Florian Westphal <fw@xxxxxxxxx>
---
 v2: better ipv4 handling. In ipv4 case, all[1..3] are 0, so place
 "middle part" of ipv6 addresses in c, d and mix in zoneid/hash etc.

 net/netfilter/nf_conntrack_core.c | 28 +++++++++++-----------------
 1 file changed, 11 insertions(+), 17 deletions(-)

diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index f97bda06d2a9..d633ef028a3d 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -211,28 +211,22 @@ static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple,
 			      unsigned int zoneid,
 			      const struct net *net)
 {
-	struct {
-		struct nf_conntrack_man src;
-		union nf_inet_addr dst_addr;
-		unsigned int zone;
-		u32 net_mix;
-		u16 dport;
-		u16 proto;
-	} __aligned(SIPHASH_ALIGNMENT) combined;
+	u64 a, b, c, d;
 
 	get_random_once(&nf_conntrack_hash_rnd, sizeof(nf_conntrack_hash_rnd));
 
-	memset(&combined, 0, sizeof(combined));
+	/* The direction must be ignored, handle usable tuplehash members manually */
+	a = (u64)tuple->src.u3.all[0] << 32 | tuple->src.u3.all[3];
+	b = (u64)tuple->dst.u3.all[0] << 32 | tuple->dst.u3.all[3];
 
-	/* The direction must be ignored, so handle usable members manually. */
-	combined.src = tuple->src;
-	combined.dst_addr = tuple->dst.u3;
-	combined.zone = zoneid;
-	combined.net_mix = net_hash_mix(net);
-	combined.dport = (__force __u16)tuple->dst.u.all;
-	combined.proto = tuple->dst.protonum;
+	c = (u64)tuple->src.u.all << 32 | tuple->dst.u.all << 16 | tuple->dst.protonum;
+	d = (u64)zoneid << 32 | net_hash_mix(net);
 
-	return (u32)siphash(&combined, sizeof(combined), &nf_conntrack_hash_rnd);
+	/* IPv4: u3.all[1,2,3] == 0 */
+	c ^= (u64)tuple->src.u3.all[1] << 32 | tuple->src.u3.all[2];
+	d += (u64)tuple->dst.u3.all[1] << 32 | tuple->dst.u3.all[2];
+
+	return (u32)siphash_4u64(a, b, c, d, &nf_conntrack_hash_rnd);
 }
 
 static u32 scale_hash(u32 hash)
-- 
2.37.4