This function is used for every packet. According to perf diff,
siphash_4u64 is about 25% faster than the previous
buffer+memset+siphash_unaligned approach.

Signed-off-by: Florian Westphal <fw@xxxxxxxxx>
---
 net/netfilter/nf_conntrack_core.c | 31 +++++++++++++------------------
 1 file changed, 13 insertions(+), 18 deletions(-)

diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index f97bda06d2a9..48c0c146cef1 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -211,28 +211,23 @@ static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple,
 			      unsigned int zoneid,
 			      const struct net *net)
 {
-	struct {
-		struct nf_conntrack_man src;
-		union nf_inet_addr dst_addr;
-		unsigned int zone;
-		u32 net_mix;
-		u16 dport;
-		u16 proto;
-	} __aligned(SIPHASH_ALIGNMENT) combined;
+	u64 a, b, c, d;
 
 	get_random_once(&nf_conntrack_hash_rnd, sizeof(nf_conntrack_hash_rnd));
 
-	memset(&combined, 0, sizeof(combined));
-
 	/* The direction must be ignored, so handle usable members manually. */
-	combined.src = tuple->src;
-	combined.dst_addr = tuple->dst.u3;
-	combined.zone = zoneid;
-	combined.net_mix = net_hash_mix(net);
-	combined.dport = (__force __u16)tuple->dst.u.all;
-	combined.proto = tuple->dst.protonum;
-
-	return (u32)siphash(&combined, sizeof(combined), &nf_conntrack_hash_rnd);
+	a = (u64)tuple->src.u3.all[0] << 32 | tuple->src.u3.all[1];
+	b = (u64)tuple->src.u3.all[2] << 32 | tuple->src.u3.all[3];
+
+	c = (u64)tuple->dst.u3.all[0] << 32 | tuple->dst.u3.all[1];
+	d = (u64)tuple->dst.u3.all[2] << 32 | tuple->dst.u3.all[3];
+
+	a ^= (__force __u64)tuple->dst.u.all;
+	b += tuple->dst.protonum;
+	c ^= net_hash_mix(net);
+	d += zoneid;
+
+	return (u32)siphash_4u64(a, b, c, d, &nf_conntrack_hash_rnd);
 }
 
 static u32 scale_hash(u32 hash)
-- 
2.37.3
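
[ Illustration, not part of the patch: the conversion folds each pair of
  32-bit tuple words into one u64 via "(u64)hi << 32 | lo", so the two
  128-bit addresses exactly fill the four siphash_4u64() input lanes,
  and the small remaining fields (dport, protonum, net mix, zone id)
  are xor'd/added into the lanes afterwards.  A minimal userspace
  sketch of that packing follows; mix64() is a toy stand-in for the
  kernel's siphash_4u64() (it is NOT SipHash, just enough to make the
  sketch runnable), and all input values below are made up:

	#include <stdint.h>
	#include <stdio.h>

	/* toy mixer standing in for siphash_4u64(a, b, c, d, &key) */
	static uint64_t mix64(uint64_t a, uint64_t b, uint64_t c, uint64_t d)
	{
		uint64_t h = 0x736f6d6570736575ULL;

		h ^= a; h *= 0x9e3779b97f4a7c15ULL;
		h ^= b; h *= 0x9e3779b97f4a7c15ULL;
		h ^= c; h *= 0x9e3779b97f4a7c15ULL;
		h ^= d; h *= 0x9e3779b97f4a7c15ULL;
		return h ^ (h >> 32);
	}

	int main(void)
	{
		/* stand-ins for tuple->src.u3.all[] / tuple->dst.u3.all[] */
		uint32_t src[4] = { 0x20010db8, 0, 0, 1 };
		uint32_t dst[4] = { 0x20010db8, 0, 0, 2 };
		uint16_t dport = 443, protonum = 6;
		uint32_t net_mix = 0xabcd, zoneid = 0;

		/* same lane packing as the patch: two u32 words per u64 */
		uint64_t a = (uint64_t)src[0] << 32 | src[1];
		uint64_t b = (uint64_t)src[2] << 32 | src[3];
		uint64_t c = (uint64_t)dst[0] << 32 | dst[1];
		uint64_t d = (uint64_t)dst[2] << 32 | dst[3];

		/* remaining small fields, mixed in as in the patch */
		a ^= dport;
		b += protonum;
		c ^= net_mix;
		d += zoneid;

		printf("hash = %08x\n", (unsigned int)(uint32_t)mix64(a, b, c, d));
		return 0;
	}

  The win over the old code comes from skipping the on-stack struct:
  siphash_4u64() takes its input as plain u64 arguments, so there is no
  buffer to zero with memset() and no SIPHASH_ALIGNMENT requirement to
  satisfy. ]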