memcmp() is generally slow. Compare keys one long at a time where
possible. This improves xdp_flow performance.

This patch is included in this series only to demonstrate to what
extent xdp_flow performance can increase.

Signed-off-by: Toshiaki Makita <toshiaki.makita1@xxxxxxxxx>
---
 kernel/bpf/hashtab.c | 27 +++++++++++++++++++++++++--
 1 file changed, 25 insertions(+), 2 deletions(-)

diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 22066a6..8b5ffd4 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -417,6 +417,29 @@ static inline struct hlist_nulls_head *select_bucket(struct bpf_htab *htab, u32
         return &__select_bucket(htab, hash)->head;
 }
 
+/* key1 must be aligned to sizeof(long) */
+static bool key_equal(void *key1, void *key2, u32 size)
+{
+        /* Check the alignment assumption for key1 at build time */
+        BUILD_BUG_ON(!IS_ALIGNED(offsetof(struct htab_elem, key),
+                                 sizeof(long)));
+
+        if (IS_ALIGNED((unsigned long)key2 | (unsigned long)size,
+                       sizeof(long))) {
+                unsigned long *lkey1, *lkey2;
+
+                for (lkey1 = key1, lkey2 = key2; size > 0;
+                     lkey1++, lkey2++, size -= sizeof(long)) {
+                        if (*lkey1 != *lkey2)
+                                return false;
+                }
+
+                return true;
+        }
+
+        return !memcmp(key1, key2, size);
+}
+
 /* this lookup function can only be called with bucket lock taken */
 static struct htab_elem *lookup_elem_raw(struct hlist_nulls_head *head, u32 hash,
                                          void *key, u32 key_size)
@@ -425,7 +448,7 @@ static struct htab_elem *lookup_elem_raw(struct hlist_nulls_head *head, u32 hash
         struct htab_elem *l;
 
         hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
-                if (l->hash == hash && !memcmp(&l->key, key, key_size))
+                if (l->hash == hash && key_equal(&l->key, key, key_size))
                         return l;
 
         return NULL;
@@ -444,7 +467,7 @@ static struct htab_elem *lookup_nulls_elem_raw(struct hlist_nulls_head *head,
 
 again:
         hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
-                if (l->hash == hash && !memcmp(&l->key, key, key_size))
+                if (l->hash == hash && key_equal(&l->key, key, key_size))
                         return l;
 
         if (unlikely(get_nulls_value(n) != (hash & (n_buckets - 1))))
-- 
1.8.3.1
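
As an aside (not part of the patch), a minimal stand-alone user-space sketch
of the same idea may help illustrate it: compare one long at a time when the
pointers and the size are long-aligned, and fall back to memcmp() otherwise.
The helper name key_equal_long() and the test values below are hypothetical
and chosen only for this illustration; the in-kernel key_equal() above is the
actual change.

/* Minimal user-space sketch (not part of the patch): compare keys one
 * long at a time when both pointers and the size are long-aligned,
 * otherwise fall back to memcmp().
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static bool key_equal_long(const void *key1, const void *key2, uint32_t size)
{
        /* Take the fast path only when key1, key2 and size are all
         * multiples of sizeof(long); the kernel patch guarantees this
         * for key1 via BUILD_BUG_ON() and tests key2/size at run time.
         */
        if ((((uintptr_t)key1 | (uintptr_t)key2 | size) % sizeof(long)) == 0) {
                const unsigned long *l1 = key1, *l2 = key2;

                for (; size > 0; l1++, l2++, size -= sizeof(long))
                        if (*l1 != *l2)
                                return false;
                return true;
        }

        return !memcmp(key1, key2, size);
}

int main(void)
{
        unsigned long a[2] = { 0x12345678UL, 0x9abcdef0UL };
        unsigned long b[2] = { 0x12345678UL, 0x9abcdef0UL };
        unsigned long c[2] = { 0x12345678UL, 0x9abcdef1UL };

        printf("a == b: %d\n", key_equal_long(a, b, sizeof(a))); /* 1 */
        printf("a == c: %d\n", key_equal_long(a, c, sizeof(a))); /* 0 */
        return 0;
}

The trade-off is the same as in the patch: for the common case of
long-aligned keys whose size is a multiple of sizeof(long), a simple
word-wise loop avoids the overhead of a generic memcmp() call.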