On Fri, Jan 26, 2024 at 05:06:52PM -0500, Kent Overstreet wrote: > eytzinger trees are a faster alternative to binary search. They're a bit > more expensive to setup, but lookups perform much better assuming the > tree isn't entirely in cache. > > Binary search is a worst case scenario for branch prediction and > prefetching, but eytzinger trees have children adjacent in memory and > thus we can prefetch before knowing the result of a comparison. > > An eytzinger tree is a binary tree laid out in an array, with the same > geometry as the usual binary heap construction, but used as a search > tree instead. > > Signed-off-by: Kent Overstreet <kent.overstreet@xxxxxxxxx> This looks more or less like what I remember of building heaps and squinting at my horrible handwritten notes about eytzinger trees from back in the day. Reviewed-by: Darrick J. Wong <djwong@xxxxxxxxxx> --D > --- > fs/bcachefs/bset.c | 2 +- > fs/bcachefs/journal_seq_blacklist.c | 6 +- > fs/bcachefs/replicas.c | 17 ++- > fs/bcachefs/replicas.h | 3 +- > fs/bcachefs/super-io.h | 2 +- > fs/bcachefs/util.c | 145 +-------------------- > fs/bcachefs/util.h | 4 - > {fs/bcachefs => include/linux}/eytzinger.h | 56 ++++---- > lib/sort.c | 85 ++++++++++++ > 9 files changed, 136 insertions(+), 184 deletions(-) > rename {fs/bcachefs => include/linux}/eytzinger.h (78%) > > diff --git a/fs/bcachefs/bset.c b/fs/bcachefs/bset.c > index 3fd1085b6c61..1d77aa55d641 100644 > --- a/fs/bcachefs/bset.c > +++ b/fs/bcachefs/bset.c > @@ -9,12 +9,12 @@ > #include "bcachefs.h" > #include "btree_cache.h" > #include "bset.h" > -#include "eytzinger.h" > #include "trace.h" > #include "util.h" > > #include <asm/unaligned.h> > #include <linux/console.h> > +#include <linux/eytzinger.h> > #include <linux/random.h> > #include <linux/prefetch.h> > > diff --git a/fs/bcachefs/journal_seq_blacklist.c b/fs/bcachefs/journal_seq_blacklist.c > index 0200e299cfbb..024c9b1b323f 100644 > --- a/fs/bcachefs/journal_seq_blacklist.c > +++ b/fs/bcachefs/journal_seq_blacklist.c > @@ -2,10 +2,11 @@ > > #include "bcachefs.h" > #include "btree_iter.h" > -#include "eytzinger.h" > #include "journal_seq_blacklist.h" > #include "super-io.h" > > +#include <linux/eytzinger.h> > + > /* > * journal_seq_blacklist machinery: > * > @@ -119,8 +120,7 @@ int bch2_journal_seq_blacklist_add(struct bch_fs *c, u64 start, u64 end) > return ret ?: bch2_blacklist_table_initialize(c); > } > > -static int journal_seq_blacklist_table_cmp(const void *_l, > - const void *_r, size_t size) > +static int journal_seq_blacklist_table_cmp(const void *_l, const void *_r) > { > const struct journal_seq_blacklist_table_entry *l = _l; > const struct journal_seq_blacklist_table_entry *r = _r; > diff --git a/fs/bcachefs/replicas.c b/fs/bcachefs/replicas.c > index cc2672c12031..75fdce373f76 100644 > --- a/fs/bcachefs/replicas.c > +++ b/fs/bcachefs/replicas.c > @@ -6,12 +6,15 @@ > #include "replicas.h" > #include "super-io.h" > > +#include <linux/sort.h> > + > static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *, > struct bch_replicas_cpu *); > > /* Some (buggy!) compilers don't allow memcmp to be passed as a pointer */ > -static int bch2_memcmp(const void *l, const void *r, size_t size) > +static int bch2_memcmp(const void *l, const void *r, const void *priv) > { > + size_t size = (size_t) priv; > return memcmp(l, r, size); > } > > @@ -39,7 +42,8 @@ void bch2_replicas_entry_sort(struct bch_replicas_entry_v1 *e) > > static void bch2_cpu_replicas_sort(struct bch_replicas_cpu *r) > { > - eytzinger0_sort(r->entries, r->nr, r->entry_size, bch2_memcmp, NULL); > + eytzinger0_sort_r(r->entries, r->nr, r->entry_size, > + bch2_memcmp, NULL, (void *)(size_t)r->entry_size); > } > > static void bch2_replicas_entry_v0_to_text(struct printbuf *out, > @@ -824,10 +828,11 @@ static int bch2_cpu_replicas_validate(struct bch_replicas_cpu *cpu_r, > { > unsigned i; > > - sort_cmp_size(cpu_r->entries, > - cpu_r->nr, > - cpu_r->entry_size, > - bch2_memcmp, NULL); > + sort_r(cpu_r->entries, > + cpu_r->nr, > + cpu_r->entry_size, > + bch2_memcmp, NULL, > + (void *)(size_t)cpu_r->entry_size); > > for (i = 0; i < cpu_r->nr; i++) { > struct bch_replicas_entry_v1 *e = > diff --git a/fs/bcachefs/replicas.h b/fs/bcachefs/replicas.h > index 654a4b26d3a3..983cce782ac2 100644 > --- a/fs/bcachefs/replicas.h > +++ b/fs/bcachefs/replicas.h > @@ -3,9 +3,10 @@ > #define _BCACHEFS_REPLICAS_H > > #include "bkey.h" > -#include "eytzinger.h" > #include "replicas_types.h" > > +#include <linux/eytzinger.h> > + > void bch2_replicas_entry_sort(struct bch_replicas_entry_v1 *); > void bch2_replicas_entry_to_text(struct printbuf *, > struct bch_replicas_entry_v1 *); > diff --git a/fs/bcachefs/super-io.h b/fs/bcachefs/super-io.h > index 95e80e06316b..f37620919e11 100644 > --- a/fs/bcachefs/super-io.h > +++ b/fs/bcachefs/super-io.h > @@ -3,12 +3,12 @@ > #define _BCACHEFS_SUPER_IO_H > > #include "extents.h" > -#include "eytzinger.h" > #include "super_types.h" > #include "super.h" > #include "sb-members.h" > > #include <asm/byteorder.h> > +#include <linux/eytzinger.h> > > static inline bool bch2_version_compatible(u16 version) > { > diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c > index d7ea95abb9df..c7cf9c6fcf9a 100644 > --- a/fs/bcachefs/util.c > +++ b/fs/bcachefs/util.c > @@ -11,6 +11,7 @@ > #include <linux/console.h> > #include <linux/ctype.h> > #include <linux/debugfs.h> > +#include <linux/eytzinger.h> > #include <linux/freezer.h> > #include <linux/kthread.h> > #include <linux/log2.h> > @@ -24,7 +25,6 @@ > #include <linux/sched/clock.h> > #include <linux/mean_and_variance.h> > > -#include "eytzinger.h" > #include "util.h" > > static const char si_units[] = "?kMGTPEZY"; > @@ -863,149 +863,6 @@ void memcpy_from_bio(void *dst, struct bio *src, struct bvec_iter src_iter) > } > } > > -static int alignment_ok(const void *base, size_t align) > -{ > - return IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) || > - ((unsigned long)base & (align - 1)) == 0; > -} > - > -static void u32_swap(void *a, void *b, size_t size) > -{ > - u32 t = *(u32 *)a; > - *(u32 *)a = *(u32 *)b; > - *(u32 *)b = t; > -} > - > -static void u64_swap(void *a, void *b, size_t size) > -{ > - u64 t = *(u64 *)a; > - *(u64 *)a = *(u64 *)b; > - *(u64 *)b = t; > -} > - > -static void generic_swap(void *a, void *b, size_t size) > -{ > - char t; > - > - do { > - t = *(char *)a; > - *(char *)a++ = *(char *)b; > - *(char *)b++ = t; > - } while (--size > 0); > -} > - > -static inline int do_cmp(void *base, size_t n, size_t size, > - int (*cmp_func)(const void *, const void *, size_t), > - size_t l, size_t r) > -{ > - return cmp_func(base + inorder_to_eytzinger0(l, n) * size, > - base + inorder_to_eytzinger0(r, n) * size, > - size); > -} > - > -static inline void do_swap(void *base, size_t n, size_t size, > - void (*swap_func)(void *, void *, size_t), > - size_t l, size_t r) > -{ > - swap_func(base + inorder_to_eytzinger0(l, n) * size, > - base + inorder_to_eytzinger0(r, n) * size, > - size); > -} > - > -void eytzinger0_sort(void *base, size_t n, size_t size, > - int (*cmp_func)(const void *, const void *, size_t), > - void (*swap_func)(void *, void *, size_t)) > -{ > - int i, c, r; > - > - if (!swap_func) { > - if (size == 4 && alignment_ok(base, 4)) > - swap_func = u32_swap; > - else if (size == 8 && alignment_ok(base, 8)) > - swap_func = u64_swap; > - else > - swap_func = generic_swap; > - } > - > - /* heapify */ > - for (i = n / 2 - 1; i >= 0; --i) { > - for (r = i; r * 2 + 1 < n; r = c) { > - c = r * 2 + 1; > - > - if (c + 1 < n && > - do_cmp(base, n, size, cmp_func, c, c + 1) < 0) > - c++; > - > - if (do_cmp(base, n, size, cmp_func, r, c) >= 0) > - break; > - > - do_swap(base, n, size, swap_func, r, c); > - } > - } > - > - /* sort */ > - for (i = n - 1; i > 0; --i) { > - do_swap(base, n, size, swap_func, 0, i); > - > - for (r = 0; r * 2 + 1 < i; r = c) { > - c = r * 2 + 1; > - > - if (c + 1 < i && > - do_cmp(base, n, size, cmp_func, c, c + 1) < 0) > - c++; > - > - if (do_cmp(base, n, size, cmp_func, r, c) >= 0) > - break; > - > - do_swap(base, n, size, swap_func, r, c); > - } > - } > -} > - > -void sort_cmp_size(void *base, size_t num, size_t size, > - int (*cmp_func)(const void *, const void *, size_t), > - void (*swap_func)(void *, void *, size_t size)) > -{ > - /* pre-scale counters for performance */ > - int i = (num/2 - 1) * size, n = num * size, c, r; > - > - if (!swap_func) { > - if (size == 4 && alignment_ok(base, 4)) > - swap_func = u32_swap; > - else if (size == 8 && alignment_ok(base, 8)) > - swap_func = u64_swap; > - else > - swap_func = generic_swap; > - } > - > - /* heapify */ > - for ( ; i >= 0; i -= size) { > - for (r = i; r * 2 + size < n; r = c) { > - c = r * 2 + size; > - if (c < n - size && > - cmp_func(base + c, base + c + size, size) < 0) > - c += size; > - if (cmp_func(base + r, base + c, size) >= 0) > - break; > - swap_func(base + r, base + c, size); > - } > - } > - > - /* sort */ > - for (i = n - size; i > 0; i -= size) { > - swap_func(base, base + i, size); > - for (r = 0; r * 2 + size < i; r = c) { > - c = r * 2 + size; > - if (c < i - size && > - cmp_func(base + c, base + c + size, size) < 0) > - c += size; > - if (cmp_func(base + r, base + c, size) >= 0) > - break; > - swap_func(base + r, base + c, size); > - } > - } > -} > - > static void mempool_free_vp(void *element, void *pool_data) > { > size_t size = (size_t) pool_data; > diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h > index 0059481995ef..c3b11c3d24ea 100644 > --- a/fs/bcachefs/util.h > +++ b/fs/bcachefs/util.h > @@ -737,10 +737,6 @@ static inline void memset_u64s_tail(void *s, int c, unsigned bytes) > memset(s + bytes, c, rem); > } > > -void sort_cmp_size(void *base, size_t num, size_t size, > - int (*cmp_func)(const void *, const void *, size_t), > - void (*swap_func)(void *, void *, size_t)); > - > /* just the memmove, doesn't update @_nr */ > #define __array_insert_item(_array, _nr, _pos) \ > memmove(&(_array)[(_pos) + 1], \ > diff --git a/fs/bcachefs/eytzinger.h b/include/linux/eytzinger.h > similarity index 78% > rename from fs/bcachefs/eytzinger.h > rename to include/linux/eytzinger.h > index b04750dbf870..9565a5c26cd5 100644 > --- a/fs/bcachefs/eytzinger.h > +++ b/include/linux/eytzinger.h > @@ -1,27 +1,37 @@ > /* SPDX-License-Identifier: GPL-2.0 */ > -#ifndef _EYTZINGER_H > -#define _EYTZINGER_H > +#ifndef _LINUX_EYTZINGER_H > +#define _LINUX_EYTZINGER_H > > #include <linux/bitops.h> > #include <linux/log2.h> > > -#include "util.h" > +#ifdef EYTZINGER_DEBUG > +#define EYTZINGER_BUG_ON(cond) BUG_ON(cond) > +#else > +#define EYTZINGER_BUG_ON(cond) > +#endif > > /* > * Traversal for trees in eytzinger layout - a full binary tree layed out in an > - * array > - */ > - > -/* > - * One based indexing version: > + * array. > * > - * With one based indexing each level of the tree starts at a power of two - > - * good for cacheline alignment: > + * Consider using an eytzinger tree any time you would otherwise be doing binary > + * search over an array. Binary search is a worst case scenario for branch > + * prediction and prefetching, but in an eytzinger tree every node's children > + * are adjacent in memory, thus we can prefetch children before knowing the > + * result of the comparison, assuming multiple nodes fit on a cacheline. > + * > + * Two variants are provided, for one based indexing and zero based indexing. > + * > + * Zero based indexing is more convenient, but one based indexing has better > + * alignment and thus better performance because each new level of the tree > + * starts at a power of two, and thus if element 0 was cacheline aligned, each > + * new level will be as well. > */ > > static inline unsigned eytzinger1_child(unsigned i, unsigned child) > { > - EBUG_ON(child > 1); > + EYTZINGER_BUG_ON(child > 1); > > return (i << 1) + child; > } > @@ -58,7 +68,7 @@ static inline unsigned eytzinger1_last(unsigned size) > > static inline unsigned eytzinger1_next(unsigned i, unsigned size) > { > - EBUG_ON(i > size); > + EYTZINGER_BUG_ON(i > size); > > if (eytzinger1_right_child(i) <= size) { > i = eytzinger1_right_child(i); > @@ -74,7 +84,7 @@ static inline unsigned eytzinger1_next(unsigned i, unsigned size) > > static inline unsigned eytzinger1_prev(unsigned i, unsigned size) > { > - EBUG_ON(i > size); > + EYTZINGER_BUG_ON(i > size); > > if (eytzinger1_left_child(i) <= size) { > i = eytzinger1_left_child(i) + 1; > @@ -101,7 +111,7 @@ static inline unsigned __eytzinger1_to_inorder(unsigned i, unsigned size, > unsigned shift = __fls(size) - b; > int s; > > - EBUG_ON(!i || i > size); > + EYTZINGER_BUG_ON(!i || i > size); > > i ^= 1U << b; > i <<= 1; > @@ -126,7 +136,7 @@ static inline unsigned __inorder_to_eytzinger1(unsigned i, unsigned size, > unsigned shift; > int s; > > - EBUG_ON(!i || i > size); > + EYTZINGER_BUG_ON(!i || i > size); > > /* > * sign bit trick: > @@ -164,7 +174,7 @@ static inline unsigned inorder_to_eytzinger1(unsigned i, unsigned size) > > static inline unsigned eytzinger0_child(unsigned i, unsigned child) > { > - EBUG_ON(child > 1); > + EYTZINGER_BUG_ON(child > 1); > > return (i << 1) + 1 + child; > } > @@ -231,11 +241,9 @@ static inline unsigned inorder_to_eytzinger0(unsigned i, unsigned size) > (_i) != -1; \ > (_i) = eytzinger0_next((_i), (_size))) > > -typedef int (*eytzinger_cmp_fn)(const void *l, const void *r, size_t size); > - > /* return greatest node <= @search, or -1 if not found */ > static inline ssize_t eytzinger0_find_le(void *base, size_t nr, size_t size, > - eytzinger_cmp_fn cmp, const void *search) > + cmp_func_t cmp, const void *search) > { > unsigned i, n = 0; > > @@ -244,7 +252,7 @@ static inline ssize_t eytzinger0_find_le(void *base, size_t nr, size_t size, > > do { > i = n; > - n = eytzinger0_child(i, cmp(search, base + i * size, size) >= 0); > + n = eytzinger0_child(i, cmp(search, base + i * size) >= 0); > } while (n < nr); > > if (n & 1) { > @@ -274,8 +282,8 @@ static inline ssize_t eytzinger0_find_le(void *base, size_t nr, size_t size, > _i; \ > }) > > -void eytzinger0_sort(void *, size_t, size_t, > - int (*cmp_func)(const void *, const void *, size_t), > - void (*swap_func)(void *, void *, size_t)); > +void eytzinger0_sort_r(void *, size_t, size_t, > + cmp_r_func_t, swap_r_func_t, const void *); > +void eytzinger0_sort(void *, size_t, size_t, cmp_func_t, swap_func_t); > > -#endif /* _EYTZINGER_H */ > +#endif /* _LINUX_EYTZINGER_H */ > diff --git a/lib/sort.c b/lib/sort.c > index b399bf10d675..3dfa83d86bbb 100644 > --- a/lib/sort.c > +++ b/lib/sort.c > @@ -290,3 +290,88 @@ void sort(void *base, size_t num, size_t size, > return sort_r(base, num, size, _CMP_WRAPPER, SWAP_WRAPPER, &w); > } > EXPORT_SYMBOL(sort); > + > +#include <linux/eytzinger.h> > + > +static inline int eytzinger0_do_cmp(void *base, size_t n, size_t size, > + cmp_r_func_t cmp_func, const void *priv, > + size_t l, size_t r) > +{ > + return do_cmp(base + inorder_to_eytzinger0(l, n) * size, > + base + inorder_to_eytzinger0(r, n) * size, > + cmp_func, priv); > +} > + > +static inline void eytzinger0_do_swap(void *base, size_t n, size_t size, > + swap_r_func_t swap_func, const void *priv, > + size_t l, size_t r) > +{ > + do_swap(base + inorder_to_eytzinger0(l, n) * size, > + base + inorder_to_eytzinger0(r, n) * size, > + size, swap_func, priv); > +} > + > +void eytzinger0_sort_r(void *base, size_t n, size_t size, > + cmp_r_func_t cmp_func, > + swap_r_func_t swap_func, > + const void *priv) > +{ > + int i, c, r; > + > + if (!swap_func) { > + if (is_aligned(base, size, 8)) > + swap_func = SWAP_WORDS_64; > + else if (is_aligned(base, size, 4)) > + swap_func = SWAP_WORDS_32; > + else > + swap_func = SWAP_BYTES; > + } > + > + /* heapify */ > + for (i = n / 2 - 1; i >= 0; --i) { > + for (r = i; r * 2 + 1 < n; r = c) { > + c = r * 2 + 1; > + > + if (c + 1 < n && > + eytzinger0_do_cmp(base, n, size, cmp_func, priv, c, c + 1) < 0) > + c++; > + > + if (eytzinger0_do_cmp(base, n, size, cmp_func, priv, r, c) >= 0) > + break; > + > + eytzinger0_do_swap(base, n, size, swap_func, priv, r, c); > + } > + } > + > + /* sort */ > + for (i = n - 1; i > 0; --i) { > + eytzinger0_do_swap(base, n, size, swap_func, priv, 0, i); > + > + for (r = 0; r * 2 + 1 < i; r = c) { > + c = r * 2 + 1; > + > + if (c + 1 < i && > + eytzinger0_do_cmp(base, n, size, cmp_func, priv, c, c + 1) < 0) > + c++; > + > + if (eytzinger0_do_cmp(base, n, size, cmp_func, priv, r, c) >= 0) > + break; > + > + eytzinger0_do_swap(base, n, size, swap_func, priv, r, c); > + } > + } > +} > +EXPORT_SYMBOL_GPL(eytzinger0_sort_r); > + > +void eytzinger0_sort(void *base, size_t n, size_t size, > + cmp_func_t cmp_func, > + swap_func_t swap_func) > +{ > + struct wrapper w = { > + .cmp = cmp_func, > + .swap = swap_func, > + }; > + > + return eytzinger0_sort_r(base, n, size, _CMP_WRAPPER, SWAP_WRAPPER, &w); > +} > +EXPORT_SYMBOL_GPL(eytzinger0_sort); > -- > 2.43.0 > >