Re: [PATCH 2/5] eytzinger: Promote to include/linux/

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Fri, Jan 26, 2024 at 05:06:52PM -0500, Kent Overstreet wrote:
> eytzinger trees are a faster alternative to binary search. They're a bit
> more expensive to set up, but lookups perform much better assuming the
> tree isn't entirely in cache.
> 
> Binary search is a worst case scenario for branch prediction and
> prefetching, but eytzinger trees have children adjacent in memory and
> thus we can prefetch before knowing the result of a comparison.
> 
> An eytzinger tree is a binary tree laid out in an array, with the same
> geometry as the usual binary heap construction, but used as a search
> tree instead.
> 
> Signed-off-by: Kent Overstreet <kent.overstreet@xxxxxxxxx>

This looks more or less like what I remember of building heaps and
squinting at my horrible handwritten notes about eytzinger trees from
back in the day.

Reviewed-by: Darrick J. Wong <djwong@xxxxxxxxxx>

--D

> ---
>  fs/bcachefs/bset.c                         |   2 +-
>  fs/bcachefs/journal_seq_blacklist.c        |   6 +-
>  fs/bcachefs/replicas.c                     |  17 ++-
>  fs/bcachefs/replicas.h                     |   3 +-
>  fs/bcachefs/super-io.h                     |   2 +-
>  fs/bcachefs/util.c                         | 145 +--------------------
>  fs/bcachefs/util.h                         |   4 -
>  {fs/bcachefs => include/linux}/eytzinger.h |  56 ++++----
>  lib/sort.c                                 |  85 ++++++++++++
>  9 files changed, 136 insertions(+), 184 deletions(-)
>  rename {fs/bcachefs => include/linux}/eytzinger.h (78%)
> 
> diff --git a/fs/bcachefs/bset.c b/fs/bcachefs/bset.c
> index 3fd1085b6c61..1d77aa55d641 100644
> --- a/fs/bcachefs/bset.c
> +++ b/fs/bcachefs/bset.c
> @@ -9,12 +9,12 @@
>  #include "bcachefs.h"
>  #include "btree_cache.h"
>  #include "bset.h"
> -#include "eytzinger.h"
>  #include "trace.h"
>  #include "util.h"
>  
>  #include <asm/unaligned.h>
>  #include <linux/console.h>
> +#include <linux/eytzinger.h>
>  #include <linux/random.h>
>  #include <linux/prefetch.h>
>  
> diff --git a/fs/bcachefs/journal_seq_blacklist.c b/fs/bcachefs/journal_seq_blacklist.c
> index 0200e299cfbb..024c9b1b323f 100644
> --- a/fs/bcachefs/journal_seq_blacklist.c
> +++ b/fs/bcachefs/journal_seq_blacklist.c
> @@ -2,10 +2,11 @@
>  
>  #include "bcachefs.h"
>  #include "btree_iter.h"
> -#include "eytzinger.h"
>  #include "journal_seq_blacklist.h"
>  #include "super-io.h"
>  
> +#include <linux/eytzinger.h>
> +
>  /*
>   * journal_seq_blacklist machinery:
>   *
> @@ -119,8 +120,7 @@ int bch2_journal_seq_blacklist_add(struct bch_fs *c, u64 start, u64 end)
>  	return ret ?: bch2_blacklist_table_initialize(c);
>  }
>  
> -static int journal_seq_blacklist_table_cmp(const void *_l,
> -					   const void *_r, size_t size)
> +static int journal_seq_blacklist_table_cmp(const void *_l, const void *_r)
>  {
>  	const struct journal_seq_blacklist_table_entry *l = _l;
>  	const struct journal_seq_blacklist_table_entry *r = _r;
> diff --git a/fs/bcachefs/replicas.c b/fs/bcachefs/replicas.c
> index cc2672c12031..75fdce373f76 100644
> --- a/fs/bcachefs/replicas.c
> +++ b/fs/bcachefs/replicas.c
> @@ -6,12 +6,15 @@
>  #include "replicas.h"
>  #include "super-io.h"
>  
> +#include <linux/sort.h>
> +
>  static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *,
>  					    struct bch_replicas_cpu *);
>  
>  /* Some (buggy!) compilers don't allow memcmp to be passed as a pointer */
> -static int bch2_memcmp(const void *l, const void *r, size_t size)
> +static int bch2_memcmp(const void *l, const void *r,  const void *priv)
>  {
> +	size_t size = (size_t) priv;
>  	return memcmp(l, r, size);
>  }
>  
> @@ -39,7 +42,8 @@ void bch2_replicas_entry_sort(struct bch_replicas_entry_v1 *e)
>  
>  static void bch2_cpu_replicas_sort(struct bch_replicas_cpu *r)
>  {
> -	eytzinger0_sort(r->entries, r->nr, r->entry_size, bch2_memcmp, NULL);
> +	eytzinger0_sort_r(r->entries, r->nr, r->entry_size,
> +			  bch2_memcmp, NULL, (void *)(size_t)r->entry_size);
>  }
>  
>  static void bch2_replicas_entry_v0_to_text(struct printbuf *out,
> @@ -824,10 +828,11 @@ static int bch2_cpu_replicas_validate(struct bch_replicas_cpu *cpu_r,
>  {
>  	unsigned i;
>  
> -	sort_cmp_size(cpu_r->entries,
> -		      cpu_r->nr,
> -		      cpu_r->entry_size,
> -		      bch2_memcmp, NULL);
> +	sort_r(cpu_r->entries,
> +	       cpu_r->nr,
> +	       cpu_r->entry_size,
> +	       bch2_memcmp, NULL,
> +	       (void *)(size_t)cpu_r->entry_size);
>  
>  	for (i = 0; i < cpu_r->nr; i++) {
>  		struct bch_replicas_entry_v1 *e =
> diff --git a/fs/bcachefs/replicas.h b/fs/bcachefs/replicas.h
> index 654a4b26d3a3..983cce782ac2 100644
> --- a/fs/bcachefs/replicas.h
> +++ b/fs/bcachefs/replicas.h
> @@ -3,9 +3,10 @@
>  #define _BCACHEFS_REPLICAS_H
>  
>  #include "bkey.h"
> -#include "eytzinger.h"
>  #include "replicas_types.h"
>  
> +#include <linux/eytzinger.h>
> +
>  void bch2_replicas_entry_sort(struct bch_replicas_entry_v1 *);
>  void bch2_replicas_entry_to_text(struct printbuf *,
>  				 struct bch_replicas_entry_v1 *);
> diff --git a/fs/bcachefs/super-io.h b/fs/bcachefs/super-io.h
> index 95e80e06316b..f37620919e11 100644
> --- a/fs/bcachefs/super-io.h
> +++ b/fs/bcachefs/super-io.h
> @@ -3,12 +3,12 @@
>  #define _BCACHEFS_SUPER_IO_H
>  
>  #include "extents.h"
> -#include "eytzinger.h"
>  #include "super_types.h"
>  #include "super.h"
>  #include "sb-members.h"
>  
>  #include <asm/byteorder.h>
> +#include <linux/eytzinger.h>
>  
>  static inline bool bch2_version_compatible(u16 version)
>  {
> diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c
> index d7ea95abb9df..c7cf9c6fcf9a 100644
> --- a/fs/bcachefs/util.c
> +++ b/fs/bcachefs/util.c
> @@ -11,6 +11,7 @@
>  #include <linux/console.h>
>  #include <linux/ctype.h>
>  #include <linux/debugfs.h>
> +#include <linux/eytzinger.h>
>  #include <linux/freezer.h>
>  #include <linux/kthread.h>
>  #include <linux/log2.h>
> @@ -24,7 +25,6 @@
>  #include <linux/sched/clock.h>
>  #include <linux/mean_and_variance.h>
>  
> -#include "eytzinger.h"
>  #include "util.h"
>  
>  static const char si_units[] = "?kMGTPEZY";
> @@ -863,149 +863,6 @@ void memcpy_from_bio(void *dst, struct bio *src, struct bvec_iter src_iter)
>  	}
>  }
>  
> -static int alignment_ok(const void *base, size_t align)
> -{
> -	return IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ||
> -		((unsigned long)base & (align - 1)) == 0;
> -}
> -
> -static void u32_swap(void *a, void *b, size_t size)
> -{
> -	u32 t = *(u32 *)a;
> -	*(u32 *)a = *(u32 *)b;
> -	*(u32 *)b = t;
> -}
> -
> -static void u64_swap(void *a, void *b, size_t size)
> -{
> -	u64 t = *(u64 *)a;
> -	*(u64 *)a = *(u64 *)b;
> -	*(u64 *)b = t;
> -}
> -
> -static void generic_swap(void *a, void *b, size_t size)
> -{
> -	char t;
> -
> -	do {
> -		t = *(char *)a;
> -		*(char *)a++ = *(char *)b;
> -		*(char *)b++ = t;
> -	} while (--size > 0);
> -}
> -
> -static inline int do_cmp(void *base, size_t n, size_t size,
> -			 int (*cmp_func)(const void *, const void *, size_t),
> -			 size_t l, size_t r)
> -{
> -	return cmp_func(base + inorder_to_eytzinger0(l, n) * size,
> -			base + inorder_to_eytzinger0(r, n) * size,
> -			size);
> -}
> -
> -static inline void do_swap(void *base, size_t n, size_t size,
> -			   void (*swap_func)(void *, void *, size_t),
> -			   size_t l, size_t r)
> -{
> -	swap_func(base + inorder_to_eytzinger0(l, n) * size,
> -		  base + inorder_to_eytzinger0(r, n) * size,
> -		  size);
> -}
> -
> -void eytzinger0_sort(void *base, size_t n, size_t size,
> -		     int (*cmp_func)(const void *, const void *, size_t),
> -		     void (*swap_func)(void *, void *, size_t))
> -{
> -	int i, c, r;
> -
> -	if (!swap_func) {
> -		if (size == 4 && alignment_ok(base, 4))
> -			swap_func = u32_swap;
> -		else if (size == 8 && alignment_ok(base, 8))
> -			swap_func = u64_swap;
> -		else
> -			swap_func = generic_swap;
> -	}
> -
> -	/* heapify */
> -	for (i = n / 2 - 1; i >= 0; --i) {
> -		for (r = i; r * 2 + 1 < n; r = c) {
> -			c = r * 2 + 1;
> -
> -			if (c + 1 < n &&
> -			    do_cmp(base, n, size, cmp_func, c, c + 1) < 0)
> -				c++;
> -
> -			if (do_cmp(base, n, size, cmp_func, r, c) >= 0)
> -				break;
> -
> -			do_swap(base, n, size, swap_func, r, c);
> -		}
> -	}
> -
> -	/* sort */
> -	for (i = n - 1; i > 0; --i) {
> -		do_swap(base, n, size, swap_func, 0, i);
> -
> -		for (r = 0; r * 2 + 1 < i; r = c) {
> -			c = r * 2 + 1;
> -
> -			if (c + 1 < i &&
> -			    do_cmp(base, n, size, cmp_func, c, c + 1) < 0)
> -				c++;
> -
> -			if (do_cmp(base, n, size, cmp_func, r, c) >= 0)
> -				break;
> -
> -			do_swap(base, n, size, swap_func, r, c);
> -		}
> -	}
> -}
> -
> -void sort_cmp_size(void *base, size_t num, size_t size,
> -	  int (*cmp_func)(const void *, const void *, size_t),
> -	  void (*swap_func)(void *, void *, size_t size))
> -{
> -	/* pre-scale counters for performance */
> -	int i = (num/2 - 1) * size, n = num * size, c, r;
> -
> -	if (!swap_func) {
> -		if (size == 4 && alignment_ok(base, 4))
> -			swap_func = u32_swap;
> -		else if (size == 8 && alignment_ok(base, 8))
> -			swap_func = u64_swap;
> -		else
> -			swap_func = generic_swap;
> -	}
> -
> -	/* heapify */
> -	for ( ; i >= 0; i -= size) {
> -		for (r = i; r * 2 + size < n; r  = c) {
> -			c = r * 2 + size;
> -			if (c < n - size &&
> -			    cmp_func(base + c, base + c + size, size) < 0)
> -				c += size;
> -			if (cmp_func(base + r, base + c, size) >= 0)
> -				break;
> -			swap_func(base + r, base + c, size);
> -		}
> -	}
> -
> -	/* sort */
> -	for (i = n - size; i > 0; i -= size) {
> -		swap_func(base, base + i, size);
> -		for (r = 0; r * 2 + size < i; r = c) {
> -			c = r * 2 + size;
> -			if (c < i - size &&
> -			    cmp_func(base + c, base + c + size, size) < 0)
> -				c += size;
> -			if (cmp_func(base + r, base + c, size) >= 0)
> -				break;
> -			swap_func(base + r, base + c, size);
> -		}
> -	}
> -}
> -
>  static void mempool_free_vp(void *element, void *pool_data)
>  {
>  	size_t size = (size_t) pool_data;
> diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h
> index 0059481995ef..c3b11c3d24ea 100644
> --- a/fs/bcachefs/util.h
> +++ b/fs/bcachefs/util.h
> @@ -737,10 +737,6 @@ static inline void memset_u64s_tail(void *s, int c, unsigned bytes)
>  	memset(s + bytes, c, rem);
>  }
>  
> -void sort_cmp_size(void *base, size_t num, size_t size,
> -	  int (*cmp_func)(const void *, const void *, size_t),
> -	  void (*swap_func)(void *, void *, size_t));
> -
>  /* just the memmove, doesn't update @_nr */
>  #define __array_insert_item(_array, _nr, _pos)				\
>  	memmove(&(_array)[(_pos) + 1],					\
> diff --git a/fs/bcachefs/eytzinger.h b/include/linux/eytzinger.h
> similarity index 78%
> rename from fs/bcachefs/eytzinger.h
> rename to include/linux/eytzinger.h
> index b04750dbf870..9565a5c26cd5 100644
> --- a/fs/bcachefs/eytzinger.h
> +++ b/include/linux/eytzinger.h
> @@ -1,27 +1,37 @@
>  /* SPDX-License-Identifier: GPL-2.0 */
> -#ifndef _EYTZINGER_H
> -#define _EYTZINGER_H
> +#ifndef _LINUX_EYTZINGER_H
> +#define _LINUX_EYTZINGER_H
>  
>  #include <linux/bitops.h>
>  #include <linux/log2.h>
>  
> -#include "util.h"
> +#ifdef EYTZINGER_DEBUG
> +#define EYTZINGER_BUG_ON(cond)		BUG_ON(cond)
> +#else
> +#define EYTZINGER_BUG_ON(cond)
> +#endif
>  
>  /*
>   * Traversal for trees in eytzinger layout - a full binary tree layed out in an
> - * array
> - */
> -
> -/*
> - * One based indexing version:
> + * array.
>   *
> - * With one based indexing each level of the tree starts at a power of two -
> - * good for cacheline alignment:
> + * Consider using an eytzinger tree any time you would otherwise be doing binary
> + * search over an array. Binary search is a worst case scenario for branch
> + * prediction and prefetching, but in an eytzinger tree every node's children
> + * are adjacent in memory, thus we can prefetch children before knowing the
> + * result of the comparison, assuming multiple nodes fit on a cacheline.
> + *
> + * Two variants are provided, for one based indexing and zero based indexing.
> + *
> + * Zero based indexing is more convenient, but one based indexing has better
> + * alignment and thus better performance because each new level of the tree
> + * starts at a power of two, and thus if element 0 was cacheline aligned, each
> + * new level will be as well.
>   */
>  
>  static inline unsigned eytzinger1_child(unsigned i, unsigned child)
>  {
> -	EBUG_ON(child > 1);
> +	EYTZINGER_BUG_ON(child > 1);
>  
>  	return (i << 1) + child;
>  }
> @@ -58,7 +68,7 @@ static inline unsigned eytzinger1_last(unsigned size)
>  
>  static inline unsigned eytzinger1_next(unsigned i, unsigned size)
>  {
> -	EBUG_ON(i > size);
> +	EYTZINGER_BUG_ON(i > size);
>  
>  	if (eytzinger1_right_child(i) <= size) {
>  		i = eytzinger1_right_child(i);
> @@ -74,7 +84,7 @@ static inline unsigned eytzinger1_next(unsigned i, unsigned size)
>  
>  static inline unsigned eytzinger1_prev(unsigned i, unsigned size)
>  {
> -	EBUG_ON(i > size);
> +	EYTZINGER_BUG_ON(i > size);
>  
>  	if (eytzinger1_left_child(i) <= size) {
>  		i = eytzinger1_left_child(i) + 1;
> @@ -101,7 +111,7 @@ static inline unsigned __eytzinger1_to_inorder(unsigned i, unsigned size,
>  	unsigned shift = __fls(size) - b;
>  	int s;
>  
> -	EBUG_ON(!i || i > size);
> +	EYTZINGER_BUG_ON(!i || i > size);
>  
>  	i  ^= 1U << b;
>  	i <<= 1;
> @@ -126,7 +136,7 @@ static inline unsigned __inorder_to_eytzinger1(unsigned i, unsigned size,
>  	unsigned shift;
>  	int s;
>  
> -	EBUG_ON(!i || i > size);
> +	EYTZINGER_BUG_ON(!i || i > size);
>  
>  	/*
>  	 * sign bit trick:
> @@ -164,7 +174,7 @@ static inline unsigned inorder_to_eytzinger1(unsigned i, unsigned size)
>  
>  static inline unsigned eytzinger0_child(unsigned i, unsigned child)
>  {
> -	EBUG_ON(child > 1);
> +	EYTZINGER_BUG_ON(child > 1);
>  
>  	return (i << 1) + 1 + child;
>  }
> @@ -231,11 +241,9 @@ static inline unsigned inorder_to_eytzinger0(unsigned i, unsigned size)
>  	     (_i) != -1;				\
>  	     (_i) = eytzinger0_next((_i), (_size)))
>  
> -typedef int (*eytzinger_cmp_fn)(const void *l, const void *r, size_t size);
> -
>  /* return greatest node <= @search, or -1 if not found */
>  static inline ssize_t eytzinger0_find_le(void *base, size_t nr, size_t size,
> -					 eytzinger_cmp_fn cmp, const void *search)
> +					 cmp_func_t cmp, const void *search)
>  {
>  	unsigned i, n = 0;
>  
> @@ -244,7 +252,7 @@ static inline ssize_t eytzinger0_find_le(void *base, size_t nr, size_t size,
>  
>  	do {
>  		i = n;
> -		n = eytzinger0_child(i, cmp(search, base + i * size, size) >= 0);
> +		n = eytzinger0_child(i, cmp(search, base + i * size) >= 0);
>  	} while (n < nr);
>  
>  	if (n & 1) {
> @@ -274,8 +282,8 @@ static inline ssize_t eytzinger0_find_le(void *base, size_t nr, size_t size,
>  	_i;								\
>  })
>  
> -void eytzinger0_sort(void *, size_t, size_t,
> -		    int (*cmp_func)(const void *, const void *, size_t),
> -		    void (*swap_func)(void *, void *, size_t));
> +void eytzinger0_sort_r(void *, size_t, size_t,
> +		       cmp_r_func_t, swap_r_func_t, const void *);
> +void eytzinger0_sort(void *, size_t, size_t, cmp_func_t, swap_func_t);
>  
> -#endif /* _EYTZINGER_H */
> +#endif /* _LINUX_EYTZINGER_H */
> diff --git a/lib/sort.c b/lib/sort.c
> index b399bf10d675..3dfa83d86bbb 100644
> --- a/lib/sort.c
> +++ b/lib/sort.c
> @@ -290,3 +290,88 @@ void sort(void *base, size_t num, size_t size,
>  	return sort_r(base, num, size, _CMP_WRAPPER, SWAP_WRAPPER, &w);
>  }
>  EXPORT_SYMBOL(sort);
> +
> +#include <linux/eytzinger.h>
> +
> +static inline int eytzinger0_do_cmp(void *base, size_t n, size_t size,
> +			 cmp_r_func_t cmp_func, const void *priv,
> +			 size_t l, size_t r)
> +{
> +	return do_cmp(base + inorder_to_eytzinger0(l, n) * size,
> +		      base + inorder_to_eytzinger0(r, n) * size,
> +		      cmp_func, priv);
> +}
> +
> +static inline void eytzinger0_do_swap(void *base, size_t n, size_t size,
> +			   swap_r_func_t swap_func, const void *priv,
> +			   size_t l, size_t r)
> +{
> +	do_swap(base + inorder_to_eytzinger0(l, n) * size,
> +		base + inorder_to_eytzinger0(r, n) * size,
> +		size, swap_func, priv);
> +}
> +
> +void eytzinger0_sort_r(void *base, size_t n, size_t size,
> +		       cmp_r_func_t cmp_func,
> +		       swap_r_func_t swap_func,
> +		       const void *priv)
> +{
> +	int i, c, r;
> +
> +	if (!swap_func) {
> +		if (is_aligned(base, size, 8))
> +			swap_func = SWAP_WORDS_64;
> +		else if (is_aligned(base, size, 4))
> +			swap_func = SWAP_WORDS_32;
> +		else
> +			swap_func = SWAP_BYTES;
> +	}
> +
> +	/* heapify */
> +	for (i = n / 2 - 1; i >= 0; --i) {
> +		for (r = i; r * 2 + 1 < n; r = c) {
> +			c = r * 2 + 1;
> +
> +			if (c + 1 < n &&
> +			    eytzinger0_do_cmp(base, n, size, cmp_func, priv, c, c + 1) < 0)
> +				c++;
> +
> +			if (eytzinger0_do_cmp(base, n, size, cmp_func, priv, r, c) >= 0)
> +				break;
> +
> +			eytzinger0_do_swap(base, n, size, swap_func, priv, r, c);
> +		}
> +	}
> +
> +	/* sort */
> +	for (i = n - 1; i > 0; --i) {
> +		eytzinger0_do_swap(base, n, size, swap_func, priv, 0, i);
> +
> +		for (r = 0; r * 2 + 1 < i; r = c) {
> +			c = r * 2 + 1;
> +
> +			if (c + 1 < i &&
> +			    eytzinger0_do_cmp(base, n, size, cmp_func, priv, c, c + 1) < 0)
> +				c++;
> +
> +			if (eytzinger0_do_cmp(base, n, size, cmp_func, priv, r, c) >= 0)
> +				break;
> +
> +			eytzinger0_do_swap(base, n, size, swap_func, priv, r, c);
> +		}
> +	}
> +}
> +EXPORT_SYMBOL_GPL(eytzinger0_sort_r);
> +
> +void eytzinger0_sort(void *base, size_t n, size_t size,
> +		     cmp_func_t cmp_func,
> +		     swap_func_t swap_func)
> +{
> +	struct wrapper w = {
> +		.cmp  = cmp_func,
> +		.swap = swap_func,
> +	};
> +
> +	return eytzinger0_sort_r(base, n, size, _CMP_WRAPPER, SWAP_WRAPPER, &w);
> +}
> +EXPORT_SYMBOL_GPL(eytzinger0_sort);
> -- 
> 2.43.0
> 
> 




[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [NTFS 3]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [NTFS 3]     [Samba]     [Device Mapper]     [CEPH Development]

  Powered by Linux