On Wed, Dec 29, 2021 at 2:36 PM Dirk Müller <dmueller@xxxxxxx> wrote:
>
> On x86_64, currently 3 variants of AVX512, 3 variants of AVX2
> and 3 variants of SSE2 are benchmarked on initialization, taking
> between 144-153 jiffies. Over a hardware pool of various generations
> of intel cpus I could not find a single case where SSE2 won over
> AVX2 or AVX512. There are cases where AVX2 wins over AVX512.
>
> By giving AVXx variants higher priority over SSE, we can generally
> skip 3 benchmarks which speeds this up by 33% - 50%, depending on
> whether AVX512 is available.
>
> Signed-off-by: Dirk Müller <dmueller@xxxxxxx>
> ---
> include/linux/raid/pq.h | 2 +-
> lib/raid6/algos.c | 2 +-
> lib/raid6/avx2.c | 6 +++---
> lib/raid6/avx512.c | 6 +++---
> 4 files changed, 8 insertions(+), 8 deletions(-)
>
> diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h
> index 154e954b711d..d6e5a1feb947 100644
> --- a/include/linux/raid/pq.h
> +++ b/include/linux/raid/pq.h
> @@ -81,7 +81,7 @@ struct raid6_calls {
> void (*xor_syndrome)(int, int, int, size_t, void **);
> int (*valid)(void); /* Returns 1 if this routine set is usable */
> const char *name; /* Name of this routine set */
> - int prefer; /* Has special performance attribute */
> + int priority; /* Relative priority ranking if non-zero */

We need more explanation/documentation about 0 vs. 1 vs. 2 priority.

> };
>
> /* Selected algorithm */
> diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c
> index 889033b7fc0d..d1e8ff837a32 100644
> --- a/lib/raid6/algos.c
> +++ b/lib/raid6/algos.c
> @@ -151,7 +151,7 @@ static inline const struct raid6_calls *raid6_choose_gen(
> const struct raid6_calls *best;
>
> for (bestgenperf = 0, best = NULL, algo = raid6_algos; *algo; algo++) {
> - if (!best || (*algo)->prefer >= best->prefer) {
> + if (!best || (*algo)->priority >= best->priority) {
> if ((*algo)->valid && !(*algo)->valid())

If the module load time is really critical, maybe we can run all ->valid()
calls first and find the highest valid priority. Then, we only run the
benchmark for these algorithms.

Does this make sense?

Thanks,
Song

> continue;
>
> diff --git a/lib/raid6/avx2.c b/lib/raid6/avx2.c
> index f299476e1d76..31be496b8c81 100644
> --- a/lib/raid6/avx2.c
> +++ b/lib/raid6/avx2.c
> @@ -132,7 +132,7 @@ const struct raid6_calls raid6_avx2x1 = {
> raid6_avx21_xor_syndrome,
> raid6_have_avx2,
> "avx2x1",
> - 1 /* Has cache hints */
> + .priority = 2
> };
>
> /*
> @@ -262,7 +262,7 @@ const struct raid6_calls raid6_avx2x2 = {
> raid6_avx22_xor_syndrome,
> raid6_have_avx2,
> "avx2x2",
> - 1 /* Has cache hints */
> + .priority = 2
> };
>
> #ifdef CONFIG_X86_64
> @@ -465,6 +465,6 @@ const struct raid6_calls raid6_avx2x4 = {
> raid6_avx24_xor_syndrome,
> raid6_have_avx2,
> "avx2x4",
> - 1 /* Has cache hints */
> + .priority = 2
> };
> #endif
> diff --git a/lib/raid6/avx512.c b/lib/raid6/avx512.c
> index bb684d144ee2..63ae197c3294 100644
> --- a/lib/raid6/avx512.c
> +++ b/lib/raid6/avx512.c
> @@ -162,7 +162,7 @@ const struct raid6_calls raid6_avx512x1 = {
> raid6_avx5121_xor_syndrome,
> raid6_have_avx512,
> "avx512x1",
> - 1 /* Has cache hints */
> + .priority = 2
> };
>
> /*
> @@ -319,7 +319,7 @@ const struct raid6_calls raid6_avx512x2 = {
> raid6_avx5122_xor_syndrome,
> raid6_have_avx512,
> "avx512x2",
> - 1 /* Has cache hints */
> + .priority = 2
> };
>
> #ifdef 
CONFIG_X86_64
> @@ -557,7 +557,7 @@ const struct raid6_calls raid6_avx512x4 = {
> raid6_avx5124_xor_syndrome,
> raid6_have_avx512,
> "avx512x4",
> - 1 /* Has cache hints */
> + .priority = 2
> };
> #endif
>
> --
> 2.34.1
>