I found this output confusing:

raid5: measuring checksumming speed
   8regs : 1896.000 MB/sec
   8regs_prefetch: 2396.000 MB/sec
   32regs : 1772.000 MB/sec
   32regs_prefetch: 1664.000 MB/sec
   pIII_sse : 2352.000 MB/sec
   pII_mmx : 4696.000 MB/sec
   p5_mmx : 6252.000 MB/sec
raid5: using function: pIII_sse (2352.000 MB/sec)

since it seemed to be choosing a sub-optimal function. I read the code and figured out why this is the way it is. However, to save others the same confusion I had, and maybe even save a few picoseconds on startup, I humbly submit the attached patch.

--
Josh Litherland <josh@xxxxxxxxxxxxxxxx>
Emperor Linux
diff -drauN linux-2.6.6/drivers/md/xor.c linux-2.6.6.new/drivers/md/xor.c --- linux-2.6.6/drivers/md/xor.c 2004-05-09 22:32:37.000000000 -0400 +++ linux-2.6.6.new/drivers/md/xor.c 2004-06-20 14:26:21.000000000 -0400 @@ -108,29 +108,40 @@ } b2 = b1 + 2*PAGE_SIZE + BENCH_SIZE; - printk(KERN_INFO "raid5: measuring checksumming speed\n"); + /* + * If this arch/cpu has a short-circuited selection, don't loop through all + * the possible functions, just test the best one + */ + fastest = NULL; + +#ifdef XOR_SELECT_TEMPLATE + fastest = XOR_SELECT_TEMPLATE(fastest); +#endif + #define xor_speed(templ) do_xor_speed((templ), b1, b2) - XOR_TRY_TEMPLATES; + if (fastest) { + xor_speed(fastest); + printk("KERN_INFO raid5: automatically using best checksumming function: %s (%d.%03d MB/sec)\n", + fastest->name, fastest->speed / 1000, fastest->speed % 1000); + } else { + printk(KERN_INFO "raid5: measuring checksumming speed\n"); + XOR_TRY_TEMPLATES; + fastest = template_list; + for (f = fastest; f; f = f->next) + if (f->speed > fastest->speed) + fastest = f; + printk("raid5: using function: %s (%d.%03d MB/sec)\n", + fastest->name, fastest->speed / 1000, fastest->speed % 1000); + + } #undef xor_speed free_pages((unsigned long)b1, 2); - fastest = template_list; - for (f = fastest; f; f = f->next) - if (f->speed > fastest->speed) - fastest = f; - -#ifdef XOR_SELECT_TEMPLATE - fastest = XOR_SELECT_TEMPLATE(fastest); -#endif - active_template = fastest; - printk("raid5: using function: %s (%d.%03d MB/sec)\n", - fastest->name, fastest->speed / 1000, fastest->speed % 1000); - return 0; }