On Wed, 4 Feb 2009 21:27:46 +0000, Ralf Baechle <ralf@xxxxxxxxxxxxxx> wrote: > > If this makes sense, we might be able to sign up to do the work. Anyone > > have a good, caching-aware memcpy test? > > Testing memcpy is an interesting little project. Correctness is one > thing but a good implementation needs to do a few performance tradeoffs > which are best measured with real-world, not synthetic workloads. For a correctness test, drivers/dma/dmatest.c might be a good template. For a speed test, test_cipher_speed in crypto/tcrypt.c can be used as a template. Attached is a test module I wrote based on it, when I implemented an asm version of csum_partial_copy_nocheck, etc. It will show something like this: # insmod /tmp/testspeed.ko mode=1 testing speed of csum_partial_copy_nocheck test 0 (32 byte): 2051560 operations in 1 seconds (65649920 bytes) test 1 (96 byte): 823512 operations in 1 seconds (79057152 bytes) test 2 (256 byte): 329124 operations in 1 seconds (84255744 bytes) test 3 (512 byte): 167739 operations in 1 seconds (85882368 bytes) ... testing speed of gen_csum_partial_copy_nocheck test 0 (32 byte): 1555953 operations in 1 seconds (49790496 bytes) test 1 (96 byte): 700025 operations in 1 seconds (67202400 bytes) test 2 (256 byte): 293716 operations in 1 seconds (75191296 bytes) test 3 (512 byte): 151770 operations in 1 seconds (77706240 bytes) ... insmod: error inserting '/tmp/testspeed.ko': -1 Resource temporarily unavailable Feel free to hack it ;)
/*
 * Quick & dirty speed testing module.  (Based on tcrypt).
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 */
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/moduleparam.h>
#include <linux/jiffies.h>
#include <net/checksum.h>

/* Duration of each individual measurement, in seconds (module parameter). */
static unsigned int sec = 1;
/* Test mode (module parameter): 0 = fixed offset, 1 = moving offset. */
static int mode;

/*
 * Non-optimized version of csum_partial_copy_nocheck: checksum @len bytes
 * at @src with csum_partial(), then memcpy() them to @dst.
 * Returns the value produced by csum_partial().
 */
static unsigned int gen_csum_partial_copy_nocheck(const void *src, void *dst,
						  int len, unsigned int sum)
{
	sum = csum_partial(src, len, sum);
	memcpy(dst, src, len);
	return sum;
}

/*
 * Non-optimized version of csum_partial_copy_from_user: copy @len bytes
 * from @src to @dst with __copy_from_user(), then checksum @dst.
 * If the copy reports uncopied bytes, *@err_ptr is set to -EFAULT; the
 * checksum of @dst is returned in either case.
 */
static unsigned int gen_csum_partial_copy_from_user(const void __user *src,
						    void *dst, int len,
						    unsigned int sum,
						    int *err_ptr)
{
	might_sleep();
	if (__copy_from_user(dst, src, len))
		*err_ptr = -EFAULT;
	return csum_partial(dst, len, sum);
}

/*
 * Loop header: run the following statement repeatedly for @sec seconds of
 * jiffies time, counting iterations in @count.  @start and @end are
 * caller-provided unsigned long scratch variables.
 */
#define loop_while_sec(start, end, sec, count)				\
	for ((start) = jiffies, (end) = (start) + (sec) * HZ, (count) = 0; \
	     time_before(jiffies, (end)); (count)++)

/*
 * Compute the buffer offset for the next operation.
 *
 * With @cachemiss clear the offset never changes.  Otherwise it advances by
 * @len and wraps to 0 as soon as another @len bytes would no longer fit in
 * a buffer of @bufsize bytes.
 */
static inline size_t next_offset(size_t ofs, size_t len, size_t bufsize,
				 int cachemiss)
{
	if (!cachemiss)
		return ofs;

	ofs += len;
	if (ofs + len > bufsize)
		ofs = 0;
	return ofs;
}

/* Print the "test N (LEN byte): " prefix of a result line. */
static void print_test_header(unsigned int idx, size_t len)
{
	printk("test %u (%zu byte): ", idx, len);
}

/*
 * Print the tail of a result line.  The byte total is computed in 64 bits
 * so that it cannot wrap on 32-bit machines when @sec is large.
 */
static void print_test_result(unsigned long count, size_t len)
{
	printk("%lu operations in %u seconds (%llu bytes)\n",
	       count, sec, (unsigned long long)count * len);
}

/*
 * Measure how many checksum-and-copy operations per @sec seconds each of
 * csum_partial_copy_nocheck, csum_partial_copy_from_user and their generic
 * counterparts above achieve, for a fixed list of transfer sizes.
 *
 * @cachemiss: when non-zero, every operation uses the next chunk of the
 *             64KiB buffers instead of offset 0 (going by the parameter
 *             name, this is meant to provoke cache misses).
 *
 * Returns 0 on success or -ENOMEM if the buffers cannot be allocated.
 */
static int test_csum_partial_copy_speed(int cachemiss)
{
	static const size_t sizes[] = {
		0x20, 0x60, 0x100, 0x200, 0x400,
		1460, /* ETH_DATA_LEN - 20(ip header) - 20(tcp header) */
		0x800, 0x1000,
	};
	const size_t bufsize = 0x10000;
	unsigned long start, end;
	unsigned long count;
	unsigned int i;
	void *src, *dst;
	size_t ofs;
	int err;

	src = kmalloc(bufsize, GFP_KERNEL);
	if (!src)
		return -ENOMEM;
	dst = kmalloc(bufsize, GFP_KERNEL);
	if (!dst) {
		kfree(src);
		return -ENOMEM;
	}
	/*
	 * Initialise the whole source buffer: with cachemiss set the tests
	 * read from every part of it, not just the first sizes[] bytes.
	 */
	memset(src, 0xff, bufsize);

	printk("\ntesting speed of csum_partial_copy_nocheck\n");
	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		print_test_header(i, sizes[i]);
		ofs = 0;
		loop_while_sec(start, end, sec, count) {
			csum_partial_copy_nocheck(src + ofs, dst + ofs,
						  sizes[i], 0);
			ofs = next_offset(ofs, sizes[i], bufsize, cachemiss);
		}
		print_test_result(count, sizes[i]);
	}

	printk("\ntesting speed of csum_partial_copy_from_user\n");
	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		print_test_header(i, sizes[i]);
		ofs = 0;
		err = 0;
		loop_while_sec(start, end, sec, count) {
			/* Kernel buffer deliberately passed as a user pointer. */
			csum_partial_copy_from_user(
				(const void __force __user *)src + ofs,
				dst + ofs, sizes[i], 0, &err);
			ofs = next_offset(ofs, sizes[i], bufsize, cachemiss);
		}
		print_test_result(count, sizes[i]);
		if (err)
			printk(KERN_WARNING
			       "csum_partial_copy_from_user reported error %d, result is not meaningful\n",
			       err);
	}

	printk("\ntesting speed of gen_csum_partial_copy_nocheck\n");
	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		print_test_header(i, sizes[i]);
		ofs = 0;
		loop_while_sec(start, end, sec, count) {
			gen_csum_partial_copy_nocheck(src + ofs, dst + ofs,
						      sizes[i], 0);
			ofs = next_offset(ofs, sizes[i], bufsize, cachemiss);
		}
		print_test_result(count, sizes[i]);
	}

	printk("\ntesting speed of gen_csum_partial_copy_from_user\n");
	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		print_test_header(i, sizes[i]);
		ofs = 0;
		err = 0;
		loop_while_sec(start, end, sec, count) {
			/* Kernel buffer deliberately passed as a user pointer. */
			gen_csum_partial_copy_from_user(
				(const void __force __user *)src + ofs,
				dst + ofs, sizes[i], 0, &err);
			ofs = next_offset(ofs, sizes[i], bufsize, cachemiss);
		}
		print_test_result(count, sizes[i]);
		if (err)
			printk(KERN_WARNING
			       "gen_csum_partial_copy_from_user reported error %d, result is not meaningful\n",
			       err);
	}

	kfree(src);
	kfree(dst);
	return 0;
}

/*
 * Module entry point: run the test selected by the "mode" parameter.
 *
 * Never returns 0: a test failure is propagated, an unknown mode yields
 * -EINVAL, and a successful run yields -EAGAIN so that the module is not
 * kept loaded after the measurements have been printed.
 */
static int __init init(void)
{
	int ret;

	switch (mode) {
	case 0:
		ret = test_csum_partial_copy_speed(0);
		break;
	case 1:
		ret = test_csum_partial_copy_speed(1);
		break;
	default:
		return -EINVAL;
	}
	if (ret)
		return ret;

	/* We intentionally return -EAGAIN to prevent keeping the module. */
	return -EAGAIN;
}

/* Nothing to undo: init() never leaves the module loaded. */
static void __exit fini(void)
{
}

module_init(init);
module_exit(fini);

module_param(mode, int, 0);
MODULE_PARM_DESC(mode, "0: always use buffer offset 0 (default), "
		       "1: advance through the buffer on every operation");
module_param(sec, uint, 0);
MODULE_PARM_DESC(sec, "Length in seconds of speed tests (default 1)");

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Quick & dirty speed testing module");