Re: memcpy and prefetch

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, 4 Feb 2009 21:27:46 +0000, Ralf Baechle <ralf@xxxxxxxxxxxxxx> wrote:
> > If this makes sense, we might be able to sign up to do the work. Anyone
> > have a good, caching-aware memcpy test?
> 
> Testing memcpy is an interesting little project.  Correctness is one
> thing but a good implementation needs to do a few performance tradeoffs
> which are best measured with real-world, not synthetic, workloads.

For a correctness test, drivers/dma/dmatest.c might be a good template.

For a speed test, test_cipher_speed in crypto/tcrypt.c can be used as a
template.  Attached is a test module I wrote based on it when I
implemented an asm version of csum_partial_copy_nocheck, etc.  It will
show something like this:

# insmod /tmp/testspeed.ko mode=1

testing speed of csum_partial_copy_nocheck
test 0 (32 byte): 2051560 operations in 1 seconds (65649920 bytes)
test 1 (96 byte): 823512 operations in 1 seconds (79057152 bytes)
test 2 (256 byte): 329124 operations in 1 seconds (84255744 bytes)
test 3 (512 byte): 167739 operations in 1 seconds (85882368 bytes)
...
testing speed of gen_csum_partial_copy_nocheck
test 0 (32 byte): 1555953 operations in 1 seconds (49790496 bytes)
test 1 (96 byte): 700025 operations in 1 seconds (67202400 bytes)
test 2 (256 byte): 293716 operations in 1 seconds (75191296 bytes)
test 3 (512 byte): 151770 operations in 1 seconds (77706240 bytes)
...
insmod: error inserting '/tmp/testspeed.ko': -1 Resource temporarily unavailable

Feel free to hack it ;)

/*
 * Quick & dirty speed testing module.  (Based on tcrypt).
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 */

#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/moduleparam.h>
#include <linux/jiffies.h>
#include <net/checksum.h>

static unsigned int sec = 1;
static int mode;

/*
 * Reference (non-optimized) counterpart of csum_partial_copy_nocheck:
 * checksum @len bytes at @src with csum_partial(), seeded with @sum, then
 * copy the same bytes to @dst with memcpy().
 *
 * Returns the value produced by csum_partial().
 */
static unsigned int gen_csum_partial_copy_nocheck(const void *src,
	void *dst, int len, unsigned int sum)
{
	unsigned int result = csum_partial(src, len, sum);

	memcpy(dst, src, len);
	return result;
}

/*
 * Reference (non-optimized) counterpart of csum_partial_copy_from_user:
 * copy @len bytes from @src to @dst with __copy_from_user(), then checksum
 * the destination buffer with csum_partial(), seeded with @sum.
 *
 * *@err_ptr is set to -EFAULT when __copy_from_user() returns non-zero and
 * is left untouched otherwise.  The checksum of @dst is returned either way.
 */
static unsigned int gen_csum_partial_copy_from_user(const void __user *src,
	void *dst, int len, unsigned int sum, int *err_ptr)
{
	int copy_failed;

	might_sleep();
	copy_failed = (__copy_from_user(dst, src, len) != 0);
	if (copy_failed)
		*err_ptr = -EFAULT;

	return csum_partial(dst, len, sum);
}

/*
 * loop_while_sec - loop header that repeats its body for a fixed wall time.
 * @start: lvalue that receives the jiffies value sampled at loop entry
 * @end:   lvalue that receives the deadline, @start + @sec * HZ
 * @sec:   run time in seconds
 * @count: lvalue zeroed at entry and incremented after every iteration
 *
 * Use like a for statement: loop_while_sec(...) { body }.  Every argument is
 * parenthesized in the expansion so that an expression such as "a + b" passed
 * as @sec is multiplied by HZ as a whole.
 */
#define loop_while_sec(start, end, sec, count) \
	for ((start) = jiffies, (end) = (start) + (sec) * HZ, (count) = 0; \
	     time_before(jiffies, (end)); (count)++)

/*
 * Advance @ofs by @len and wrap back to 0 when a further @len bytes would
 * no longer fit inside a buffer of @bufsize bytes.
 */
static inline size_t testspeed_next_ofs(size_t ofs, size_t len, size_t bufsize)
{
	ofs += len;
	return (ofs + len > bufsize) ? 0 : ofs;
}

/*
 * test_csum_partial_copy_speed - time the checksum-and-copy routines.
 * @cachemiss: when non-zero, every call uses the next offset inside the
 *             64 KiB buffers instead of always reusing offset 0.
 *
 * For each entry of sizes[] and for each of csum_partial_copy_nocheck,
 * csum_partial_copy_from_user and their gen_* reference versions, the
 * routine is called repeatedly for "sec" seconds and the number of
 * completed operations is printed.
 *
 * Returns 0 on success or -ENOMEM if a buffer cannot be allocated.
 */
static int test_csum_partial_copy_speed(int cachemiss)
{
	static const size_t sizes[] = {
		0x20, 0x60, 0x100, 0x200, 0x400,
		1460, /* ETH_DATA_LEN - 20(ip header) - 20(tcp header) */
		0x800, 0x1000,
	};
	const size_t bufsize = 0x10000;
	unsigned long start, end;
	unsigned int i;
	void *src, *dst;
	size_t ofs;
	int count;
	int err = 0;	/* written by the *_from_user variants; not inspected */

	src = kmalloc(bufsize, GFP_KERNEL);
	if (!src)
		return -ENOMEM;
	dst = kmalloc(bufsize, GFP_KERNEL);
	if (!dst) {
		kfree(src);
		return -ENOMEM;
	}
	/*
	 * Fill the whole source buffer: with cachemiss set the reads walk
	 * across all of it, not just the first sizes[] bytes.
	 */
	memset(src, 0xff, bufsize);

	printk("\ntesting speed of csum_partial_copy_nocheck\n");

	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		printk("test %u (%zu byte): ", i, sizes[i]);

		ofs = 0;
		loop_while_sec(start, end, sec, count) {
			csum_partial_copy_nocheck(src + ofs, dst + ofs,
						  sizes[i], 0);
			if (cachemiss)
				ofs = testspeed_next_ofs(ofs, sizes[i], bufsize);
		}

		printk("%d operations in %u seconds (%zu bytes)\n",
		       count, sec, (size_t)count * sizes[i]);
	}

	printk("\ntesting speed of csum_partial_copy_from_user\n");

	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		printk("test %u (%zu byte): ", i, sizes[i]);

		ofs = 0;
		loop_while_sec(start, end, sec, count) {
			/* The source is a kernel buffer force-cast to __user. */
			csum_partial_copy_from_user((const void __force __user *)src + ofs,
						    dst + ofs,
						    sizes[i], 0, &err);
			if (cachemiss)
				ofs = testspeed_next_ofs(ofs, sizes[i], bufsize);
		}

		printk("%d operations in %u seconds (%zu bytes)\n",
		       count, sec, (size_t)count * sizes[i]);
	}

	printk("\ntesting speed of gen_csum_partial_copy_nocheck\n");

	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		printk("test %u (%zu byte): ", i, sizes[i]);

		ofs = 0;
		loop_while_sec(start, end, sec, count) {
			gen_csum_partial_copy_nocheck(src + ofs, dst + ofs,
						      sizes[i], 0);
			if (cachemiss)
				ofs = testspeed_next_ofs(ofs, sizes[i], bufsize);
		}

		printk("%d operations in %u seconds (%zu bytes)\n",
		       count, sec, (size_t)count * sizes[i]);
	}

	printk("\ntesting speed of gen_csum_partial_copy_from_user\n");

	for (i = 0; i < ARRAY_SIZE(sizes); i++) {
		printk("test %u (%zu byte): ", i, sizes[i]);

		ofs = 0;
		loop_while_sec(start, end, sec, count) {
			/* The source is a kernel buffer force-cast to __user. */
			gen_csum_partial_copy_from_user((const void __force __user *)src + ofs,
							dst + ofs,
							sizes[i], 0, &err);
			if (cachemiss)
				ofs = testspeed_next_ofs(ofs, sizes[i], bufsize);
		}

		printk("%d operations in %u seconds (%zu bytes)\n",
		       count, sec, (size_t)count * sizes[i]);
	}

	kfree(src);
	kfree(dst);
	return 0;
}

/*
 * Module entry point: run the speed test selected by the "mode" parameter
 * (0: test_csum_partial_copy_speed(0), 1: test_csum_partial_copy_speed(1)).
 *
 * Never returns 0.  An unsupported mode yields -EINVAL, a failing test
 * yields its error code, and a successful run yields -EAGAIN.
 */
static int __init init(void)
{
	int ret;

	switch (mode) {
	case 0:
		ret = test_csum_partial_copy_speed(0);
		break;
	case 1:
		ret = test_csum_partial_copy_speed(1);
		break;
	default:
		/* Report a bad selector instead of silently running nothing. */
		printk(KERN_ERR "speed test: unsupported mode %d (expected 0 or 1)\n",
		       mode);
		return -EINVAL;
	}
	if (ret)
		return ret;

	/* We intentionally return -EAGAIN to prevent keeping the module. */
	return -EAGAIN;
}

/* Exit handler with nothing to tear down; init() always returns non-zero. */
static void __exit fini(void)
{
}

/* Register the entry and exit handlers. */
module_init(init);
module_exit(fini);

/* Load-time parameters; permission 0 means no sysfs entry is created. */
module_param(mode, int, 0);
MODULE_PARM_DESC(mode, "Test mode: 0 = reuse the same buffer offset, 1 = step through the buffers (default 0)");
module_param(sec, uint, 0);
MODULE_PARM_DESC(sec, "Length in seconds of speed tests (default 1)");

MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Quick & dirty speed testing module");

[Index of Archives]     [Linux MIPS Home]     [LKML Archive]     [Linux ARM Kernel]     [Linux ARM]     [Linux]     [Git]     [Yosemite News]     [Linux SCSI]     [Linux Hams]

  Powered by Linux