Re: Transparent Hugepage impact on memcpy

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi Hitoshi,

On 2013/6/7 21:50, Hitoshi Mitake wrote:

> At Fri, 7 Jun 2013 09:26:58 +0800,
> Jianguo Wu wrote:
>>
>> Hi Hitoshi,
>>
>> Thanks for your reply! please see below.
>>
>> On 2013/6/6 21:54, Hitoshi Mitake wrote:
>>
>>> Hi Jianguo,
>>>
>>> On Wed, Jun 5, 2013 at 12:26 PM, Jianguo Wu <wujianguo@xxxxxxxxxx> wrote:
>>>> Hi,
>>>> One more question, I wrote a memcpy test program, mostly the same as with perf bench memcpy.
>>>> But test result isn't consistent with perf bench when THP is off.
>>>>
>>>>         my program                              perf bench
>>>> THP:    3.628368 GB/Sec (with prefault)         3.672879 GB/Sec (with prefault)
>>>> NO-THP: 3.612743 GB/Sec (with prefault)         6.190187 GB/Sec (with prefault)
>>>>
>>>> Below is my code:
>>>>         src = calloc(1, len);
>>>>         dst = calloc(1, len);
>>>>
>>>>         if (prefault)
>>>>                 memcpy(dst, src, len);
>>>>         gettimeofday(&tv_start, NULL);
>>>>         memcpy(dst, src, len);
>>>>         gettimeofday(&tv_end, NULL);
>>>>
>>>>         timersub(&tv_end, &tv_start, &tv_diff);
>>>>         free(src);
>>>>         free(dst);
>>>>
>>>>         speed = (double)((double)len / timeval2double(&tv_diff));
>>>>         print_bps(speed);
>>>>
>>>> This is weird; is it possible that perf bench applies some build-time optimization?
>>>>
>>>> Thanks,
>>>> Jianguo Wu.
>>>
>>> perf bench mem memcpy is built with -O6. This is the compile command
>>> line (you can get this with make V=1):
>>> gcc -o bench/mem-memcpy-x86-64-asm.o -c -fno-omit-frame-pointer -ggdb3
>>> -funwind-tables -Wall -Wextra -std=gnu99 -Werror -O6 .... # omitted
>>>
>>> Can I see your compile option for your test program and the actual
>>> command line executing perf bench mem memcpy?
>>>
>>
>> I just compiled my test program with gcc -o memcpy-test memcpy-test.c.
>> I tried to use the same compile options as perf bench mem memcpy, and
>> the test results showed no difference.
>>
>> My execute command line for perf bench mem memcpy:
>> #./perf bench mem memcpy -l 1gb -o
> 
> Thanks for your information. I have three more requests for
> reproducing the problem:
> 
> 1. the entire source code of your program

Please see the attachment.

> 2. your gcc version

4.3.4

> 3. your glibc version

glibc-2.11.1-0.17.4

Thanks,
Jianguo Wu

> 
> I should've requested it first, sorry :(
> 
> Thanks,
> Hitoshi
> 
> .
> 


#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
#include <unistd.h>

/* Binary scaling factor between successive units (B, KB, MB, GB). */
#define K 1024LL

/*
 * Print a throughput value, given in bytes per second, scaled to the
 * largest unit (B, KB, MB or GB) that keeps the number at or above 1.
 * No trailing newline is written.
 *
 * x is expected to be a double; it is expanded several times, so it must
 * not have side effects.
 */
#define print_bps(x) do {						\
		if ((x) < K)						\
			printf(" %14lf B/Sec", (x));			\
		else if ((x) < K * K)					\
			printf(" %14lf KB/Sec", (x) / K);		\
		else if ((x) < K * K * K)				\
			printf(" %14lf MB/Sec", (x) / K / K);		\
		else							\
			printf(" %14lf GB/Sec", (x) / K / K / K);	\
	} while (0)

/*
 * Parse a size string of the form <digits>[suffix] into a byte count.
 *
 * Accepted suffixes (exact match, nothing may follow them):
 *   none, "B", "b"  -> bytes
 *   "KB", "kb"      -> 2^10 bytes
 *   "MB", "mb"      -> 2^20 bytes
 *   "GB", "gb"      -> 2^30 bytes
 *   "TB", "tb"      -> 2^40 bytes
 * Mixed-case suffixes such as "Kb" are rejected.
 *
 * Returns the size in bytes, or -1 when str is NULL, does not start with a
 * digit, carries an unknown suffix or trailing characters, or when the
 * result does not fit in a long long.
 */
long long local_atoll(const char *str)
{
	static const struct {
		const char *suffix;
		int shift;
	} units[] = {
		{ "",   0 },
		{ "B",  0 },  { "b",  0 },
		{ "KB", 10 }, { "kb", 10 },
		{ "MB", 20 }, { "mb", 20 },
		{ "GB", 30 }, { "gb", 30 },
		{ "TB", 40 }, { "tb", 40 },
	};
	char *end;
	long long value;
	size_t i;

	/* Requiring a leading digit also rules out signs and whitespace. */
	if (str == NULL || !isdigit((unsigned char)str[0]))
		return -1;

	errno = 0;
	value = strtoll(str, &end, 10);
	if (errno == ERANGE)
		return -1;

	/* Whatever follows the digits must be exactly one known suffix. */
	for (i = 0; i < sizeof(units) / sizeof(units[0]); i++) {
		if (strcmp(end, units[i].suffix) != 0)
			continue;
		/* value is non-negative here; refuse a product that would overflow. */
		if (value > (LLONG_MAX >> units[i].shift))
			return -1;
		return value * (1LL << units[i].shift);
	}

	return -1;
}

/*
 * Convert a struct timeval into seconds expressed as a double:
 * the whole seconds plus the microsecond field scaled by 1e-6.
 */
static double timeval2double(struct timeval *ts)
{
	const double usec_per_sec = (double)1000000;
	double whole_seconds = (double)ts->tv_sec;
	double fraction = (double)ts->tv_usec / usec_per_sec;

	return whole_seconds + fraction;
}

/*
 * Time a single memcpy() of len bytes between two zero-filled buffers
 * obtained from calloc() and print the resulting throughput on stdout.
 *
 * len:      number of bytes to copy; must be positive and fit in size_t.
 * prefault: when non-zero, one untimed memcpy() over the same buffers is
 *           done first, so the timed copy is not the first access
 *           (presumably to keep page-fault cost out of the measurement).
 *
 * On an invalid length or allocation failure a message is written to
 * stderr and the function returns without measuring anything.
 */
void do_memcpy(long long len, int prefault)
{
	void *src = NULL, *dst = NULL;
	struct timeval tv_start, tv_end, tv_diff;
	size_t size;
	double res;

	/* A negative len would turn into a huge size_t below. */
	if (len <= 0) {
		fprintf(stderr, "Invalid copy length: %lld\n", len);
		return;
	}

	/* Reject lengths that would be truncated where size_t is narrower. */
	size = (size_t)len;
	if ((unsigned long long)size != (unsigned long long)len) {
		fprintf(stderr, "Copy length too large: %lld\n", len);
		return;
	}

	src = calloc(1, size);
	dst = calloc(1, size);
	if (src == NULL || dst == NULL) {
		fprintf(stderr, "Failed to allocate two buffers of %lld bytes\n",
			len);
		goto out;
	}

	if (prefault)
		memcpy(dst, src, size);
	gettimeofday(&tv_start, NULL);
	memcpy(dst, src, size);
	gettimeofday(&tv_end, NULL);

	timersub(&tv_end, &tv_start, &tv_diff);

	/* Throughput in bytes per second. */
	res = (double)len / timeval2double(&tv_diff);
	print_bps(res);
	if (prefault)
		printf("\t(with prefault)");
	printf("\n");

out:
	/* free(NULL) is a no-op, so this covers the partial-allocation case. */
	free(src);
	free(dst);
}

/*
 * Entry point: parse "-l <size>" and run one prefaulted memcpy measurement.
 *
 * <size> is parsed by local_atoll(). Returns 0 after a measurement was
 * attempted, EXIT_FAILURE on an invalid or missing size or an unknown option.
 */
int main(int argc, char *argv[])
{
	long long len = -1;
	int ch;	/* getopt() returns int; a plain char may be unsigned and never equal -1 */

	while ((ch = getopt(argc, argv, "l:")) != -1) {
		switch (ch) {
		case 'l':
			len = local_atoll(optarg);
			if (len < 0) {
				printf("Invalid size\n");
				return EXIT_FAILURE;
			}
			printf("# Copying %s Byte ...\n", optarg);
			break;
		default:
			/* getopt() has already reported the offending option. */
			return EXIT_FAILURE;
		}
	}

	/* Without -l there is no length to copy. */
	if (len < 0) {
		fprintf(stderr, "Usage: %s -l <size>[B|KB|MB|GB|TB]\n", argv[0]);
		return EXIT_FAILURE;
	}

	do_memcpy(len, 1);

	return 0;
}

[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Bugtraq]     [Linux]     [Linux OMAP]     [Linux MIPS]     [ECOS]     [Asterisk Internet PBX]     [Linux API]