Hi Bing, On Sun, Jul 08, 2018 at 10:03:48PM +0800, bing zhu wrote: > void *p = malloc(4096 * max); > start = usec(); > for (i = 0; i < max; i++) { > memcpy(p + i * 4096, page, 4096); > } > end = usec(); > printf("%s : %d time use %lu us \n", __func__, max,end - start); > > static unsigned long usec(void) > { > struct timeval tv; > gettimeofday(&tv, 0); > return (unsigned long)tv.tv_sec * 1000000 + tv.tv_usec; > } I think for this kind of benchmarking, to measure the cycles and time correctly, you should use the __rdtscp intrinsic (more info in "AMD64 Architecture Programmer’s Manual Volume 3: General-Purpose and System Instructions", p. 401). Userspace: ---------------------------------------------------------------------- #include <stdio.h> #include <time.h> #include <stdint.h> #include <x86intrin.h> volatile unsigned sink; unsigned int junk; int main (void) { clock_t start = clock(); register uint64_t t=__rdtscp(&junk); for(size_t i=0; i<10000000; ++i) sink++; t=__rdtscp(&junk)-t; clock_t end = clock(); double cpu_time_used = ((double) (end - start)) / CLOCKS_PER_SEC; printf("for loop took %f seconds to execute %zu cylces\n", cpu_time_used, t); } --------------------------------------------------------------------- Kernelspace: If you want to dig deeper: https://www.intel.com/content/dam/www/public/us/en/documents/white-papers/ia-32-ia-64-benchmark-code-execution-paper.pdf Thanks -- Himanshu Jha Undergraduate Student Department of Electronics & Communication Guru Tegh Bahadur Institute of Technology _______________________________________________ Kernelnewbies mailing list Kernelnewbies@xxxxxxxxxxxxxxxxx https://lists.kernelnewbies.org/mailman/listinfo/kernelnewbies