Stefan Lankes wrote: > I am not able to reconstruct any performance drawbacks on my system. > Could you send me your low-level benchmark? It's attached. As you may see, it's fairly trivial. It just does several iterations of mbind+touch_all_pages for different power-of-two buffer sizes. Just replace mbind with madvise in the inner loop if you want to try it with your affinity-on-next-touch implementation. Which kernels are you using when comparing your next-touch implementation with Lee's patchset? Brice
#define _GNU_SOURCE 1 #include <unistd.h> #include <sys/mman.h> #include <sys/time.h> #include <stdio.h> #include <stdlib.h> #include <numa.h> #include <numaif.h> #include <errno.h> #include <sched.h> #ifndef MPOL_MF_LAZY #define MPOL_MF_LAZY (1<<3) #endif #define TOTALPAGES 262144 int nbpages, loop; int pagesize; int main(int argc, char **argv) { void *buffer; int i, err; unsigned long nodemask; int maxnode; struct timeval tv1, tv2; unsigned long us; cpu_set_t cset; /* put the thread on node 0 */ CPU_ZERO(&cset); CPU_SET(0, &cset); err = sched_setaffinity(0, sizeof(cset), &cset); if (err < 0) { perror("sched_setaffinity"); exit(-1); } pagesize = getpagesize(); maxnode = numa_max_node(); fprintf(stdout, "# Nb_pages\tCost(ns)\n"); for(nbpages=2 ; nbpages<=TOTALPAGES ; nbpages*=2) { int loops = TOTALPAGES/nbpages; if (loops > 128) loops = 128; buffer = mmap(NULL, TOTALPAGES*pagesize, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); if (buffer == MAP_FAILED) { perror("mmap"); exit(-1); } /* bind to node 1 and prefault */ nodemask = 1<<1; err = mbind(buffer, TOTALPAGES*pagesize, MPOL_BIND, &nodemask, maxnode+2, MPOL_MF_MOVE); if (err < 0) { perror("mbind"); exit(-1); } for(i=0 ; i<TOTALPAGES ; i++) *(int*)(buffer+i*pagesize) = 0; gettimeofday(&tv1, NULL); for(loop=0 ; loop<loops ; loop++) { /* mark subbuf as next-touch and touch it */ void *subbuf = buffer + loop*nbpages*pagesize; err = mbind(subbuf, nbpages*pagesize, MPOL_PREFERRED, NULL, 0, MPOL_MF_MOVE|MPOL_MF_LAZY); if (err < 0) { perror("mbind"); exit(-1); } for(i=0;i<nbpages;i++) *(int*)(subbuf + i*pagesize) = 42; } gettimeofday(&tv2, NULL); us = (tv2.tv_sec - tv1.tv_sec) * 1000000 + (tv2.tv_usec - tv1.tv_usec); fprintf(stdout, "%d\t%ld\n", nbpages, us * 1000/loops); fflush(stdout); munmap(buffer, TOTALPAGES*pagesize); } return 0; }