Hi Vasileios, Thanks for the patch. I'm putting it on my to-do list for 2.0.7. I'll let you know when it's in a release candidate tarball. -Cliff On Mon, Jan 03, 2011 at 11:56:42PM +0200, Vasileios Karakasis wrote: > Hi, > > I am sending you the updated patch (against the latest 2.0.6 version). I > call numa_police_memory_int() only for the newly allocated pages, when > the area is expanded. I also added a numa_realloc_onnode() function in > the same fashion as that of the numa_alloc_onnode(), which sets a > specific memory binding. I pass the MPOL_MF_MOVE flag to mbind(), but I > am not sure if this is worth it, since the call becomes too slow even > in the case of no page migration. Without the MPOL_MF_MOVE flag, of > course, if the policy changes between realloc's, previously allocated > pages won't be affected. > > Regards, > > On 01/03/2011 01:42 AM, Andi Kleen wrote: > > > >> I am submitting a patch for a realloc function that might be useful. The > >> proposed numa_realloc() is merely a wrapper to mremap(), which it calls > >> with the flag MREMAP_MAYMOVE. The policy of the vm area is copied by the > >> kernel in case of moving. I am also submitting a test program, that > >> keeps expanding an initial allocation until a limit is reached and > >> checks the mempolicy of the expanded area in every iteration. > >> > >> My use case is a dynamic array implementation which uses realloc() to > >> dynamically expand the array and I want to convert it to a numa-aware > >> implementation. > > > > You need to call numa_police_memory_int() in the function, otherwise > > the policy won't be actually preserved. > > > > -Andi > > > > > > -- > V.K. 
> diff -urN numactl-2.0.6-orig/libnuma.c numactl-2.0.6/libnuma.c > --- numactl-2.0.6-orig/libnuma.c 2011-01-03 15:09:23.000000000 +0200 > +++ numactl-2.0.6/libnuma.c 2011-01-03 23:15:14.000000000 +0200 > @@ -871,6 +871,17 @@ > return mem; > } > > +void *numa_realloc(void *old_addr, size_t old_size, size_t new_size) > +{ > + char *mem; > + mem = mremap(old_addr, old_size, new_size, MREMAP_MAYMOVE); > + if (mem == (char *)-1) > + return NULL; > + if (new_size > old_size) > + numa_police_memory_int(mem + old_size, new_size - old_size); > + return mem; > +} > + > void *numa_alloc_interleaved_subset_v1(size_t size, const nodemask_t *mask) > { > char *mem; > @@ -997,6 +1008,28 @@ > return mem; > } > > +void *numa_realloc_onnode(void *old_addr, size_t old_size, size_t new_size, > + int node) > +{ > + char *mem; > + struct bitmask *bmp; > + > + bmp = numa_allocate_nodemask(); > + numa_bitmask_setbit(bmp, node); > + mem = mremap(old_addr, old_size, new_size, MREMAP_MAYMOVE); > + if (mem == (char *)-1) { > + mem = NULL; > + } else { > + unsigned int mbind_flags_save = mbind_flags; > + mbind_flags |= MPOL_MF_MOVE; > + dombind(mem, new_size, bind_policy, bmp); > + mbind_flags = mbind_flags_save; > + } > + > + numa_bitmask_free(bmp); > + return mem; > +} > + > void *numa_alloc_local(size_t size) > { > char *mem; > diff -urN numactl-2.0.6-orig/Makefile numactl-2.0.6/Makefile > --- numactl-2.0.6-orig/Makefile 2011-01-03 15:09:23.000000000 +0200 > +++ numactl-2.0.6/Makefile 2011-01-03 23:22:57.000000000 +0200 > @@ -31,7 +31,7 @@ > test/after test/before threadtest test_move_pages \ > test/mbind_mig_pages test/migrate_pages \ > migratepages migspeed migspeed.o libnuma.a \ > - test/move_pages > + test/move_pages test/realloc_test > SOURCES := bitops.c libnuma.c distance.c memhog.c numactl.c numademo.c \ > numamon.c shm.c stream_lib.c stream_main.c syscall.c util.c mt.c \ > clearcache.c test/*.c > @@ -43,7 +43,7 @@ > all: numactl migratepages migspeed libnuma.so numademo numamon 
memhog \ > test/tshared stream test/mynode test/pagesize test/ftok test/prefered \ > test/randmap test/nodemap test/distance test/tbitmap test/move_pages \ > - test/mbind_mig_pages test/migrate_pages libnuma.a > + test/mbind_mig_pages test/migrate_pages test/realloc_test libnuma.a > > numactl: numactl.o util.o shm.o bitops.o libnuma.so > > @@ -123,6 +123,8 @@ > > test/migrate_pages: test/migrate_pages.c libnuma.so > > +test/realloc_test: test/realloc_test.c libnuma.so > + > .PHONY: install all clean html depend > > MANPAGES := numa.3 numactl.8 numastat.8 migratepages.8 migspeed.8 > diff -urN numactl-2.0.6-orig/numa.h numactl-2.0.6/numa.h > --- numactl-2.0.6-orig/numa.h 2011-01-03 15:09:23.000000000 +0200 > +++ numactl-2.0.6/numa.h 2011-01-03 21:06:08.000000000 +0200 > @@ -208,10 +208,14 @@ > void *numa_alloc_interleaved(size_t size); > /* Alloc memory located on node */ > void *numa_alloc_onnode(size_t size, int node); > +/* Realloc memory located on node */ > +void *numa_realloc_onnode(void *old_addr, size_t old_size, size_t new_size, int node); > /* Alloc memory on local node */ > void *numa_alloc_local(size_t size); > /* Allocation with current policy */ > void *numa_alloc(size_t size); > +/* Realloc memory, binding properties are preserved. 
*/ > +void *numa_realloc(void *old_addr, size_t old_size, size_t new_size); > /* Free memory allocated by the functions above */ > void numa_free(void *mem, size_t size); > > Binary files numactl-2.0.6-orig/test/move_pages and numactl-2.0.6/test/move_pages differ > diff -urN numactl-2.0.6-orig/test/realloc_test.c numactl-2.0.6/test/realloc_test.c > --- numactl-2.0.6-orig/test/realloc_test.c 1970-01-01 02:00:00.000000000 +0200 > +++ numactl-2.0.6/test/realloc_test.c 2011-01-03 23:19:31.000000000 +0200 > @@ -0,0 +1,119 @@ > +#include <assert.h> > +#include <errno.h> > +#include <limits.h> > +#include <unistd.h> > +#include <stdlib.h> > +#include <stdio.h> > +#include <sys/mman.h> > +#include "numa.h" > +#include "numaif.h" > + > +#define DEFAULT_NR_PAGES 1024 > + > +static int parse_int(const char *str) > +{ > + char *endptr; > + long ret = strtol(str, &endptr, 0); > + if (*endptr != '\0') { > + fprintf(stderr, "[error] strtol() failed: parse error: %s\n", endptr); > + exit(1); > + } > + > + if (errno == ERANGE) > + fprintf(stderr, "[warning] strtol() out of range\n"); > + > + if (ret > INT_MAX || ret < INT_MIN) { > + fprintf(stderr, "[warning] parse_int() out of range\n"); > + ret = (ret > 0) ? 
INT_MAX : INT_MIN; > + } > + > + return (int) ret; > +} > + > +int main(int argc, char **argv) > +{ > + char *mem; > + int page_size = numa_pagesize(); > + int node = 0; > + int nr_pages = DEFAULT_NR_PAGES; > + > + if (numa_available() < 0) { > + fprintf(stderr, "numa is not available"); > + exit(1); > + } > + > + if (argc > 1) > + node = parse_int(argv[1]); > + if (argc > 2) > + nr_pages = parse_int(argv[2]); > + > + mem = numa_alloc_onnode(page_size, node); > + > + /* Store the policy of the newly allocated area */ > + unsigned long nodemask; > + int mode; > + int nr_nodes = numa_num_possible_nodes(); > + if (get_mempolicy(&mode, &nodemask, nr_nodes, mem, > + MPOL_F_NODE | MPOL_F_ADDR) < 0) { > + perror("get_mempolicy() failed"); > + exit(1); > + } > + > + /* Print some info */ > + printf("Page size: %d\n", page_size); > + printf("Pages realloc'ed: %d\n", nr_pages); > + printf("Allocate data in node: %d\n", node); > + > + /* Request pages until a realloc moves the data */ > + int i; > + int nr_inplace = 0; > + int nr_moved = 0; > + for (i = 0; i < nr_pages; i++) { > + /* Enlarge mem with one more page */ > +#ifdef TEST_REALLOC_ONNODE > + char *new_mem = numa_realloc_onnode(mem, > + (i+1)*page_size, > + (i+2)*page_size, node); > +#else > + char *new_mem = numa_realloc(mem, (i+1)*page_size, (i+2)*page_size); > +#endif > + if (!new_mem) { > + perror("numa_realloc() failed"); > + exit(1); > + } > + > + if (new_mem == mem) > + ++nr_inplace; > + else > + ++nr_moved; > + mem = new_mem; > + > + /* Check the policy of the realloc'ed area */ > + unsigned long realloc_nodemask; > + int realloc_mode; > + if (get_mempolicy(&realloc_mode, &realloc_nodemask, > + nr_nodes, mem, MPOL_F_NODE | MPOL_F_ADDR) < 0) { > + perror("get_mempolicy() failed"); > + exit(1); > + } > + > + assert(realloc_nodemask == nodemask && > + realloc_mode == mode && "policy changed"); > + } > + > + /* Shrink to the original size */ > +#ifdef TEST_REALLOC_ONNODE > + mem = numa_realloc_onnode(mem, 
(nr_pages + 1)*page_size, page_size, node); > +#else > + mem = numa_realloc(mem, (nr_pages + 1)*page_size, page_size); > +#endif > + if (!mem) { > + perror("numa_realloc() failed"); > + exit(1); > + } > + > + numa_free(mem, page_size); > + printf("In-place reallocs: %d\n", nr_inplace); > + printf("Moved reallocs: %d\n", nr_moved); > + return 0; > +} > diff -urN numactl-2.0.6-orig/versions.ldscript numactl-2.0.6/versions.ldscript > --- numactl-2.0.6-orig/versions.ldscript 2011-01-03 15:09:23.000000000 +0200 > +++ numactl-2.0.6/versions.ldscript 2011-01-03 21:09:13.000000000 +0200 > @@ -87,6 +87,8 @@ > numa_alloc_interleaved_subset; > numa_alloc_local; > numa_alloc_onnode; > + numa_realloc_onnode; > + numa_realloc; > numa_allocate_cpumask; > numa_allocate_nodemask; > numa_available; -- Cliff Wickman SGI cpw@xxxxxxx (651) 683-3824 -- To unsubscribe from this list: send the line "unsubscribe linux-numa" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html