Hi, I am sending you the updated patch (against the latest 2.0.6 version). I call numa_police_memory_int() only on the newly allocated pages, i.e. when the area is expanded. I also added a numa_realloc_onnode() function, in the same fashion as numa_alloc_onnode(), which sets a specific memory binding. I pass the MPOL_MF_MOVE flag to mbind(), but I am not sure this is worth it, since the call becomes very slow even when no pages are migrated. Without the MPOL_MF_MOVE flag, of course, if the policy changes between reallocs, previously allocated pages won't be affected. Regards, On 01/03/2011 01:42 AM, Andi Kleen wrote: > >> I am submitting a patch for a realloc function that might be useful. The >> proposed numa_realloc() is merely a wrapper to mremap(), which it calls >> with the flag MREMAP_MAYMOVE. The policy of the vm area is copied by the >> kernel in case of moving. I am also submitting a test program, that >> keeps expanding an initial allocation until a limit is reached and >> checks the mempolicy of the expanded area in every iteration. >> >> My use case is a dynamic array implementation which uses realloc() to >> dynamically expand the array and I want to convert it to a numa-aware >> implementation. > > You need to call numa_police_memory_int() in the function, otherwise > the policy won't be actually preserved. > > -Andi > > -- V.K.
diff -urN numactl-2.0.6-orig/libnuma.c numactl-2.0.6/libnuma.c --- numactl-2.0.6-orig/libnuma.c 2011-01-03 15:09:23.000000000 +0200 +++ numactl-2.0.6/libnuma.c 2011-01-03 23:15:14.000000000 +0200 @@ -871,6 +871,17 @@ return mem; } +void *numa_realloc(void *old_addr, size_t old_size, size_t new_size) +{ + char *mem; + mem = mremap(old_addr, old_size, new_size, MREMAP_MAYMOVE); + if (mem == (char *)-1) + return NULL; + if (new_size > old_size) + numa_police_memory_int(mem + old_size, new_size - old_size); + return mem; +} + void *numa_alloc_interleaved_subset_v1(size_t size, const nodemask_t *mask) { char *mem; @@ -997,6 +1008,28 @@ return mem; } +void *numa_realloc_onnode(void *old_addr, size_t old_size, size_t new_size, + int node) +{ + char *mem; + struct bitmask *bmp; + + bmp = numa_allocate_nodemask(); + numa_bitmask_setbit(bmp, node); + mem = mremap(old_addr, old_size, new_size, MREMAP_MAYMOVE); + if (mem == (char *)-1) { + mem = NULL; + } else { + unsigned int mbind_flags_save = mbind_flags; + mbind_flags |= MPOL_MF_MOVE; + dombind(mem, new_size, bind_policy, bmp); + mbind_flags = mbind_flags_save; + } + + numa_bitmask_free(bmp); + return mem; +} + void *numa_alloc_local(size_t size) { char *mem; diff -urN numactl-2.0.6-orig/Makefile numactl-2.0.6/Makefile --- numactl-2.0.6-orig/Makefile 2011-01-03 15:09:23.000000000 +0200 +++ numactl-2.0.6/Makefile 2011-01-03 23:22:57.000000000 +0200 @@ -31,7 +31,7 @@ test/after test/before threadtest test_move_pages \ test/mbind_mig_pages test/migrate_pages \ migratepages migspeed migspeed.o libnuma.a \ - test/move_pages + test/move_pages test/realloc_test SOURCES := bitops.c libnuma.c distance.c memhog.c numactl.c numademo.c \ numamon.c shm.c stream_lib.c stream_main.c syscall.c util.c mt.c \ clearcache.c test/*.c @@ -43,7 +43,7 @@ all: numactl migratepages migspeed libnuma.so numademo numamon memhog \ test/tshared stream test/mynode test/pagesize test/ftok test/prefered \ test/randmap test/nodemap test/distance 
test/tbitmap test/move_pages \ - test/mbind_mig_pages test/migrate_pages libnuma.a + test/mbind_mig_pages test/migrate_pages test/realloc_test libnuma.a numactl: numactl.o util.o shm.o bitops.o libnuma.so @@ -123,6 +123,8 @@ test/migrate_pages: test/migrate_pages.c libnuma.so +test/realloc_test: test/realloc_test.c libnuma.so + .PHONY: install all clean html depend MANPAGES := numa.3 numactl.8 numastat.8 migratepages.8 migspeed.8 diff -urN numactl-2.0.6-orig/numa.h numactl-2.0.6/numa.h --- numactl-2.0.6-orig/numa.h 2011-01-03 15:09:23.000000000 +0200 +++ numactl-2.0.6/numa.h 2011-01-03 21:06:08.000000000 +0200 @@ -208,10 +208,14 @@ void *numa_alloc_interleaved(size_t size); /* Alloc memory located on node */ void *numa_alloc_onnode(size_t size, int node); +/* Realloc memory located on node */ +void *numa_realloc_onnode(void *old_addr, size_t old_size, size_t new_size, int node); /* Alloc memory on local node */ void *numa_alloc_local(size_t size); /* Allocation with current policy */ void *numa_alloc(size_t size); +/* Realloc memory, binding properties are preserved. 
*/ +void *numa_realloc(void *old_addr, size_t old_size, size_t new_size); /* Free memory allocated by the functions above */ void numa_free(void *mem, size_t size); Binary files numactl-2.0.6-orig/test/move_pages and numactl-2.0.6/test/move_pages differ diff -urN numactl-2.0.6-orig/test/realloc_test.c numactl-2.0.6/test/realloc_test.c --- numactl-2.0.6-orig/test/realloc_test.c 1970-01-01 02:00:00.000000000 +0200 +++ numactl-2.0.6/test/realloc_test.c 2011-01-03 23:19:31.000000000 +0200 @@ -0,0 +1,119 @@ +#include <assert.h> +#include <errno.h> +#include <limits.h> +#include <unistd.h> +#include <stdlib.h> +#include <stdio.h> +#include <sys/mman.h> +#include "numa.h" +#include "numaif.h" + +#define DEFAULT_NR_PAGES 1024 + +static int parse_int(const char *str) +{ + char *endptr; + long ret = strtol(str, &endptr, 0); + if (*endptr != '\0') { + fprintf(stderr, "[error] strtol() failed: parse error: %s\n", endptr); + exit(1); + } + + if (errno == ERANGE) + fprintf(stderr, "[warning] strtol() out of range\n"); + + if (ret > INT_MAX || ret < INT_MIN) { + fprintf(stderr, "[warning] parse_int() out of range\n"); + ret = (ret > 0) ? 
INT_MAX : INT_MIN; + } + + return (int) ret; +} + +int main(int argc, char **argv) +{ + char *mem; + int page_size = numa_pagesize(); + int node = 0; + int nr_pages = DEFAULT_NR_PAGES; + + if (numa_available() < 0) { + fprintf(stderr, "numa is not available"); + exit(1); + } + + if (argc > 1) + node = parse_int(argv[1]); + if (argc > 2) + nr_pages = parse_int(argv[2]); + + mem = numa_alloc_onnode(page_size, node); + + /* Store the policy of the newly allocated area */ + unsigned long nodemask; + int mode; + int nr_nodes = numa_num_possible_nodes(); + if (get_mempolicy(&mode, &nodemask, nr_nodes, mem, + MPOL_F_NODE | MPOL_F_ADDR) < 0) { + perror("get_mempolicy() failed"); + exit(1); + } + + /* Print some info */ + printf("Page size: %d\n", page_size); + printf("Pages realloc'ed: %d\n", nr_pages); + printf("Allocate data in node: %d\n", node); + + /* Request pages until a realloc moves the data */ + int i; + int nr_inplace = 0; + int nr_moved = 0; + for (i = 0; i < nr_pages; i++) { + /* Enlarge mem with one more page */ +#ifdef TEST_REALLOC_ONNODE + char *new_mem = numa_realloc_onnode(mem, + (i+1)*page_size, + (i+2)*page_size, node); +#else + char *new_mem = numa_realloc(mem, (i+1)*page_size, (i+2)*page_size); +#endif + if (!new_mem) { + perror("numa_realloc() failed"); + exit(1); + } + + if (new_mem == mem) + ++nr_inplace; + else + ++nr_moved; + mem = new_mem; + + /* Check the policy of the realloc'ed area */ + unsigned long realloc_nodemask; + int realloc_mode; + if (get_mempolicy(&realloc_mode, &realloc_nodemask, + nr_nodes, mem, MPOL_F_NODE | MPOL_F_ADDR) < 0) { + perror("get_mempolicy() failed"); + exit(1); + } + + assert(realloc_nodemask == nodemask && + realloc_mode == mode && "policy changed"); + } + + /* Shrink to the original size */ +#ifdef TEST_REALLOC_ONNODE + mem = numa_realloc_onnode(mem, (nr_pages + 1)*page_size, page_size, node); +#else + mem = numa_realloc(mem, (nr_pages + 1)*page_size, page_size); +#endif + if (!mem) { + perror("numa_realloc() 
failed"); + exit(1); + } + + numa_free(mem, page_size); + printf("In-place reallocs: %d\n", nr_inplace); + printf("Moved reallocs: %d\n", nr_moved); + return 0; +} diff -urN numactl-2.0.6-orig/versions.ldscript numactl-2.0.6/versions.ldscript --- numactl-2.0.6-orig/versions.ldscript 2011-01-03 15:09:23.000000000 +0200 +++ numactl-2.0.6/versions.ldscript 2011-01-03 21:09:13.000000000 +0200 @@ -87,6 +87,8 @@ numa_alloc_interleaved_subset; numa_alloc_local; numa_alloc_onnode; + numa_realloc_onnode; + numa_realloc; numa_allocate_cpumask; numa_allocate_nodemask; numa_available;
Attachment:
signature.asc
Description: OpenPGP digital signature