Re: realloc function

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi,

I am submitting the final patch. Essentially, it is my original enhanced
with some comments about the rationale as we discussed it here and an
entry + brief description in the man page.

Regards,

On 01/05/2011 09:25 PM, Andi Kleen wrote:
> On Wed, Jan 05, 2011 at 05:00:43PM +0200, Vasileios Karakasis wrote:
>> Peeking inside the mremap() source, I can see that the kernel already
>> does this, i.e., mremap() preserves the policy of the original vm area.
> 
> That is true.
>>
>> The problem is when the user has not specified a binding for the
>> original mapping (default policy), in which case copying explicitly the
>> policy from the old to the new pages won't work either; the new pages
>> will still have MPOL_DEFAULT. So realloc() cannot guarantee that the new
> 
> 
> It would be possible to do
> 
> get_mempolicy MPOL_F_ADDR
> if policy == MPOL_DEFAULT:
> 	get_mempolicy MPOL_F_NODE|MPOL_F_ADDR, &node
> 	mbind MPOL_PREFERRED, node
> 
> But then you end up with preferred instead of default.  It should
> be usually the same, but may not in some corner cases.
> 
> I guess you're right and that case is too obscure to care about.
> I guess your original patch without anything was good enough.
> It may be worth it to add some comments on this rationale though.
> 
> 
>> pages will be allocated on the same node as the preceding alloc(),
>> unless there is a way to obtain the actual node that the pages of the
>> original allocation were allocated on. In my opinion, this isn't a real
>> problem, because even the simple numa_alloc() using the default policy,
>> cannot guarantee that the pages will be allocated on the node of the
>> calling cpu: what if the task is migrated to a different cpu on a
>> different node, while touching (i.e., allocating) the pages with the
>> police_memory_int()?
> 
> process policy and MPOL_DEFAULT are always just heuristics; such races
> can always occur. They usually should not because the scheduler
> does not migrate too frequently.
> 
> -Andi

-- 
V.K.
diff -urN numactl-2.0.6-orig/libnuma.c numactl-2.0.6/libnuma.c
--- numactl-2.0.6-orig/libnuma.c	2011-01-03 15:09:23.000000000 +0200
+++ numactl-2.0.6/libnuma.c	2011-01-10 23:49:58.000000000 +0200
@@ -871,6 +871,23 @@
 	return mem;
 } 
 
+void *numa_realloc(void *old_addr, size_t old_size, size_t new_size)
+{
+	char *mem;
+	mem = mremap(old_addr, old_size, new_size, MREMAP_MAYMOVE);
+	if (mem == (char *)-1)
+		return NULL;
+	/*
+	 *	The memory policy of the allocated pages is preserved by mremap(), so
+	 *	there is no need to (re)set it here. If the policy of the original
+	 *	allocation is not set, the new pages will be allocated according to the
+	 *	process' mempolicy. Trying to allocate explicitly the new pages on the
+	 *	same node as the original ones would require changing the policy of the
+	 *	newly allocated pages, which violates the numa_realloc() semantics.
+	 */ 
+	return mem;
+}
+
 void *numa_alloc_interleaved_subset_v1(size_t size, const nodemask_t *mask)
 {
 	char *mem;
diff -urN numactl-2.0.6-orig/Makefile numactl-2.0.6/Makefile
--- numactl-2.0.6-orig/Makefile	2011-01-03 15:09:23.000000000 +0200
+++ numactl-2.0.6/Makefile	2011-01-03 23:22:57.000000000 +0200
@@ -31,7 +31,7 @@
 	      test/after test/before threadtest test_move_pages \
 	      test/mbind_mig_pages test/migrate_pages \
 	      migratepages migspeed migspeed.o libnuma.a \
-	      test/move_pages
+	      test/move_pages test/realloc_test
 SOURCES := bitops.c libnuma.c distance.c memhog.c numactl.c numademo.c \
 	numamon.c shm.c stream_lib.c stream_main.c syscall.c util.c mt.c \
 	clearcache.c test/*.c
@@ -43,7 +43,7 @@
 all: numactl migratepages migspeed libnuma.so numademo numamon memhog \
      test/tshared stream test/mynode test/pagesize test/ftok test/prefered \
      test/randmap test/nodemap test/distance test/tbitmap test/move_pages \
-     test/mbind_mig_pages test/migrate_pages libnuma.a
+     test/mbind_mig_pages test/migrate_pages test/realloc_test libnuma.a
 
 numactl: numactl.o util.o shm.o bitops.o libnuma.so
 
@@ -123,6 +123,8 @@
 
 test/migrate_pages: test/migrate_pages.c libnuma.so
 
+test/realloc_test: test/realloc_test.c libnuma.so
+
 .PHONY: install all clean html depend
 
 MANPAGES := numa.3 numactl.8 numastat.8 migratepages.8 migspeed.8
diff -urN numactl-2.0.6-orig/numa.3 numactl-2.0.6/numa.3
--- numactl-2.0.6-orig/numa.3	2011-01-03 15:09:23.000000000 +0200
+++ numactl-2.0.6/numa.3	2011-01-10 23:39:02.000000000 +0200
@@ -87,6 +87,8 @@
 .BI "void *numa_alloc_interleaved_subset(size_t " size ",  struct bitmask *" nodemask );
 .BI "void *numa_alloc(size_t " size );
 .br
+.BI "void *numa_realloc(void *"old_addr ", size_t " old_size ", size_t " new_size );
+.br
 .BI "void numa_free(void *" start ", size_t " size );
 .sp
 .BI "int numa_run_on_node(int " node );
@@ -599,6 +601,39 @@
 .BR numa_free ().
 On errors NULL is returned.
 
+.BR numa_realloc ()
+changes the size of the memory area pointed to by
+.I old_addr
+from
+.I old_size
+to
+.I new_size.
+The memory area pointed to by
+.I old_addr
+must have been allocated with one of the
+.BR numa_alloc*
+functions.
+The
+.I new_size
+will be rounded up to a multiple of the system page size. The contents of the
+memory area will be unchanged to the minimum of the old and new sizes; newly
+allocated memory will be uninitialized. The memory policy (and node bindings)
+associated with the original memory area will be preserved in the resized
+area. For example, if the initial area was allocated with a call to
+.BR numa_alloc_onnode(),
+then the new pages (if the area is enlarged) will be allocated on the same node.
+However, if no memory policy was set for the original area, then
+.BR numa_realloc ()
+cannot guarantee that the new pages will be allocated on the same node. On
+success, the address of the resized area is returned (which might be different
+from that of the initial area), otherwise NULL is returned and
+.I errno
+is set to indicate the error. The pointer returned by
+.BR numa_realloc ()
+is suitable for passing to
+.BR numa_free ().
+
+
 .BR numa_free ()
 frees
 .I size
diff -urN numactl-2.0.6-orig/numa.h numactl-2.0.6/numa.h
--- numactl-2.0.6-orig/numa.h	2011-01-03 15:09:23.000000000 +0200
+++ numactl-2.0.6/numa.h	2011-01-11 00:06:12.000000000 +0200
@@ -212,6 +212,8 @@
 void *numa_alloc_local(size_t size);
 /* Allocation with current policy */
 void *numa_alloc(size_t size);
+/* Change the size of a memory area preserving the memory policy */
+void *numa_realloc(void *old_addr, size_t old_size, size_t new_size);
 /* Free memory allocated by the functions above */
 void numa_free(void *mem, size_t size);
 
diff -urN numactl-2.0.6-orig/test/realloc_test.c numactl-2.0.6/test/realloc_test.c
--- numactl-2.0.6-orig/test/realloc_test.c	1970-01-01 02:00:00.000000000 +0200
+++ numactl-2.0.6/test/realloc_test.c	2011-01-10 23:55:37.000000000 +0200
@@ -0,0 +1,108 @@
+#include <assert.h>
+#include <errno.h>
+#include <limits.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <sys/mman.h>
+#include "numa.h"
+#include "numaif.h"
+
+#define DEFAULT_NR_PAGES	1024
+
+static int parse_int(const char *str)
+{
+	char	*endptr;
+	long	ret = strtol(str, &endptr, 0);
+	if (*endptr != '\0') {
+		fprintf(stderr, "[error] strtol() failed: parse error: %s\n", endptr);
+		exit(1);
+	}
+
+	if (errno == ERANGE)
+		fprintf(stderr, "[warning] strtol() out of range\n");
+
+	if (ret > INT_MAX || ret < INT_MIN) {
+		fprintf(stderr, "[warning] parse_int() out of range\n");
+		ret = (ret > 0) ? INT_MAX : INT_MIN;
+	}
+
+	return (int) ret;
+}
+
+int main(int argc, char **argv)
+{
+	char	*mem;
+	int		page_size = numa_pagesize();
+	int		node = 0;
+	int		nr_pages = DEFAULT_NR_PAGES;
+
+	if (numa_available() < 0) {
+		fprintf(stderr, "numa is not available");
+		exit(1);
+	}
+
+	if (argc > 1)
+		node = parse_int(argv[1]);
+	if (argc > 2)
+		nr_pages = parse_int(argv[2]);
+	
+	mem = numa_alloc_onnode(page_size, node);
+
+	/* Store the policy of the newly allocated area */
+	unsigned long	nodemask;
+	int				mode;
+	int				nr_nodes = numa_num_possible_nodes();
+	if (get_mempolicy(&mode, &nodemask, nr_nodes, mem,
+					  MPOL_F_NODE | MPOL_F_ADDR) < 0) {
+		perror("get_mempolicy() failed");
+		exit(1);
+	}
+
+	/* Print some info */
+	printf("Page size: %d\n", page_size);
+	printf("Pages realloc'ed: %d\n", nr_pages);
+	printf("Allocate data in node: %d\n", node);
+
+	int i;
+	int nr_inplace = 0;
+	int nr_moved   = 0;
+	for (i = 0; i < nr_pages; i++) {
+		/* Enlarge mem with one more page */
+		char	*new_mem = numa_realloc(mem, (i+1)*page_size, (i+2)*page_size);
+		if (!new_mem) {
+			perror("numa_realloc() failed");
+			exit(1);
+		}
+
+		if (new_mem == mem)
+			++nr_inplace;
+		else
+			++nr_moved;
+		mem = new_mem;
+
+		/* Check the policy of the realloc'ed area */
+		unsigned long	realloc_nodemask;
+		int				realloc_mode;
+		if (get_mempolicy(&realloc_mode, &realloc_nodemask,
+						  nr_nodes, mem, MPOL_F_NODE | MPOL_F_ADDR) < 0) {
+			perror("get_mempolicy() failed");
+			exit(1);
+		}
+
+		assert(realloc_nodemask == nodemask &&
+			   realloc_mode == mode && "policy changed");
+	}
+
+	/* Shrink to the original size */
+	mem = numa_realloc(mem, (nr_pages + 1)*page_size, page_size);
+	if (!mem) {
+		perror("numa_realloc() failed");
+		exit(1);
+	}
+
+	numa_free(mem, page_size);
+	printf("In-place reallocs: %d\n", nr_inplace);
+	printf("Moved reallocs: %d\n", nr_moved);
+	return 0;
+}
diff -urN numactl-2.0.6-orig/versions.ldscript numactl-2.0.6/versions.ldscript
--- numactl-2.0.6-orig/versions.ldscript	2011-01-03 15:09:23.000000000 +0200
+++ numactl-2.0.6/versions.ldscript	2011-01-10 18:36:37.000000000 +0200
@@ -87,6 +87,7 @@
     numa_alloc_interleaved_subset;
     numa_alloc_local;
     numa_alloc_onnode;
+    numa_realloc;
     numa_allocate_cpumask;
     numa_allocate_nodemask;
     numa_available;

Attachment: signature.asc
Description: OpenPGP digital signature


[Index of Archives]     [Linux Kernel]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux SCSI]     [Devices]

  Powered by Linux