[PATCH 3/3] vdso: preallocate new vmas

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



With the exception of um and tile, architectures that use
the install_special_mapping() function, when setting up a
new vma at program startup, do so with the mmap_sem lock
held for writing. Unless there's an error, this process
ends up allocating a new vma through kmem_cache_zalloc,
and inserting it in the task's address space.

This patch moves the vma's space allocation outside of
install_special_mapping(), and leaves the callers to do so 
explicitly, without depending on mmap_sem. The same goes for 
freeing: if the new vma isn't used (and thus the process fails 
at some point), it's caller's responsibility to free it - 
currently this is done inside install_special_mapping.

Furthermore, uprobes behaves exactly the same and thus now the
xol_add_vma() function also preallocates the new vma.

While the changes to x86 vdso handling have been tested on both
large and small 64-bit systems, the rest of the architectures
are totally *untested*. Note that all changes are quite similar
from architecture to architecture.

This patch, when tested on a 64core, 256 Gb NUMA server, benefited
several aim7 workloads: long +27% throughput with over 1500 users;
compute +6.5% with over 1000 users; fserver +83% for small amounts
of users (10-100 range) and +9% for more and new_fserver, showing
a similar behavior, got +67% boost with 100 users and an avg of +8%
when more users were added.

Signed-off-by: Davidlohr Bueso <davidlohr@xxxxxx>
Cc: Russell King <linux@xxxxxxxxxxxxxxxx>
Cc: Catalin Marinas <catalin.marinas@xxxxxxx>
Cc: Will Deacon <will.deacon@xxxxxxx>
Cc: Richard Kuo <rkuo@xxxxxxxxxxxxxx>
Cc: Ralf Baechle <ralf@xxxxxxxxxxxxxx>
Cc: Benjamin Herrenschmidt <benh@xxxxxxxxxxxxxxxxxxx>
Cc: Paul Mackerras <paulus@xxxxxxxxx>
Cc: Martin Schwidefsky <schwidefsky@xxxxxxxxxx>
Cc: Heiko Carstens <heiko.carstens@xxxxxxxxxx>
Cc: Paul Mundt <lethal@xxxxxxxxxxxx>
Cc: Chris Metcalf <cmetcalf@xxxxxxxxxx>
Cc: Jeff Dike <jdike@xxxxxxxxxxx>
Cc: Richard Weinberger <richard@xxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: Ingo Molnar <mingo@xxxxxxxxxx>
Cc: "H. Peter Anvin" <hpa@xxxxxxxxx>
Cc: Peter Zijlstra <a.p.zijlstra@xxxxxxxxx>
---
 arch/arm/kernel/process.c          | 22 ++++++++++++++++------
 arch/arm64/kernel/vdso.c           | 21 +++++++++++++++++----
 arch/hexagon/kernel/vdso.c         | 16 ++++++++++++----
 arch/mips/kernel/vdso.c            | 10 +++++++++-
 arch/powerpc/kernel/vdso.c         | 11 ++++++++---
 arch/s390/kernel/vdso.c            | 19 +++++++++++++++----
 arch/sh/kernel/vsyscall/vsyscall.c | 11 ++++++++++-
 arch/tile/kernel/vdso.c            | 13 ++++++++++---
 arch/um/kernel/skas/mmu.c          | 16 +++++++++++-----
 arch/unicore32/kernel/process.c    | 17 ++++++++++++-----
 arch/x86/um/vdso/vma.c             | 18 ++++++++++++++----
 arch/x86/vdso/vdso32-setup.c       | 16 +++++++++++++++-
 arch/x86/vdso/vma.c                | 10 +++++++++-
 include/linux/mm.h                 |  3 ++-
 kernel/events/uprobes.c            | 14 ++++++++++++--
 mm/mmap.c                          | 18 +++++++-----------
 16 files changed, 179 insertions(+), 56 deletions(-)

diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index 94f6b05..5637c92 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -13,6 +13,7 @@
 #include <linux/export.h>
 #include <linux/sched.h>
 #include <linux/kernel.h>
+#include <linux/slab.h>
 #include <linux/mm.h>
 #include <linux/stddef.h>
 #include <linux/unistd.h>
@@ -480,6 +481,7 @@ extern struct page *get_signal_page(void);
 int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 {
 	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
 	unsigned long addr;
 	int ret;
 
@@ -488,6 +490,10 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 	if (!signal_page)
 		return -ENOMEM;
 
+	vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
+	if (unlikely(!vma))
+		return -ENOMEM;
+
 	down_write(&mm->mmap_sem);
 	addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0);
 	if (IS_ERR_VALUE(addr)) {
@@ -496,14 +502,18 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 	}
 
 	ret = install_special_mapping(mm, addr, PAGE_SIZE,
-		VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC,
-		&signal_page);
-
-	if (ret == 0)
-		mm->context.sigpage = addr;
+				      VM_READ | VM_EXEC | VM_MAYREAD |
+				      VM_MAYWRITE | VM_MAYEXEC,
+				      &signal_page, &vma);
+	if (ret)
+		goto up_fail;
 
- up_fail:
+	mm->context.sigpage = addr;
+	up_write(&mm->mmap_sem);
+	return 0;
+up_fail:
 	up_write(&mm->mmap_sem);
+	kmem_cache_free(vm_area_cachep, vma);
 	return ret;
 }
 #endif
diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c
index 6a389dc..519a44c 100644
--- a/arch/arm64/kernel/vdso.c
+++ b/arch/arm64/kernel/vdso.c
@@ -83,20 +83,26 @@ arch_initcall(alloc_vectors_page);
 
 int aarch32_setup_vectors_page(struct linux_binprm *bprm, int uses_interp)
 {
+	struct vm_area_struct *vma;
 	struct mm_struct *mm = current->mm;
 	unsigned long addr = AARCH32_VECTORS_BASE;
 	int ret;
 
+	vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
+	if (unlikely(!vma))
+		return -ENOMEM;
+
 	down_write(&mm->mmap_sem);
 	current->mm->context.vdso = (void *)addr;
 
 	/* Map vectors page at the high address. */
 	ret = install_special_mapping(mm, addr, PAGE_SIZE,
 				      VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC,
-				      vectors_page);
+				      vectors_page, &vma);
 
 	up_write(&mm->mmap_sem);
-
+	if (ret)
+		kmem_cache_free(vm_area_cachep, vma);
 	return ret;
 }
 #endif /* CONFIG_COMPAT */
@@ -152,10 +158,15 @@ arch_initcall(vdso_init);
 int arch_setup_additional_pages(struct linux_binprm *bprm,
 				int uses_interp)
 {
+	struct vm_area_struct *vma;
 	struct mm_struct *mm = current->mm;
 	unsigned long vdso_base, vdso_mapping_len;
 	int ret;
 
+	vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
+	if (unlikely(!vma))
+		return -ENOMEM;
+
 	/* Be sure to map the data page */
 	vdso_mapping_len = (vdso_pages + 1) << PAGE_SHIFT;
 
@@ -170,15 +181,17 @@ int arch_setup_additional_pages(struct linux_binprm *bprm,
 	ret = install_special_mapping(mm, vdso_base, vdso_mapping_len,
 				      VM_READ|VM_EXEC|
 				      VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
-				      vdso_pagelist);
+				      vdso_pagelist, &vma);
 	if (ret) {
 		mm->context.vdso = NULL;
 		goto up_fail;
 	}
 
+	up_write(&mm->mmap_sem);
+	return ret;
 up_fail:
 	up_write(&mm->mmap_sem);
-
+	kmem_cache_free(vm_area_cachep, vma);
 	return ret;
 }
 
diff --git a/arch/hexagon/kernel/vdso.c b/arch/hexagon/kernel/vdso.c
index 0bf5a87..188c5bd 100644
--- a/arch/hexagon/kernel/vdso.c
+++ b/arch/hexagon/kernel/vdso.c
@@ -19,6 +19,7 @@
  */
 
 #include <linux/err.h>
+#include <linux/slab.h>
 #include <linux/mm.h>
 #include <linux/vmalloc.h>
 #include <linux/binfmts.h>
@@ -63,8 +64,13 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 {
 	int ret;
 	unsigned long vdso_base;
+	struct vm_area_struct *vma;
 	struct mm_struct *mm = current->mm;
 
+	vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
+	if (unlikely(!vma))
+		return -ENOMEM;
+
 	down_write(&mm->mmap_sem);
 
 	/* Try to get it loaded right near ld.so/glibc. */
@@ -78,17 +84,19 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 
 	/* MAYWRITE to allow gdb to COW and set breakpoints. */
 	ret = install_special_mapping(mm, vdso_base, PAGE_SIZE,
-				      VM_READ|VM_EXEC|
-				      VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
-				      &vdso_page);
-
+					      VM_READ|VM_EXEC|
+					      VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
+					      &vdso_page, &vma);
 	if (ret)
 		goto up_fail;
 
 	mm->context.vdso = (void *)vdso_base;
 
+	up_write(&mm->mmap_sem);
+	return 0;
 up_fail:
 	up_write(&mm->mmap_sem);
+	kmem_cache_free(vm_area_cachep, vma);
 	return ret;
 }
 
diff --git a/arch/mips/kernel/vdso.c b/arch/mips/kernel/vdso.c
index 0f1af58..cfc2c7b 100644
--- a/arch/mips/kernel/vdso.c
+++ b/arch/mips/kernel/vdso.c
@@ -74,8 +74,13 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 {
 	int ret;
 	unsigned long addr;
+	struct vm_area_struct *vma;
 	struct mm_struct *mm = current->mm;
 
+	vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
+	if (unlikely(!vma))
+		return -ENOMEM;
+
 	down_write(&mm->mmap_sem);
 
 	addr = vdso_addr(mm->start_stack);
@@ -89,15 +94,18 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 	ret = install_special_mapping(mm, addr, PAGE_SIZE,
 				      VM_READ|VM_EXEC|
 				      VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
-				      &vdso_page);
+				      &vdso_page, &vma);
 
 	if (ret)
 		goto up_fail;
 
 	mm->context.vdso = (void *)addr;
 
+	up_write(&mm->mmap_sem);
+	return 0;
 up_fail:
 	up_write(&mm->mmap_sem);
+	kmem_cache_free(vm_area_cachep, vma);
 	return ret;
 }
 
diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index 1d9c926..a23fb5f 100644
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -193,6 +193,7 @@ static void dump_vdso_pages(struct vm_area_struct * vma)
 int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 {
 	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
 	struct page **vdso_pagelist;
 	unsigned long vdso_pages;
 	unsigned long vdso_base;
@@ -232,6 +233,10 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 	/* Add a page to the vdso size for the data page */
 	vdso_pages ++;
 
+	vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
+	if (unlikely(!vma))
+		return -ENOMEM;
+
 	/*
 	 * pick a base address for the vDSO in process space. We try to put it
 	 * at vdso_base which is the "natural" base for it, but we might fail
@@ -271,7 +276,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 	rc = install_special_mapping(mm, vdso_base, vdso_pages << PAGE_SHIFT,
 				     VM_READ|VM_EXEC|
 				     VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
-				     vdso_pagelist);
+				     vdso_pagelist, &vma);
 	if (rc) {
 		current->mm->context.vdso_base = 0;
 		goto fail_mmapsem;
@@ -279,9 +284,9 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 
 	up_write(&mm->mmap_sem);
 	return 0;
-
- fail_mmapsem:
+fail_mmapsem:
 	up_write(&mm->mmap_sem);
+	kmem_cache_free(vm_area_cachep, vma);
 	return rc;
 }
 
diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c
index 05d75c4..4e00e11 100644
--- a/arch/s390/kernel/vdso.c
+++ b/arch/s390/kernel/vdso.c
@@ -180,6 +180,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 {
 	struct mm_struct *mm = current->mm;
 	struct page **vdso_pagelist;
+	struct vm_area_struct *vma;
 	unsigned long vdso_pages;
 	unsigned long vdso_base;
 	int rc;
@@ -213,6 +214,10 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 	if (vdso_pages == 0)
 		return 0;
 
+	vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
+	if (unlikely(!vma))
+		return -ENOMEM;
+
 	current->mm->context.vdso_base = 0;
 
 	/*
@@ -224,7 +229,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 	vdso_base = get_unmapped_area(NULL, 0, vdso_pages << PAGE_SHIFT, 0, 0);
 	if (IS_ERR_VALUE(vdso_base)) {
 		rc = vdso_base;
-		goto out_up;
+		goto out_err;
 	}
 
 	/*
@@ -247,11 +252,17 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 	rc = install_special_mapping(mm, vdso_base, vdso_pages << PAGE_SHIFT,
 				     VM_READ|VM_EXEC|
 				     VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
-				     vdso_pagelist);
-	if (rc)
+				     vdso_pagelist, &vma);
+	if (rc) {
 		current->mm->context.vdso_base = 0;
-out_up:
+		goto out_err;
+	}
+
+	up_write(&mm->mmap_sem);
+	return 0;
+out_err:
 	up_write(&mm->mmap_sem);
+	kmem_cache_free(vm_area_cachep, vma);
 	return rc;
 }
 
diff --git a/arch/sh/kernel/vsyscall/vsyscall.c b/arch/sh/kernel/vsyscall/vsyscall.c
index 5ca5797..49d3834 100644
--- a/arch/sh/kernel/vsyscall/vsyscall.c
+++ b/arch/sh/kernel/vsyscall/vsyscall.c
@@ -10,6 +10,7 @@
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
  */
+#include <linux/slab.h>
 #include <linux/mm.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
@@ -61,9 +62,14 @@ int __init vsyscall_init(void)
 int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 {
 	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
 	unsigned long addr;
 	int ret;
 
+	vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
+	if (unlikely(!vma))
+		return -ENOMEM;
+
 	down_write(&mm->mmap_sem);
 	addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0);
 	if (IS_ERR_VALUE(addr)) {
@@ -74,14 +80,17 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 	ret = install_special_mapping(mm, addr, PAGE_SIZE,
 				      VM_READ | VM_EXEC |
 				      VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC,
-				      syscall_pages);
+				      syscall_pages, &vma);
 	if (unlikely(ret))
 		goto up_fail;
 
 	current->mm->context.vdso = (void *)addr;
 
+	up_write(&mm->mmap_sem);
+	return 0;
 up_fail:
 	up_write(&mm->mmap_sem);
+	kmem_cache_free(vm_area_cachep, vma);
 	return ret;
 }
 
diff --git a/arch/tile/kernel/vdso.c b/arch/tile/kernel/vdso.c
index 1533af2..cf93e62 100644
--- a/arch/tile/kernel/vdso.c
+++ b/arch/tile/kernel/vdso.c
@@ -15,6 +15,7 @@
 #include <linux/binfmts.h>
 #include <linux/compat.h>
 #include <linux/elf.h>
+#include <linux/slab.h>
 #include <linux/mm.h>
 #include <linux/pagemap.h>
 
@@ -140,6 +141,7 @@ int setup_vdso_pages(void)
 {
 	struct page **pagelist;
 	unsigned long pages;
+	struct vm_area_struct *vma;
 	struct mm_struct *mm = current->mm;
 	unsigned long vdso_base = 0;
 	int retval = 0;
@@ -147,6 +149,10 @@ int setup_vdso_pages(void)
 	if (!vdso_ready)
 		return 0;
 
+	vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
+	if (unlikely(!vma))
+		return -ENOMEM;
+
 	mm->context.vdso_base = 0;
 
 	pagelist = vdso_pagelist;
@@ -198,10 +204,11 @@ int setup_vdso_pages(void)
 					 pages << PAGE_SHIFT,
 					 VM_READ|VM_EXEC |
 					 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC,
-					 pagelist);
-	if (retval)
+					 pagelist, &vma);
+	if (retval) {
 		mm->context.vdso_base = 0;
-
+		kmem_cache_free(vm_area_cachep, vma);
+	}
 	return retval;
 }
 
diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c
index 007d550..a6c3190 100644
--- a/arch/um/kernel/skas/mmu.c
+++ b/arch/um/kernel/skas/mmu.c
@@ -104,18 +104,23 @@ int init_new_context(struct task_struct *task, struct mm_struct *mm)
 void uml_setup_stubs(struct mm_struct *mm)
 {
 	int err, ret;
+	struct vm_area_struct *vma;
 
 	if (!skas_needs_stub)
 		return;
 
+	vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
+	if (unlikely(!vma))
+		return -ENOMEM;
+
 	ret = init_stub_pte(mm, STUB_CODE,
 			    (unsigned long) &__syscall_stub_start);
 	if (ret)
-		goto out;
+		goto err;
 
 	ret = init_stub_pte(mm, STUB_DATA, mm->context.id.stack);
 	if (ret)
-		goto out;
+		goto err;
 
 	mm->context.stub_pages[0] = virt_to_page(&__syscall_stub_start);
 	mm->context.stub_pages[1] = virt_to_page(mm->context.id.stack);
@@ -124,14 +129,15 @@ void uml_setup_stubs(struct mm_struct *mm)
 	err = install_special_mapping(mm, STUB_START, STUB_END - STUB_START,
 				      VM_READ | VM_MAYREAD | VM_EXEC |
 				      VM_MAYEXEC | VM_DONTCOPY | VM_PFNMAP,
-				      mm->context.stub_pages);
+				      mm->context.stub_pages, &vma);
 	if (err) {
 		printk(KERN_ERR "install_special_mapping returned %d\n", err);
-		goto out;
+		goto err;
 	}
 	return;
 
-out:
+err:
+	kmem_cache_free(vm_area_cachep, vma);
 	force_sigsegv(SIGSEGV, current);
 }
 
diff --git a/arch/unicore32/kernel/process.c b/arch/unicore32/kernel/process.c
index 778ebba..c18b0e4 100644
--- a/arch/unicore32/kernel/process.c
+++ b/arch/unicore32/kernel/process.c
@@ -14,6 +14,7 @@
 #include <linux/module.h>
 #include <linux/sched.h>
 #include <linux/kernel.h>
+#include <linux/slab.h>
 #include <linux/mm.h>
 #include <linux/stddef.h>
 #include <linux/unistd.h>
@@ -313,12 +314,18 @@ unsigned long arch_randomize_brk(struct mm_struct *mm)
 
 int vectors_user_mapping(void)
 {
+	int ret = 0;
+	struct vm_area_struct *vma;
 	struct mm_struct *mm = current->mm;
-	return install_special_mapping(mm, 0xffff0000, PAGE_SIZE,
-				       VM_READ | VM_EXEC |
-				       VM_MAYREAD | VM_MAYEXEC |
-				       VM_DONTEXPAND | VM_DONTDUMP,
-				       NULL);
+
+	ret = install_special_mapping(mm, 0xffff0000, PAGE_SIZE,
+				      VM_READ | VM_EXEC |
+				      VM_MAYREAD | VM_MAYEXEC |
+				      VM_DONTEXPAND | VM_DONTDUMP,
+				      NULL, &vma);
+	if (ret)
+		kmem_cache_free(vm_area_cachep, vma);
+	return ret;
 }
 
 const char *arch_vma_name(struct vm_area_struct *vma)
diff --git a/arch/x86/um/vdso/vma.c b/arch/x86/um/vdso/vma.c
index af91901..888d856 100644
--- a/arch/x86/um/vdso/vma.c
+++ b/arch/x86/um/vdso/vma.c
@@ -55,19 +55,29 @@ subsys_initcall(init_vdso);
 int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 {
 	int err;
+	struct vm_area_struct *vma;
 	struct mm_struct *mm = current->mm;
 
 	if (!vdso_enabled)
 		return 0;
 
+	vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
+	if (unlikely(!vma))
+		return -ENOMEM;
+
 	down_write(&mm->mmap_sem);
 
 	err = install_special_mapping(mm, um_vdso_addr, PAGE_SIZE,
-		VM_READ|VM_EXEC|
-		VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
-		vdsop);
+				      VM_READ|VM_EXEC|
+				      VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
+				      vdsop, &vma);
+	if (err)
+		goto out_err;
 
 	up_write(&mm->mmap_sem);
-
+	return err;
+out_err:
+	up_write(&mm->mmap_sem);
+	kmem_cache_free(vm_area_cachep, vma);
 	return err;
 }
diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c
index d6bfb87..debb339 100644
--- a/arch/x86/vdso/vdso32-setup.c
+++ b/arch/x86/vdso/vdso32-setup.c
@@ -13,6 +13,7 @@
 #include <linux/gfp.h>
 #include <linux/string.h>
 #include <linux/elf.h>
+#include <linux/slab.h>
 #include <linux/mm.h>
 #include <linux/err.h>
 #include <linux/module.h>
@@ -307,6 +308,7 @@ int __init sysenter_setup(void)
 int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 {
 	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
 	unsigned long addr;
 	int ret = 0;
 	bool compat;
@@ -319,6 +321,12 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 	if (vdso_enabled == VDSO_DISABLED)
 		return 0;
 
+	if (compat_uses_vma || !compat) {
+		vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
+		if (unlikely(!vma))
+			return -ENOMEM;
+	}
+
 	down_write(&mm->mmap_sem);
 
 	/* Test compat mode once here, in case someone
@@ -346,7 +354,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 		ret = install_special_mapping(mm, addr, PAGE_SIZE,
 					      VM_READ|VM_EXEC|
 					      VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
-					      vdso32_pages);
+					      vdso32_pages, &vma);
 
 		if (ret)
 			goto up_fail;
@@ -355,12 +363,18 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
 	current_thread_info()->sysenter_return =
 		VDSO32_SYMBOL(addr, SYSENTER_RETURN);
 
+	up_write(&mm->mmap_sem);
+
+	return ret;
+
   up_fail:
 	if (ret)
 		current->mm->context.vdso = NULL;
 
 	up_write(&mm->mmap_sem);
 
+	if (ret && (compat_uses_vma || !compat))
+		kmem_cache_free(vm_area_cachep, vma);
 	return ret;
 }
 
diff --git a/arch/x86/vdso/vma.c b/arch/x86/vdso/vma.c
index 431e875..82c6b87 100644
--- a/arch/x86/vdso/vma.c
+++ b/arch/x86/vdso/vma.c
@@ -154,12 +154,17 @@ static int setup_additional_pages(struct linux_binprm *bprm,
 				  unsigned size)
 {
 	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
 	unsigned long addr;
 	int ret;
 
 	if (!vdso_enabled)
 		return 0;
 
+	vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
+	if (unlikely(!vma))
+		return -ENOMEM;
+
 	down_write(&mm->mmap_sem);
 	addr = vdso_addr(mm->start_stack, size);
 	addr = get_unmapped_area(NULL, addr, size, 0, 0);
@@ -173,14 +178,17 @@ static int setup_additional_pages(struct linux_binprm *bprm,
 	ret = install_special_mapping(mm, addr, size,
 				      VM_READ|VM_EXEC|
 				      VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
-				      pages);
+				      pages, &vma);
 	if (ret) {
 		current->mm->context.vdso = NULL;
 		goto up_fail;
 	}
 
+	up_write(&mm->mmap_sem);
+	return ret;
 up_fail:
 	up_write(&mm->mmap_sem);
+	kmem_cache_free(vm_area_cachep, vma);
 	return ret;
 }
 
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 8b6e55e..4984fff 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1515,7 +1515,8 @@ extern struct file *get_mm_exe_file(struct mm_struct *mm);
 extern int may_expand_vm(struct mm_struct *mm, unsigned long npages);
 extern int install_special_mapping(struct mm_struct *mm,
 				   unsigned long addr, unsigned long len,
-				   unsigned long flags, struct page **pages);
+				   unsigned long flags, struct page **pages,
+				   struct vm_area_struct **vma_prealloc);
 
 extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
 
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index ad8e1bd..18abeaa 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -1099,8 +1099,14 @@ void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned lon
 static int xol_add_vma(struct xol_area *area)
 {
 	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+
 	int ret = -EALREADY;
 
+	vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
+	if (unlikely(!vma))
+		return -ENOMEM;
+
 	down_write(&mm->mmap_sem);
 	if (mm->uprobes_state.xol_area)
 		goto fail;
@@ -1114,16 +1120,20 @@ static int xol_add_vma(struct xol_area *area)
 	}
 
 	ret = install_special_mapping(mm, area->vaddr, PAGE_SIZE,
-				VM_EXEC|VM_MAYEXEC|VM_DONTCOPY|VM_IO, &area->page);
+				      VM_EXEC|VM_MAYEXEC|VM_DONTCOPY|VM_IO,
+				      &area->page, &vma);
 	if (ret)
 		goto fail;
 
 	smp_wmb();	/* pairs with get_xol_area() */
 	mm->uprobes_state.xol_area = area;
 	ret = 0;
+
+	up_write(&mm->mmap_sem);
+	return 0;
  fail:
 	up_write(&mm->mmap_sem);
-
+	kmem_cache_free(vm_area_cachep, vma);
 	return ret;
 }
 
diff --git a/mm/mmap.c b/mm/mmap.c
index 6a7824d..6e238a3 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2909,17 +2909,17 @@ static const struct vm_operations_struct special_mapping_vmops = {
  * The region past the last page supplied will always produce SIGBUS.
  * The array pointer and the pages it points to are assumed to stay alive
  * for as long as this mapping might exist.
+ *
+ * The caller has the responsibility of allocating the new vma, and freeing
+ * it if it was unused (when insert_vm_struct() fails).
  */
 int install_special_mapping(struct mm_struct *mm,
 			    unsigned long addr, unsigned long len,
-			    unsigned long vm_flags, struct page **pages)
+			    unsigned long vm_flags, struct page **pages,
+			    struct vm_area_struct **vma_prealloc)
 {
-	int ret;
-	struct vm_area_struct *vma;
-
-	vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
-	if (unlikely(vma == NULL))
-		return -ENOMEM;
+	int ret = 0;
+	struct vm_area_struct *vma = *vma_prealloc;
 
 	INIT_LIST_HEAD(&vma->anon_vma_chain);
 	vma->vm_mm = mm;
@@ -2939,11 +2939,7 @@ int install_special_mapping(struct mm_struct *mm,
 	mm->total_vm += len >> PAGE_SHIFT;
 
 	perf_event_mmap(vma);
-
-	return 0;
-
 out:
-	kmem_cache_free(vm_area_cachep, vma);
 	return ret;
 }
 
-- 
1.8.1.4

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@xxxxxxxxx.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@xxxxxxxxx";> email@xxxxxxxxx </a>




[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Bugtraq]     [Linux]     [Linux OMAP]     [Linux MIPS]     [ECOS]     [Asterisk Internet PBX]     [Linux API]