+ mm-add-mremap_dontunmap-to-mremap.patch added to -mm tree

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



The patch titled
     Subject: mm/mremap: add MREMAP_DONTUNMAP to mremap()
has been added to the -mm tree.  Its filename is
     mm-add-mremap_dontunmap-to-mremap.patch

This patch should soon appear at
    http://ozlabs.org/~akpm/mmots/broken-out/mm-add-mremap_dontunmap-to-mremap.patch
and later at
    http://ozlabs.org/~akpm/mmotm/broken-out/mm-add-mremap_dontunmap-to-mremap.patch

Before you just go and hit "reply", please:
   a) Consider who else should be cc'ed
   b) Prefer to cc a suitable mailing list as well
   c) Ideally: find the original patch on the mailing list and do a
      reply-to-all to that, adding suitable additional cc's

*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***

The -mm tree is included into linux-next and is updated
there every 3-4 working days

------------------------------------------------------
From: Brian Geffon <bgeffon@xxxxxxxxxx>
Subject: mm/mremap: add MREMAP_DONTUNMAP to mremap()

When remapping an anonymous, private mapping, if MREMAP_DONTUNMAP is set,
the source mapping will not be removed.  Instead it will be cleared as if
a brand new anonymous, private mapping had been created atomically as part
of the mremap() call.   If a userfaultfd was watching the source, it will
continue to watch the new mapping.   For a mapping that is shared or not
anonymous, MREMAP_DONTUNMAP will cause the mremap() call to fail.  Because
MREMAP_DONTUNMAP always results in moving a VMA you MUST use the
MREMAP_MAYMOVE flag.  The final result is two equally sized VMAs where the
destination contains the PTEs of the source.

We hope to use this in Chrome OS where with userfaultfd we could write an
anonymous mapping to disk without having to STOP the process or worry
about VMA permission changes.

This feature also has a use case in Android, Lokesh Gidra has said that
"As part of using userfaultfd for GC, We'll have to move the physical
pages of the java heap to a separate location.  For this purpose mremap
will be used.  Without the MREMAP_DONTUNMAP flag, when I mremap the java
heap, its virtual mapping will be removed as well.  Therefore, we'll
require performing mmap immediately after.  This is not only time
consuming but also opens a time window where a native thread may call mmap
and reserve the java heap's address range for its own usage.  This flag
solves the problem."

Link: http://lkml.kernel.org/r/20200207201856.46070-1-bgeffon@xxxxxxxxxx
Signed-off-by: Brian Geffon <bgeffon@xxxxxxxxxx>
Cc: "Michael S . Tsirkin" <mst@xxxxxxxxxx>
Cc: Brian Geffon <bgeffon@xxxxxxxxxx>
Cc: Arnd Bergmann <arnd@xxxxxxxx>
Cc: Andy Lutomirski <luto@xxxxxxxxxxxxxx>
Cc: Will Deacon <will@xxxxxxxxxx>
Cc: Andrea Arcangeli <aarcange@xxxxxxxxxx>
Cc: Sonny Rao <sonnyrao@xxxxxxxxxx>
Cc: Minchan Kim <minchan@xxxxxxxxxx>
Cc: Joel Fernandes <joel@xxxxxxxxxxxxxxxxx>
Cc: Yu Zhao <yuzhao@xxxxxxxxxx>
Cc: Jesse Barnes <jsbarnes@xxxxxxxxxx>
Cc: Nathan Chancellor <natechancellor@xxxxxxxxx>
Cc: Florian Weimer <fweimer@xxxxxxxxxx>
Cc: "Kirill A . Shutemov" <kirill@xxxxxxxxxxxxx>
Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
---

 include/uapi/linux/mman.h |    5 +
 mm/mremap.c               |   98 ++++++++++++++++++++++++++++--------
 2 files changed, 80 insertions(+), 23 deletions(-)

--- a/include/uapi/linux/mman.h~mm-add-mremap_dontunmap-to-mremap
+++ a/include/uapi/linux/mman.h
@@ -5,8 +5,9 @@
 #include <asm/mman.h>
 #include <asm-generic/hugetlb_encode.h>
 
-#define MREMAP_MAYMOVE	1
-#define MREMAP_FIXED	2
+#define MREMAP_MAYMOVE		1
+#define MREMAP_FIXED		2
+#define MREMAP_DONTUNMAP	4
 
 #define OVERCOMMIT_GUESS		0
 #define OVERCOMMIT_ALWAYS		1
--- a/mm/mremap.c~mm-add-mremap_dontunmap-to-mremap
+++ a/mm/mremap.c
@@ -318,8 +318,8 @@ unsigned long move_page_tables(struct vm
 static unsigned long move_vma(struct vm_area_struct *vma,
 		unsigned long old_addr, unsigned long old_len,
 		unsigned long new_len, unsigned long new_addr,
-		bool *locked, struct vm_userfaultfd_ctx *uf,
-		struct list_head *uf_unmap)
+		bool *locked, unsigned long flags,
+		struct vm_userfaultfd_ctx *uf, struct list_head *uf_unmap)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	struct vm_area_struct *new_vma;
@@ -408,11 +408,41 @@ static unsigned long move_vma(struct vm_
 	if (unlikely(vma->vm_flags & VM_PFNMAP))
 		untrack_pfn_moved(vma);
 
+	if (unlikely(!err && (flags & MREMAP_DONTUNMAP))) {
+		if (vm_flags & VM_ACCOUNT) {
+			/* Always put back VM_ACCOUNT since we won't unmap */
+			vma->vm_flags |= VM_ACCOUNT;
+
+			vm_acct_memory(vma_pages(new_vma));
+		}
+
+		/*
+		 * locked_vm accounting: if the mapping remained the same size
+		 * it will have just moved and we don't need to touch locked_vm
+		 * because we skip the do_unmap. If the mapping shrunk before
+		 * being moved then the do_unmap on that portion will have
+		 * adjusted vm_locked. Only if the mapping grows do we need to
+		 * do something special; the reason is locked_vm only accounts
+		 * for old_len, but we're now adding new_len - old_len locked
+		 * bytes to the new mapping.
+		 */
+		if (new_len > old_len)
+			mm->locked_vm += (new_len - old_len) >> PAGE_SHIFT;
+
+		goto out;
+	}
+
 	if (do_munmap(mm, old_addr, old_len, uf_unmap) < 0) {
 		/* OOM: unable to split vma, just get accounts right */
 		vm_unacct_memory(excess >> PAGE_SHIFT);
 		excess = 0;
 	}
+
+	if (vm_flags & VM_LOCKED) {
+		mm->locked_vm += new_len >> PAGE_SHIFT;
+		*locked = true;
+	}
+out:
 	mm->hiwater_vm = hiwater_vm;
 
 	/* Restore VM_ACCOUNT if one or two pieces of vma left */
@@ -422,16 +452,12 @@ static unsigned long move_vma(struct vm_
 			vma->vm_next->vm_flags |= VM_ACCOUNT;
 	}
 
-	if (vm_flags & VM_LOCKED) {
-		mm->locked_vm += new_len >> PAGE_SHIFT;
-		*locked = true;
-	}
-
 	return new_addr;
 }
 
 static struct vm_area_struct *vma_to_resize(unsigned long addr,
-	unsigned long old_len, unsigned long new_len, unsigned long *p)
+	unsigned long old_len, unsigned long new_len, unsigned long flags,
+	unsigned long *p)
 {
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma = find_vma(mm, addr);
@@ -453,6 +479,10 @@ static struct vm_area_struct *vma_to_res
 		return ERR_PTR(-EINVAL);
 	}
 
+	if (flags & MREMAP_DONTUNMAP && (!vma_is_anonymous(vma) ||
+			vma->vm_flags & VM_SHARED))
+		return ERR_PTR(-EINVAL);
+
 	if (is_vm_hugetlb_page(vma))
 		return ERR_PTR(-EINVAL);
 
@@ -497,7 +527,7 @@ static struct vm_area_struct *vma_to_res
 
 static unsigned long mremap_to(unsigned long addr, unsigned long old_len,
 		unsigned long new_addr, unsigned long new_len, bool *locked,
-		struct vm_userfaultfd_ctx *uf,
+		unsigned long flags, struct vm_userfaultfd_ctx *uf,
 		struct list_head *uf_unmap_early,
 		struct list_head *uf_unmap)
 {
@@ -505,7 +535,7 @@ static unsigned long mremap_to(unsigned
 	struct vm_area_struct *vma;
 	unsigned long ret = -EINVAL;
 	unsigned long charged = 0;
-	unsigned long map_flags;
+	unsigned long map_flags = 0;
 
 	if (offset_in_page(new_addr))
 		goto out;
@@ -534,9 +564,11 @@ static unsigned long mremap_to(unsigned
 	if ((mm->map_count + 2) >= sysctl_max_map_count - 3)
 		return -ENOMEM;
 
-	ret = do_munmap(mm, new_addr, new_len, uf_unmap_early);
-	if (ret)
-		goto out;
+	if (flags & MREMAP_FIXED) {
+		ret = do_munmap(mm, new_addr, new_len, uf_unmap_early);
+		if (ret)
+			goto out;
+	}
 
 	if (old_len >= new_len) {
 		ret = do_munmap(mm, addr+new_len, old_len - new_len, uf_unmap);
@@ -545,13 +577,26 @@ static unsigned long mremap_to(unsigned
 		old_len = new_len;
 	}
 
-	vma = vma_to_resize(addr, old_len, new_len, &charged);
+	vma = vma_to_resize(addr, old_len, new_len, flags, &charged);
 	if (IS_ERR(vma)) {
 		ret = PTR_ERR(vma);
 		goto out;
 	}
 
-	map_flags = MAP_FIXED;
+	/*
+	 * MREMAP_DONTUNMAP expands by new_len - (new_len - old_len), we will
+	 * check that we can expand by new_len and vma_to_resize will handle
+	 * the vma growing which is (new_len - old_len).
+	 */
+	if (flags & MREMAP_DONTUNMAP &&
+		!may_expand_vm(mm, vma->vm_flags, new_len >> PAGE_SHIFT)) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	if (flags & MREMAP_FIXED)
+		map_flags |= MAP_FIXED;
+
 	if (vma->vm_flags & VM_MAYSHARE)
 		map_flags |= MAP_SHARED;
 
@@ -561,10 +606,16 @@ static unsigned long mremap_to(unsigned
 	if (IS_ERR_VALUE(ret))
 		goto out1;
 
-	ret = move_vma(vma, addr, old_len, new_len, new_addr, locked, uf,
+	/* We got a new mapping */
+	if (!(flags & MREMAP_FIXED))
+		new_addr = ret;
+
+	ret = move_vma(vma, addr, old_len, new_len, new_addr, locked, flags, uf,
 		       uf_unmap);
+
 	if (!(offset_in_page(ret)))
 		goto out;
+
 out1:
 	vm_unacct_memory(charged);
 
@@ -609,12 +660,16 @@ SYSCALL_DEFINE5(mremap, unsigned long, a
 	addr = untagged_addr(addr);
 	new_addr = untagged_addr(new_addr);
 
-	if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE))
+	if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE | MREMAP_DONTUNMAP))
 		return ret;
 
 	if (flags & MREMAP_FIXED && !(flags & MREMAP_MAYMOVE))
 		return ret;
 
+	/* MREMAP_DONTUNMAP is always a move */
+	if (flags & MREMAP_DONTUNMAP && !(flags & MREMAP_MAYMOVE))
+		return ret;
+
 	if (offset_in_page(addr))
 		return ret;
 
@@ -632,9 +687,10 @@ SYSCALL_DEFINE5(mremap, unsigned long, a
 	if (down_write_killable(&current->mm->mmap_sem))
 		return -EINTR;
 
-	if (flags & MREMAP_FIXED) {
+	if (flags & MREMAP_FIXED || flags & MREMAP_DONTUNMAP) {
 		ret = mremap_to(addr, old_len, new_addr, new_len,
-				&locked, &uf, &uf_unmap_early, &uf_unmap);
+				&locked, flags, &uf, &uf_unmap_early,
+				&uf_unmap);
 		goto out;
 	}
 
@@ -662,7 +718,7 @@ SYSCALL_DEFINE5(mremap, unsigned long, a
 	/*
 	 * Ok, we need to grow..
 	 */
-	vma = vma_to_resize(addr, old_len, new_len, &charged);
+	vma = vma_to_resize(addr, old_len, new_len, flags, &charged);
 	if (IS_ERR(vma)) {
 		ret = PTR_ERR(vma);
 		goto out;
@@ -712,7 +768,7 @@ SYSCALL_DEFINE5(mremap, unsigned long, a
 		}
 
 		ret = move_vma(vma, addr, old_len, new_len, new_addr,
-			       &locked, &uf, &uf_unmap);
+			       &locked, flags, &uf, &uf_unmap);
 	}
 out:
 	if (offset_in_page(ret)) {
_

Patches currently in -mm which might be from bgeffon@xxxxxxxxxx are

mm-add-mremap_dontunmap-to-mremap.patch





[Index of Archives]     [Kernel Archive]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]

  Powered by Linux