- mm-posix-memory-lock.patch removed from -mm tree

The patch titled

     mm: POSIX Memory Lock

has been removed from the -mm tree.  Its filename is

     mm-posix-memory-lock.patch

This patch was probably dropped from -mm because
it has now been merged into a subsystem tree or
into Linus's tree, or because it was folded into
its parent patch in the -mm tree.


From: "Stone Wang" <pwstone@xxxxxxxxx>

Currently, Linux's mlock family of memory lock/unlock calls may fail
with part of their work already done, which can leave the programmer
unsure which part of memory is locked and which is not.
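
To make the failure mode concrete, here is a minimal user-space
sketch (a hypothetical test, not part of the patch) that locks a
range containing an unmapped hole:

/* Map three pages, punch out the middle one, then mlock() across
 * the hole.  The call fails with ENOMEM; per the POSIX text quoted
 * below no page may remain locked, but the pre-patch kernel may
 * leave the page before the hole locked. */
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	long page = sysconf(_SC_PAGESIZE);
	char *buf = mmap(NULL, 3 * page, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (buf == MAP_FAILED)
		return 1;
	munmap(buf + page, page);		/* create the hole */

	if (mlock(buf, 3 * page) == -1)
		printf("mlock failed: %s\n", strerror(errno));
	/* Is buf's first page locked now?  The caller cannot tell. */
	return 0;
}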

A better implementation is a transaction-like POSIX memory lock:
either the whole request succeeds, or the address space is left
unchanged.

POSIX mlock/munlock :

http://www.opengroup.org/onlinepubs/009695399/functions/mlock.html

RETURN VALUE

   Upon successful completion, the mlock() and munlock() functions
shall return a value of zero. Otherwise, no change is made to any
locks in the address space of the process, and the function shall
return a value of -1 and set errno to indicate the error.
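
Under that contract a caller needs no partial-unlock cleanup on
failure.  A minimal sketch of the caller side (assuming a
page-aligned buffer from posix_memalign()):

#include <stdio.h>
#include <stdlib.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	long page = sysconf(_SC_PAGESIZE);
	void *addr;

	if (posix_memalign(&addr, page, page))
		return 1;

	if (mlock(addr, page) == 0) {
		/* the whole range is locked; no partial state to track */
		munlock(addr, page);
	} else {
		/* per POSIX, nothing was locked, so nothing to clean up */
		perror("mlock");
	}
	free(addr);
	return 0;
}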

POSIX mlockall/munlockall :

http://www.opengroup.org/onlinepubs/009695399/functions/mlockall.html

RETURN VALUE

   Upon successful completion, the mlockall() function shall return a
value of zero.  Otherwise, no additional memory shall be locked, and
the function shall return a value of -1 and set errno to indicate the
error. The effect of failure of mlockall() on previously existing
locks in the address space is unspecified.

   If it is supported by the implementation, the munlockall() function
shall always return a value of zero. Otherwise, the function shall
return a value of -1 and set errno to indicate the error.
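
For completeness, a minimal sketch of the usual mlockall() pattern
(typically run with CAP_IPC_LOCK or a sufficient RLIMIT_MEMLOCK):

#include <stdio.h>
#include <sys/mman.h>

int main(void)
{
	/* Lock everything mapped now and everything mapped later.
	 * On failure, POSIX says no additional memory is locked
	 * (the effect on pre-existing locks is unspecified). */
	if (mlockall(MCL_CURRENT | MCL_FUTURE) == -1) {
		perror("mlockall");
		return 1;
	}

	/* ... latency-sensitive work, no major page faults ... */

	munlockall();	/* where supported, always returns zero */
	return 0;
}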

This patch tries to fix this; testing has shown that it works.

Nick Piggin suggested that the patch be submitted on its own, and
that it use one bit of vm_flags instead of adding a member to
vm_area_struct.  Special thanks to him.  The patch has also been
largely rewritten to make it clearer.
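
The vm_flags bit added below is VM_CHANGELOCK.  As a rough
illustration of the two-pass transaction the patch implements (mark
every vma you touch; in a second pass, clear the marks and, on
failure, roll everything back), here is a user-space toy model with
hypothetical types and a fake fault-in step, not kernel code:

#include <stdio.h>

#define VM_LOCKED	0x1
#define VM_CHANGELOCK	0x2

struct toy_vma {
	unsigned int vm_flags;
};

/* Stand-in for make_pages_present(); fails on the third vma. */
static int fake_make_present(int i)
{
	return (i == 2) ? -1 : 0;
}

int main(void)
{
	struct toy_vma vmas[4] = { {0}, {0}, {0}, {0} };
	int i, ret = 0;

	/* Pass 1: set VM_LOCKED and mark each touched vma. */
	for (i = 0; i < 4; i++) {
		vmas[i].vm_flags |= VM_LOCKED | VM_CHANGELOCK;
		ret = fake_make_present(i);
		if (ret)
			break;
	}

	/* Pass 2: clear the marks; on failure, undo VM_LOCKED too. */
	for (i = 0; i < 4; i++) {
		if (!(vmas[i].vm_flags & VM_CHANGELOCK))
			continue;
		vmas[i].vm_flags &= ~VM_CHANGELOCK;
		if (ret)
			vmas[i].vm_flags &= ~VM_LOCKED;
	}

	for (i = 0; i < 4; i++)
		printf("vma %d: %s\n", i,
		       (vmas[i].vm_flags & VM_LOCKED) ? "locked" : "unlocked");
	return 0;	/* all four print "unlocked": nothing changed */
}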

Signed-off-by: Shaoping Wang <pwstone@xxxxxxxxx>
Cc: Hugh Dickins <hugh@xxxxxxxxxxx>
Cc: Nick Piggin <nickpiggin@xxxxxxxxxxxx>
Signed-off-by: Andrew Morton <akpm@xxxxxxxx>
---

 include/linux/mm.h |    1 
 mm/mlock.c         |  281 +++++++++++++++++++++++++++++--------------
 2 files changed, 195 insertions(+), 87 deletions(-)

diff -puN include/linux/mm.h~mm-posix-memory-lock include/linux/mm.h
--- 25/include/linux/mm.h~mm-posix-memory-lock	Wed Mar 29 16:22:48 2006
+++ 25-akpm/include/linux/mm.h	Wed Mar 29 16:22:48 2006
@@ -166,6 +166,7 @@ extern unsigned int kobjsize(const void 
 #define VM_NONLINEAR	0x00800000	/* Is non-linear (remap_file_pages) */
 #define VM_MAPPED_COPY	0x01000000	/* T if mapped copy of data (nommu mmap) */
 #define VM_INSERTPAGE	0x02000000	/* The vma has had "vm_insert_page()" done on it */
+#define VM_CHANGELOCK	0x04000000	/* The vma just has VM_LOCKED bit changed */
 
 #ifndef VM_STACK_DEFAULT_FLAGS		/* arch can override this */
 #define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS
diff -puN mm/mlock.c~mm-posix-memory-lock mm/mlock.c
--- 25/mm/mlock.c~mm-posix-memory-lock	Wed Mar 29 16:22:48 2006
+++ 25-akpm/mm/mlock.c	Wed Mar 29 16:22:48 2006
@@ -3,6 +3,7 @@
  *
  *  (C) Copyright 1995 Linus Torvalds
  *  (C) Copyright 2002 Christoph Hellwig
+ *  (C) Copyright 2006 Peter Wang
  */
 
 #include <linux/capability.h>
@@ -11,72 +12,120 @@
 #include <linux/mempolicy.h>
 #include <linux/syscalls.h>
 
-
-static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev,
-	unsigned long start, unsigned long end, unsigned int newflags)
+static int do_mlock(unsigned long start, size_t len,unsigned int jump_hole)
 {
-	struct mm_struct * mm = vma->vm_mm;
-	pgoff_t pgoff;
-	int pages;
+	unsigned long  end = 0, vmoff = 0;
+	unsigned long  pages = 0;
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct * vma, *prev, **pprev,*next;
 	int ret = 0;
 
-	if (newflags == vma->vm_flags) {
-		*prev = vma;
-		goto out;
-	}
+	len = PAGE_ALIGN(len);
+	end = start + len;
+	if (end < start)
+		return -EINVAL;
+	if (end == start)
+		return 0;
 
-	pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
-	*prev = vma_merge(mm, *prev, start, end, newflags, vma->anon_vma,
-			  vma->vm_file, pgoff, vma_policy(vma));
-	if (*prev) {
-		vma = *prev;
-		goto success;
-	}
+	vma = find_vma_prev(current->mm, start, &prev);
+	if (!vma || vma->vm_start > start)
+		return -ENOMEM;
 
-	*prev = vma;
+	while (vma->vm_start < end) {
+		if (vma->vm_flags & VM_LOCKED) {
+			if (vma->vm_end < end)
+				goto next;
+			else
+				break;
+		} else {
+			if (vma->vm_start < start) {
+				prev = vma;
+				ret = split_vma(mm, prev, start, 0);
+				if (!ret) {
+					vma = prev->vm_next;
+					vmoff = vma->vm_end;
+				}
+				else
+					break;
+			}
+			if (vma->vm_end > end) {
+				ret = split_vma(mm, vma, end, 0);
+   				if (!ret)
+					vmoff = vma->vm_end;
+				else
+					break;
+			}
+		}
 
-	if (start != vma->vm_start) {
-		ret = split_vma(mm, vma, start, 1);
-		if (ret)
-			goto out;
-	}
+		pages += (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
 
-	if (end != vma->vm_end) {
-		ret = split_vma(mm, vma, end, 0);
-		if (ret)
-			goto out;
+		vma->vm_flags |= VM_LOCKED;
+		vma->vm_flags |= VM_CHANGELOCK;
+		vmoff = vma->vm_end;
+		if (!(vma->vm_flags & VM_IO)) {
+   			ret = make_pages_present(vma->vm_start, vma->vm_end);
+			if (ret)
+				break;
+		}
+next:
+		if (vma->vm_end == end)
+			break;
+		prev = vma;
+		vma = vma->vm_next;
+
+		/* If called from do_mlockall,
+		 * we may jump over holes.
+		 */
+		if (jump_hole) {
+			if (vma)
+				continue;
+			else
+				break;
+		}
+		else if (!vma || vma->vm_start != prev->vm_end) {
+			ret = -ENOMEM;
+			break;
+		}
 	}
 
-success:
-	/*
-	 * vm_flags is protected by the mmap_sem held in write mode.
-	 * It's okay if try_to_unmap_one unmaps a page just after we
-	 * set VM_LOCKED, make_pages_present below will bring it back.
-	 */
-	vma->vm_flags = newflags;
+	pprev = &prev;
+	vma = find_vma_prev(mm, start, pprev);
 
 	/*
-	 * Keep track of amount of locked VM.
+	 * Try to merge the vmas.
+	 * If error happened, rollback vmas to original status.
 	 */
-	pages = (end - start) >> PAGE_SHIFT;
-	if (newflags & VM_LOCKED) {
-		pages = -pages;
-		if (!(newflags & VM_IO))
-			ret = make_pages_present(start, end);
+	while (vma && vma->vm_end <= vmoff ) {
+		if (vma->vm_flags & VM_CHANGELOCK) {
+			vma->vm_flags &= ~VM_CHANGELOCK;
+			if (ret)
+				vma->vm_flags &= ~VM_LOCKED;
+		}
+		next = vma->vm_next;
+		if (next && (next->vm_flags & VM_CHANGELOCK)) {
+			next->vm_flags &= ~VM_CHANGELOCK;
+			if (ret)
+				next->vm_flags &= ~VM_LOCKED;
+		}
+		*pprev = vma_merge(mm, *pprev, vma->vm_start, vma->vm_end, vma->vm_flags,
+					vma->anon_vma,vma->vm_file, vma->vm_pgoff, vma_policy(vma));
+		if (*pprev)
+			vma = *pprev;
+		vma = vma->vm_next;
 	}
 
-	vma->vm_mm->locked_vm -= pages;
-out:
-	if (ret == -ENOMEM)
-		ret = -EAGAIN;
+	if (!ret)
+		mm->locked_vm += pages;
 	return ret;
 }
 
-static int do_mlock(unsigned long start, size_t len, int on)
+static int do_munlock(unsigned long start, size_t len, unsigned int jump_hole)
 {
-	unsigned long nstart, end, tmp;
-	struct vm_area_struct * vma, * prev;
-	int error;
+	unsigned long  end = 0,vmoff = 0;
+	unsigned long  pages = 0;
+	struct mm_struct *mm=current->mm;
+	struct vm_area_struct * vma, *prev, **pprev, *next;
+	int ret = 0;
 
 	len = PAGE_ALIGN(len);
 	end = start + len;
@@ -88,37 +137,86 @@ static int do_mlock(unsigned long start,
 	if (!vma || vma->vm_start > start)
 		return -ENOMEM;
 
-	if (start > vma->vm_start)
-		prev = vma;
-
-	for (nstart = start ; ; ) {
-		unsigned int newflags;
+	while (vma->vm_start < end) {
+		if (!(vma->vm_flags & VM_LOCKED)) {
+			if(vma->vm_end < end)
+				goto next;
+			else
+				break;
+		} else {
+			if (vma->vm_start < start) {
+				prev = vma;
+				ret = split_vma(mm, prev, start, 0);
+				if (!ret) {
+					vma = prev->vm_next;
+					vmoff = vma->vm_end;
+				}
+				else
+					break;
+			}
+			if (vma->vm_end > end) {
+				ret = split_vma(mm, vma, end, 0);
+				if (!ret)
+					vmoff = vma->vm_end;
+				else
+					break;
+			}
+		}
 
-		/* Here we know that  vma->vm_start <= nstart < vma->vm_end. */
+		/* Delay clearing VM_LOCKED bit here,
+		 * thus make the possibly rollback easy.
+		 */
+		vma->vm_flags |= VM_CHANGELOCK;
+		vmoff = vma->vm_end;
+		pages += (vma->vm_end -vma->vm_start) >> PAGE_SHIFT;
 
-		newflags = vma->vm_flags | VM_LOCKED;
-		if (!on)
-			newflags &= ~VM_LOCKED;
-
-		tmp = vma->vm_end;
-		if (tmp > end)
-			tmp = end;
-		error = mlock_fixup(vma, &prev, nstart, tmp, newflags);
-		if (error)
-			break;
-		nstart = tmp;
-		if (nstart < prev->vm_end)
-			nstart = prev->vm_end;
-		if (nstart >= end)
+next:
+		if (vma->vm_end == end)
 			break;
+		prev = vma;
+		vma = vma->vm_next;
 
-		vma = prev->vm_next;
-		if (!vma || vma->vm_start != nstart) {
-			error = -ENOMEM;
+		/* If called from munlockall,
+		 * we may jump over holes.
+		 */
+		if (jump_hole) {
+			if (!vma)
+				break;
+			else
+				continue;
+		}
+		else if (!vma || (vma->vm_start != prev->vm_end)) {
+			ret = -ENOMEM;
 			break;
 		}
 	}
-	return error;
+
+	pprev = &prev;
+	vma = find_vma_prev(current->mm, start, pprev);
+
+	while (vma && vma->vm_end <= vmoff) {
+		if (vma->vm_flags & VM_CHANGELOCK) {
+			vma->vm_flags &= ~VM_CHANGELOCK;
+			if (!ret)
+				vma->vm_flags &=~VM_LOCKED;
+		}
+		next = vma->vm_next;
+		if (next && (next->vm_flags & VM_CHANGELOCK)) {
+			next->vm_flags &= ~VM_CHANGELOCK;
+			if (!ret)
+				next->vm_flags &= ~VM_LOCKED;
+		}
+		*pprev = vma_merge(mm, *pprev, vma->vm_start, vma->vm_end, vma->vm_flags,
+				vma->anon_vma, vma->vm_file, vma->vm_pgoff, vma_policy(vma));
+		if (*pprev)
+			vma = *pprev;
+		vma = vma->vm_next;
+	}
+
+	if (!ret)
+		mm->locked_vm -= pages;
+
+	return ret;
 }
 
 asmlinkage long sys_mlock(unsigned long start, size_t len)
@@ -142,7 +240,7 @@ asmlinkage long sys_mlock(unsigned long 
 
 	/* check against resource limits */
 	if ((locked <= lock_limit) || capable(CAP_IPC_LOCK))
-		error = do_mlock(start, len, 1);
+		error = do_mlock(start, len, 0);
 	up_write(&current->mm->mmap_sem);
 	return error;
 }
@@ -154,34 +252,43 @@ asmlinkage long sys_munlock(unsigned lon
 	down_write(&current->mm->mmap_sem);
 	len = PAGE_ALIGN(len + (start & ~PAGE_MASK));
 	start &= PAGE_MASK;
-	ret = do_mlock(start, len, 0);
+	ret = do_munlock(start, len, 0);
 	up_write(&current->mm->mmap_sem);
 	return ret;
 }
 
 static int do_mlockall(int flags)
 {
-	struct vm_area_struct * vma, * prev = NULL;
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct * vma;
 	unsigned int def_flags = 0;
+	unsigned long start;
+	int ret = 0;
 
 	if (flags & MCL_FUTURE)
 		def_flags = VM_LOCKED;
-	current->mm->def_flags = def_flags;
+	mm->def_flags = def_flags;
 	if (flags == MCL_FUTURE)
 		goto out;
+	vma = mm->mmap;
+	start = vma->vm_start;
+	ret = do_mlock(start, TASK_SIZE, 1);
+out:
+	return ret;
+}
 
-	for (vma = current->mm->mmap; vma ; vma = prev->vm_next) {
-		unsigned int newflags;
+static int do_munlockall(void)
+{
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct * vma;
+	unsigned long start;
+	int ret;
 
-		newflags = vma->vm_flags | VM_LOCKED;
-		if (!(flags & MCL_CURRENT))
-			newflags &= ~VM_LOCKED;
+	vma = mm->mmap;
+	start = vma->vm_start;
+	ret = do_munlock(start, TASK_SIZE, 1);
 
-		/* Ignore errors */
-		mlock_fixup(vma, &prev, vma->vm_start, vma->vm_end, newflags);
-	}
-out:
-	return 0;
+	return ret;
 }
 
 asmlinkage long sys_mlockall(int flags)
@@ -215,7 +322,7 @@ asmlinkage long sys_munlockall(void)
 	int ret;
 
 	down_write(&current->mm->mmap_sem);
-	ret = do_mlockall(0);
+	ret = do_munlockall();
 	up_write(&current->mm->mmap_sem);
 	return ret;
 }
_

Patches currently in -mm which might be from pwstone@xxxxxxxxx are

