The patch titled mm: POSIX Memory Lock has been removed from the -mm tree. Its filename is mm-posix-memory-lock.patch This patch was probably dropped from -mm because it has now been merged into a subsystem tree or into Linus's tree, or because it was folded into its parent patch in the -mm tree. From: "Stone Wang" <pwstone@xxxxxxxxx> Currently, Linux's mlock family of memory lock/unlock calls may fail with part of their work already done, which may leave programmers unsure which parts of memory are locked and which are not. A better implementation is the transaction-like POSIX memory lock. POSIX mlock/munlock : http://www.opengroup.org/onlinepubs/009695399/functions/mlock.html RETURN VALUE Upon successful completion, the mlock() and munlock() functions shall return a value of zero. Otherwise, no change is made to any locks in the address space of the process, and the function shall return a value of -1 and set errno to indicate the error. POSIX mlockall/munlockall : http://www.opengroup.org/onlinepubs/009695399/functions/mlockall.html RETURN VALUE Upon successful completion, the mlockall() function shall return a value of zero. Otherwise, no additional memory shall be locked, and the function shall return a value of -1 and set errno to indicate the error. The effect of failure of mlockall() on previously existing locks in the address space is unspecified. If it is supported by the implementation, the munlockall() function shall always return a value of zero. Otherwise, the function shall return a value of -1 and set errno to indicate the error. This patch tries to fix this; tests show that it works. Nick Piggin suggested that the patch be submitted alone, and that it use 1 bit of vm_flags instead of adding 1 member to vm_area_struct. Special thanks to him. In addition, the patch has been largely rewritten to make it clearer. 
Signed-off-by: Shaoping Wang <pwstone@xxxxxxxxx> Cc: Hugh Dickins <hugh@xxxxxxxxxxx> Cc: Nick Piggin <nickpiggin@xxxxxxxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxx> --- include/linux/mm.h | 1 mm/mlock.c | 281 +++++++++++++++++++++++++++++-------------- 2 files changed, 195 insertions(+), 87 deletions(-) diff -puN include/linux/mm.h~mm-posix-memory-lock include/linux/mm.h --- 25/include/linux/mm.h~mm-posix-memory-lock Wed Mar 29 16:22:48 2006 +++ 25-akpm/include/linux/mm.h Wed Mar 29 16:22:48 2006 @@ -166,6 +166,7 @@ extern unsigned int kobjsize(const void #define VM_NONLINEAR 0x00800000 /* Is non-linear (remap_file_pages) */ #define VM_MAPPED_COPY 0x01000000 /* T if mapped copy of data (nommu mmap) */ #define VM_INSERTPAGE 0x02000000 /* The vma has had "vm_insert_page()" done on it */ +#define VM_CHANGELOCK 0x04000000 /* The vma just has VM_LOCKED bit changed */ #ifndef VM_STACK_DEFAULT_FLAGS /* arch can override this */ #define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS diff -puN mm/mlock.c~mm-posix-memory-lock mm/mlock.c --- 25/mm/mlock.c~mm-posix-memory-lock Wed Mar 29 16:22:48 2006 +++ 25-akpm/mm/mlock.c Wed Mar 29 16:22:48 2006 @@ -3,6 +3,7 @@ * * (C) Copyright 1995 Linus Torvalds * (C) Copyright 2002 Christoph Hellwig + * (C) Copyright 2006 Peter Wang */ #include <linux/capability.h> @@ -11,72 +12,120 @@ #include <linux/mempolicy.h> #include <linux/syscalls.h> - -static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev, - unsigned long start, unsigned long end, unsigned int newflags) +static int do_mlock(unsigned long start, size_t len,unsigned int jump_hole) { - struct mm_struct * mm = vma->vm_mm; - pgoff_t pgoff; - int pages; + unsigned long end = 0, vmoff = 0; + unsigned long pages = 0; + struct mm_struct *mm = current->mm; + struct vm_area_struct * vma, *prev, **pprev,*next; int ret = 0; - if (newflags == vma->vm_flags) { - *prev = vma; - goto out; - } + len = PAGE_ALIGN(len); + end = start + len; + if (end < start) + 
return -EINVAL; + if (end == start) + return 0; - pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT); - *prev = vma_merge(mm, *prev, start, end, newflags, vma->anon_vma, - vma->vm_file, pgoff, vma_policy(vma)); - if (*prev) { - vma = *prev; - goto success; - } + vma = find_vma_prev(current->mm, start, &prev); + if (!vma || vma->vm_start > start) + return -ENOMEM; - *prev = vma; + while (vma->vm_start < end) { + if (vma->vm_flags & VM_LOCKED) { + if (vma->vm_end < end) + goto next; + else + break; + } else { + if (vma->vm_start < start) { + prev = vma; + ret = split_vma(mm, prev, start, 0); + if (!ret) { + vma = prev->vm_next; + vmoff = vma->vm_end; + } + else + break; + } + if (vma->vm_end > end) { + ret = split_vma(mm, vma, end, 0); + if (!ret) + vmoff = vma->vm_end; + else + break; + } + } - if (start != vma->vm_start) { - ret = split_vma(mm, vma, start, 1); - if (ret) - goto out; - } + pages += (vma->vm_end - vma->vm_start) >> PAGE_SHIFT; - if (end != vma->vm_end) { - ret = split_vma(mm, vma, end, 0); - if (ret) - goto out; + vma->vm_flags |= VM_LOCKED; + vma->vm_flags |= VM_CHANGELOCK; + vmoff = vma->vm_end; + if (!(vma->vm_flags & VM_IO)) { + ret = make_pages_present(vma->vm_start, vma->vm_end); + if (ret) + break; + } +next: + if (vma->vm_end == end) + break; + prev = vma; + vma = vma->vm_next; + + /* If called from do_mlockall, + * we may jump over holes. + */ + if (jump_hole) { + if (vma) + continue; + else + break; + } + else if (!vma || vma->vm_start != prev->vm_end) { + ret = -ENOMEM; + break; + } } -success: - /* - * vm_flags is protected by the mmap_sem held in write mode. - * It's okay if try_to_unmap_one unmaps a page just after we - * set VM_LOCKED, make_pages_present below will bring it back. - */ - vma->vm_flags = newflags; + pprev = &prev; + vma = find_vma_prev(mm, start, pprev); /* - * Keep track of amount of locked VM. + * Try to merge the vmas. + * If error happened, rollback vmas to original status. 
*/ - pages = (end - start) >> PAGE_SHIFT; - if (newflags & VM_LOCKED) { - pages = -pages; - if (!(newflags & VM_IO)) - ret = make_pages_present(start, end); + while (vma && vma->vm_end <= vmoff ) { + if (vma->vm_flags & VM_CHANGELOCK) { + vma->vm_flags &= ~VM_CHANGELOCK; + if (ret) + vma->vm_flags &= ~VM_LOCKED; + } + next = vma->vm_next; + if (next && (next->vm_flags & VM_CHANGELOCK)) { + next->vm_flags &= ~VM_CHANGELOCK; + if (ret) + next->vm_flags &= ~VM_LOCKED; + } + *pprev = vma_merge(mm, *pprev, vma->vm_start, vma->vm_end, vma->vm_flags, + vma->anon_vma,vma->vm_file, vma->vm_pgoff, vma_policy(vma)); + if (*pprev) + vma = *pprev; + vma = vma->vm_next; } - vma->vm_mm->locked_vm -= pages; -out: - if (ret == -ENOMEM) - ret = -EAGAIN; + if (!ret) + mm->locked_vm += pages; return ret; } -static int do_mlock(unsigned long start, size_t len, int on) +static int do_munlock(unsigned long start, size_t len, unsigned int jump_hole) { - unsigned long nstart, end, tmp; - struct vm_area_struct * vma, * prev; - int error; + unsigned long end = 0,vmoff = 0; + unsigned long pages = 0; + struct mm_struct *mm=current->mm; + struct vm_area_struct * vma, *prev, **pprev, *next; + int ret = 0; len = PAGE_ALIGN(len); end = start + len; @@ -88,37 +137,86 @@ static int do_mlock(unsigned long start, if (!vma || vma->vm_start > start) return -ENOMEM; - if (start > vma->vm_start) - prev = vma; - - for (nstart = start ; ; ) { - unsigned int newflags; + while (vma->vm_start < end) { + if (!(vma->vm_flags & VM_LOCKED)) { + if(vma->vm_end < end) + goto next; + else + break; + } else { + if (vma->vm_start < start) { + prev = vma; + ret = split_vma(mm, prev, start, 0); + if (!ret) { + vma = prev->vm_next; + vmoff = vma->vm_end; + } + else + break; + } + if (vma->vm_end > end) { + ret = split_vma(mm, vma, end, 0); + if (!ret) + vmoff = vma->vm_end; + else + break; + } + } - /* Here we know that vma->vm_start <= nstart < vma->vm_end. 
*/ + /* Delay clearing VM_LOCKED bit here, + * thus make the possibly rollback easy. + */ + vma->vm_flags |= VM_CHANGELOCK; + vmoff = vma->vm_end; + pages += (vma->vm_end -vma->vm_start) >> PAGE_SHIFT; - newflags = vma->vm_flags | VM_LOCKED; - if (!on) - newflags &= ~VM_LOCKED; - - tmp = vma->vm_end; - if (tmp > end) - tmp = end; - error = mlock_fixup(vma, &prev, nstart, tmp, newflags); - if (error) - break; - nstart = tmp; - if (nstart < prev->vm_end) - nstart = prev->vm_end; - if (nstart >= end) +next: + if (vma->vm_end == end) break; + prev = vma; + vma = vma->vm_next; - vma = prev->vm_next; - if (!vma || vma->vm_start != nstart) { - error = -ENOMEM; + /* If called from munlockall, + * we may jump over holes. + */ + if (jump_hole) { + if (!vma) + break; + else + continue; + } + else if (!vma || (vma->vm_start != prev->vm_end)) { + ret = -ENOMEM; break; } } - return error; + + pprev = &prev; + vma = find_vma_prev(current->mm, start, pprev); + + while (vma && vma->vm_end <= vmoff) { + if (vma->vm_flags & VM_CHANGELOCK) { + vma->vm_flags &= ~VM_CHANGELOCK; + if (!ret) + vma->vm_flags &=~VM_LOCKED; + } + next = vma->vm_next; + if (next && (next->vm_flags & VM_CHANGELOCK)) { + next->vm_flags &= ~VM_CHANGELOCK; + if (!ret) + next->vm_flags &= ~VM_LOCKED; + } + *pprev = vma_merge(mm, *pprev, vma->vm_start, vma->vm_end, vma->vm_flags, + vma->anon_vma, vma->vm_file, vma->vm_pgoff, vma_policy(vma)); + if (*pprev) + vma = *pprev; + vma = vma->vm_next; + } + + if (!ret) + mm->locked_vm -= pages; + + return ret; } asmlinkage long sys_mlock(unsigned long start, size_t len) @@ -142,7 +240,7 @@ asmlinkage long sys_mlock(unsigned long /* check against resource limits */ if ((locked <= lock_limit) || capable(CAP_IPC_LOCK)) - error = do_mlock(start, len, 1); + error = do_mlock(start, len, 0); up_write(&current->mm->mmap_sem); return error; } @@ -154,34 +252,43 @@ asmlinkage long sys_munlock(unsigned lon down_write(&current->mm->mmap_sem); len = PAGE_ALIGN(len + (start & ~PAGE_MASK)); start &= 
PAGE_MASK; - ret = do_mlock(start, len, 0); + ret = do_munlock(start, len, 0); up_write(&current->mm->mmap_sem); return ret; } static int do_mlockall(int flags) { - struct vm_area_struct * vma, * prev = NULL; + struct mm_struct *mm = current->mm; + struct vm_area_struct * vma; unsigned int def_flags = 0; + unsigned long start; + int ret = 0; if (flags & MCL_FUTURE) def_flags = VM_LOCKED; - current->mm->def_flags = def_flags; + mm->def_flags = def_flags; if (flags == MCL_FUTURE) goto out; + vma = mm->mmap; + start = vma->vm_start; + ret = do_mlock(start, TASK_SIZE, 1); +out: + return ret; +} - for (vma = current->mm->mmap; vma ; vma = prev->vm_next) { - unsigned int newflags; +static int do_munlockall(void) +{ + struct mm_struct *mm = current->mm; + struct vm_area_struct * vma; + unsigned long start; + int ret; - newflags = vma->vm_flags | VM_LOCKED; - if (!(flags & MCL_CURRENT)) - newflags &= ~VM_LOCKED; + vma = mm->mmap; + start = vma->vm_start; + ret = do_munlock(start, TASK_SIZE, 1); - /* Ignore errors */ - mlock_fixup(vma, &prev, vma->vm_start, vma->vm_end, newflags); - } -out: - return 0; + return ret; } asmlinkage long sys_mlockall(int flags) @@ -215,7 +322,7 @@ asmlinkage long sys_munlockall(void) int ret; down_write(&current->mm->mmap_sem); - ret = do_mlockall(0); + ret = do_munlockall(); up_write(&current->mm->mmap_sem); return ret; } _ Patches currently in -mm which might be from pwstone@xxxxxxxxx are - To unsubscribe from this list: send the line "unsubscribe mm-commits" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html