On Wed 26-08-15 14:24:23, Eric B Munson wrote: > The previous patch introduced a flag that specified pages in a VMA > should be placed on the unevictable LRU, but they should not be made > present when the area is created. This patch adds the ability to set > this state via the new mlock system calls. > > We add MLOCK_ONFAULT for mlock2 and MCL_ONFAULT for mlockall. > MLOCK_ONFAULT will set the VM_LOCKONFAULT modifier for VM_LOCKED. > MCL_ONFAULT should be used as a modifier to the two other mlockall > flags. When used with MCL_CURRENT, all current mappings will be marked > with VM_LOCKED | VM_LOCKONFAULT. When used with MCL_FUTURE, the > mm->def_flags will be marked with VM_LOCKED | VM_LOCKONFAULT. When used > with both MCL_CURRENT and MCL_FUTURE, all current mappings and > mm->def_flags will be marked with VM_LOCKED | VM_LOCKONFAULT. > > Prior to this patch, mlockall() will unconditionally clear the > mm->def_flags any time it is called without MCL_FUTURE. This behavior > is maintained after adding MCL_ONFAULT. If a call to > mlockall(MCL_FUTURE) is followed by mlockall(MCL_CURRENT), the > mm->def_flags will be cleared and new VMAs will be unlocked. This > remains true with or without MCL_ONFAULT in either mlockall() > invocation. > > munlock() will unconditionally clear both vma flags. munlockall() > unconditionally clears for VMA flags on all VMAs and in the > mm->def_flags field. > > Signed-off-by: Eric B Munson <emunson@xxxxxxxxxx> > Acked-by: Vlastimil Babka <vbabka@xxxxxxx> > Cc: Michal Hocko <mhocko@xxxxxxx> > Cc: Vlastimil Babka <vbabka@xxxxxxx> > Cc: Jonathan Corbet <corbet@xxxxxxx> > Cc: "Kirill A. Shutemov" <kirill@xxxxxxxxxxxxx> > Cc: linux-alpha@xxxxxxxxxxxxxxx > Cc: linux-kernel@xxxxxxxxxxxxxxx > Cc: linux-mips@xxxxxxxxxxxxxx > Cc: linux-parisc@xxxxxxxxxxxxxxx > Cc: linuxppc-dev@xxxxxxxxxxxxxxxx > Cc: sparclinux@xxxxxxxxxxxxxxx > Cc: linux-xtensa@xxxxxxxxxxxxxxxx > Cc: linux-arch@xxxxxxxxxxxxxxx > Cc: linux-api@xxxxxxxxxxxxxxx > Cc: linux-mm@xxxxxxxxx I haven't checked the arch specific bits but the core part looks good to me. Acked-by: Michal Hocko <mhocko@xxxxxxxx> > --- > arch/alpha/include/uapi/asm/mman.h | 3 ++ > arch/mips/include/uapi/asm/mman.h | 6 ++++ > arch/parisc/include/uapi/asm/mman.h | 3 ++ > arch/powerpc/include/uapi/asm/mman.h | 1 + > arch/sparc/include/uapi/asm/mman.h | 1 + > arch/tile/include/uapi/asm/mman.h | 1 + > arch/xtensa/include/uapi/asm/mman.h | 6 ++++ > include/uapi/asm-generic/mman-common.h | 5 ++++ > include/uapi/asm-generic/mman.h | 1 + > mm/mlock.c | 52 +++++++++++++++++++++++++--------- > 10 files changed, 66 insertions(+), 13 deletions(-) > > diff --git a/arch/alpha/include/uapi/asm/mman.h b/arch/alpha/include/uapi/asm/mman.h > index 0086b47..f2f9496 100644 > --- a/arch/alpha/include/uapi/asm/mman.h > +++ b/arch/alpha/include/uapi/asm/mman.h > @@ -37,6 +37,9 @@ > > #define MCL_CURRENT 8192 /* lock all currently mapped pages */ > #define MCL_FUTURE 16384 /* lock all additions to address space */ > +#define MCL_ONFAULT 32768 /* lock all pages that are faulted in */ > + > +#define MLOCK_ONFAULT 0x01 /* Lock pages in range after they are faulted in, do not prefault */ > > #define MADV_NORMAL 0 /* no further special treatment */ > #define MADV_RANDOM 1 /* expect random page references */ > diff --git a/arch/mips/include/uapi/asm/mman.h b/arch/mips/include/uapi/asm/mman.h > index cfcb876..97c03f4 100644 > --- a/arch/mips/include/uapi/asm/mman.h > +++ b/arch/mips/include/uapi/asm/mman.h > @@ -61,6 +61,12 @@ > */ > #define MCL_CURRENT 1 /* lock all current mappings */ > #define MCL_FUTURE 2 /* lock all future mappings */ > +#define MCL_ONFAULT 4 /* lock all pages that are faulted in */ > + > +/* > + * Flags for mlock > + */ > +#define MLOCK_ONFAULT 0x01 /* Lock pages in range after they are faulted in, do not prefault */ > > #define MADV_NORMAL 0 /* no further special treatment */ > #define MADV_RANDOM 1 /* expect random page references */ > diff --git a/arch/parisc/include/uapi/asm/mman.h b/arch/parisc/include/uapi/asm/mman.h > index 294d251..ecc3ae1 100644 > --- a/arch/parisc/include/uapi/asm/mman.h > +++ b/arch/parisc/include/uapi/asm/mman.h > @@ -31,6 +31,9 @@ > > #define MCL_CURRENT 1 /* lock all current mappings */ > #define MCL_FUTURE 2 /* lock all future mappings */ > +#define MCL_ONFAULT 4 /* lock all pages that are faulted in */ > + > +#define MLOCK_ONFAULT 0x01 /* Lock pages in range after they are faulted in, do not prefault */ > > #define MADV_NORMAL 0 /* no further special treatment */ > #define MADV_RANDOM 1 /* expect random page references */ > diff --git a/arch/powerpc/include/uapi/asm/mman.h b/arch/powerpc/include/uapi/asm/mman.h > index 6ea26df..03c06ba 100644 > --- a/arch/powerpc/include/uapi/asm/mman.h > +++ b/arch/powerpc/include/uapi/asm/mman.h > @@ -22,6 +22,7 @@ > > #define MCL_CURRENT 0x2000 /* lock all currently mapped pages */ > #define MCL_FUTURE 0x4000 /* lock all additions to address space */ > +#define MCL_ONFAULT 0x8000 /* lock all pages that are faulted in */ > > #define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ > #define MAP_NONBLOCK 0x10000 /* do not block on IO */ > diff --git a/arch/sparc/include/uapi/asm/mman.h b/arch/sparc/include/uapi/asm/mman.h > index 0b14df3..9765896 100644 > --- a/arch/sparc/include/uapi/asm/mman.h > +++ b/arch/sparc/include/uapi/asm/mman.h > @@ -17,6 +17,7 @@ > > #define MCL_CURRENT 0x2000 /* lock all currently mapped pages */ > #define MCL_FUTURE 0x4000 /* lock all additions to address space */ > +#define MCL_ONFAULT 0x8000 /* lock all pages that are faulted in */ > > #define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */ > #define MAP_NONBLOCK 0x10000 /* do not block on IO */ > diff --git a/arch/tile/include/uapi/asm/mman.h b/arch/tile/include/uapi/asm/mman.h > index 81b8fc3..63ee13f 100644 > --- a/arch/tile/include/uapi/asm/mman.h > +++ b/arch/tile/include/uapi/asm/mman.h > @@ -36,6 +36,7 @@ > */ > #define MCL_CURRENT 1 /* lock all current mappings */ > #define MCL_FUTURE 2 /* lock all future mappings */ > +#define MCL_ONFAULT 4 /* lock all pages that are faulted in */ > > > #endif /* _ASM_TILE_MMAN_H */ > diff --git a/arch/xtensa/include/uapi/asm/mman.h b/arch/xtensa/include/uapi/asm/mman.h > index 201aec0..360944e 100644 > --- a/arch/xtensa/include/uapi/asm/mman.h > +++ b/arch/xtensa/include/uapi/asm/mman.h > @@ -74,6 +74,12 @@ > */ > #define MCL_CURRENT 1 /* lock all current mappings */ > #define MCL_FUTURE 2 /* lock all future mappings */ > +#define MCL_ONFAULT 4 /* lock all pages that are faulted in */ > + > +/* > + * Flags for mlock > + */ > +#define MLOCK_ONFAULT 0x01 /* Lock pages in range after they are faulted in, do not prefault */ > > #define MADV_NORMAL 0 /* no further special treatment */ > #define MADV_RANDOM 1 /* expect random page references */ > diff --git a/include/uapi/asm-generic/mman-common.h b/include/uapi/asm-generic/mman-common.h > index ddc3b36..a74dd84 100644 > --- a/include/uapi/asm-generic/mman-common.h > +++ b/include/uapi/asm-generic/mman-common.h > @@ -25,6 +25,11 @@ > # define MAP_UNINITIALIZED 0x0 /* Don't support this flag */ > #endif > > +/* > + * Flags for mlock > + */ > +#define MLOCK_ONFAULT 0x01 /* Lock pages in range after they are faulted in, do not prefault */ > + > #define MS_ASYNC 1 /* sync memory asynchronously */ > #define MS_INVALIDATE 2 /* invalidate the caches */ > #define MS_SYNC 4 /* synchronous memory sync */ > diff --git a/include/uapi/asm-generic/mman.h b/include/uapi/asm-generic/mman.h > index e9fe6fd..7162cd4 100644 > --- a/include/uapi/asm-generic/mman.h > +++ b/include/uapi/asm-generic/mman.h > @@ -17,5 +17,6 @@ > > #define MCL_CURRENT 1 /* lock all current mappings */ > #define MCL_FUTURE 2 /* lock all future mappings */ > +#define MCL_ONFAULT 4 /* lock all pages that are faulted in */ > > #endif /* __ASM_GENERIC_MMAN_H */ > diff --git a/mm/mlock.c b/mm/mlock.c > index 7efe27d..0747663 100644 > --- a/mm/mlock.c > +++ b/mm/mlock.c > @@ -506,7 +506,8 @@ static int mlock_fixup(struct vm_area_struct *vma, struct vm_area_struct **prev, > > if (newflags == vma->vm_flags || (vma->vm_flags & VM_SPECIAL) || > is_vm_hugetlb_page(vma) || vma == get_gate_vma(current->mm)) > - goto out; /* don't set VM_LOCKED, don't count */ > + /* don't set VM_LOCKED or VM_LOCKONFAULT and don't count */ > + goto out; > > pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT); > *prev = vma_merge(mm, *prev, start, end, newflags, vma->anon_vma, > @@ -576,7 +577,7 @@ static int apply_vma_lock_flags(unsigned long start, size_t len, > prev = vma; > > for (nstart = start ; ; ) { > - vm_flags_t newflags = vma->vm_flags & ~VM_LOCKED; > + vm_flags_t newflags = vma->vm_flags & VM_LOCKED_CLEAR_MASK; > > newflags |= flags; > > @@ -645,10 +646,15 @@ SYSCALL_DEFINE2(mlock, unsigned long, start, size_t, len) > > SYSCALL_DEFINE3(mlock2, unsigned long, start, size_t, len, int, flags) > { > - if (flags) > + vm_flags_t vm_flags = VM_LOCKED; > + > + if (flags & ~MLOCK_ONFAULT) > return -EINVAL; > > - return do_mlock(start, len, VM_LOCKED); > + if (flags & MLOCK_ONFAULT) > + vm_flags |= VM_LOCKONFAULT; > + > + return do_mlock(start, len, vm_flags); > } > > SYSCALL_DEFINE2(munlock, unsigned long, start, size_t, len) > @@ -665,24 +671,43 @@ SYSCALL_DEFINE2(munlock, unsigned long, start, size_t, len) > return ret; > } > > +/* > + * Take the MCL_* flags passed into mlockall (or 0 if called from munlockall) > + * and translate into the appropriate modifications to mm->def_flags and/or the > + * flags for all current VMAs. > + * > + * There are a couple of subtleties with this. If mlockall() is called multiple > + * times with different flags, the values do not necessarily stack. If mlockall > + * is called once including the MCL_FUTURE flag and then a second time without > + * it, VM_LOCKED and VM_LOCKONFAULT will be cleared from mm->def_flags. > + */ > static int apply_mlockall_flags(int flags) > { > struct vm_area_struct * vma, * prev = NULL; > + vm_flags_t to_add = 0; > > - if (flags & MCL_FUTURE) > + current->mm->def_flags &= VM_LOCKED_CLEAR_MASK; > + if (flags & MCL_FUTURE) { > current->mm->def_flags |= VM_LOCKED; > - else > - current->mm->def_flags &= ~VM_LOCKED; > > - if (flags == MCL_FUTURE) > - goto out; > + if (flags & MCL_ONFAULT) > + current->mm->def_flags |= VM_LOCKONFAULT; > + > + if (!(flags & MCL_CURRENT)) > + goto out; > + } > + > + if (flags & MCL_CURRENT) { > + to_add |= VM_LOCKED; > + if (flags & MCL_ONFAULT) > + to_add |= VM_LOCKONFAULT; > + } > > for (vma = current->mm->mmap; vma ; vma = prev->vm_next) { > vm_flags_t newflags; > > - newflags = vma->vm_flags & ~VM_LOCKED; > - if (flags & MCL_CURRENT) > - newflags |= VM_LOCKED; > + newflags = vma->vm_flags & VM_LOCKED_CLEAR_MASK; > + newflags |= to_add; > > /* Ignore errors */ > mlock_fixup(vma, &prev, vma->vm_start, vma->vm_end, newflags); > @@ -697,7 +722,8 @@ SYSCALL_DEFINE1(mlockall, int, flags) > unsigned long lock_limit; > int ret = -EINVAL; > > - if (!flags || (flags & ~(MCL_CURRENT | MCL_FUTURE))) > + if (!flags || (flags & ~(MCL_CURRENT | MCL_FUTURE | MCL_ONFAULT)) || > + flags == MCL_ONFAULT) > goto out; > > ret = -EPERM; > -- > 1.9.1 -- Michal Hocko SUSE Labs