The patch titled
     Subject: mm: gup: add mm_lock_present()
has been added to the -mm tree.  Its filename is
     mm-gup-add-mm_lock_present.patch

This patch should soon appear at
    http://ozlabs.org/~akpm/mmots/broken-out/mm-gup-add-mm_lock_present.patch
and later at
    http://ozlabs.org/~akpm/mmotm/broken-out/mm-gup-add-mm_lock_present.patch

Before you just go and hit "reply", please:
   a) Consider who else should be cc'ed
   b) Prefer to cc a suitable mailing list as well
   c) Ideally: find the original patch on the mailing list and do a
      reply-to-all to that, adding suitable additional cc's

*** Remember to use Documentation/SubmitChecklist when testing your code ***

The -mm tree is included into linux-next and is updated
there every 3-4 working days

------------------------------------------------------
From: Eric B Munson <emunson@xxxxxxxxxx>
Subject: mm: gup: add mm_lock_present()

The upcoming mlock(MLOCK_ONFAULT) implementation will need a way to
request that all present pages in a range are locked without faulting in
pages that are not present.  This logic is very close to what the
__mm_populate() call handles without faulting pages, so the patch pulls
out the pieces that can be shared and adds mm_lock_present() to gup.c.
The following patch will call it from do_mlock() when MLOCK_ONFAULT is
specified.

Signed-off-by: Eric B Munson <emunson@xxxxxxxxxx>
Cc: Jonathan Corbet <corbet@xxxxxxx>
Cc: Vlastimil Babka <vbabka@xxxxxxx>
Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
---

 mm/gup.c |  172 ++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 157 insertions(+), 15 deletions(-)

diff -puN mm/gup.c~mm-gup-add-mm_lock_present mm/gup.c
--- a/mm/gup.c~mm-gup-add-mm_lock_present
+++ a/mm/gup.c
@@ -818,6 +818,30 @@ long get_user_pages(struct task_struct *
 }
 EXPORT_SYMBOL(get_user_pages);
 
+/*
+ * Helper function used by both populate_vma_page_range() and pin_user_pages
+ */
+static int get_gup_flags(vm_flags_t vm_flags)
+{
+        int gup_flags = FOLL_TOUCH | FOLL_POPULATE;
+        /*
+         * We want to touch writable mappings with a write fault in order
+         * to break COW, except for shared mappings because these don't COW
+         * and we would not want to dirty them for nothing.
+         */
+        if ((vm_flags & (VM_WRITE | VM_SHARED)) == VM_WRITE)
+                gup_flags |= FOLL_WRITE;
+
+        /*
+         * We want mlock to succeed for regions that have any permissions
+         * other than PROT_NONE.
+         */
+        if (vm_flags & (VM_READ | VM_WRITE | VM_EXEC))
+                gup_flags |= FOLL_FORCE;
+
+        return gup_flags;
+}
+
 /**
  * populate_vma_page_range() -  populate a range of pages in the vma.
  * @vma:   target vma
@@ -850,21 +874,7 @@ long populate_vma_page_range(struct vm_a
         VM_BUG_ON_VMA(end > vma->vm_end, vma);
         VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_sem), mm);
 
-        gup_flags = FOLL_TOUCH | FOLL_POPULATE;
-        /*
-         * We want to touch writable mappings with a write fault in order
-         * to break COW, except for shared mappings because these don't COW
-         * and we would not want to dirty them for nothing.
-         */
-        if ((vma->vm_flags & (VM_WRITE | VM_SHARED)) == VM_WRITE)
-                gup_flags |= FOLL_WRITE;
-
-        /*
-         * We want mlock to succeed for regions that have any permissions
-         * other than PROT_NONE.
-         */
-        if (vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC))
-                gup_flags |= FOLL_FORCE;
+        gup_flags = get_gup_flags(vma->vm_flags);
 
         /*
          * We made sure addr is within a VMA, so the following will
@@ -874,6 +884,138 @@ long populate_vma_page_range(struct vm_a
                         NULL, NULL, nonblocking);
 }
 
+static long pin_user_pages(struct vm_area_struct *vma, unsigned long start,
+                unsigned long end, int *nonblocking)
+{
+        struct mm_struct *mm = vma->vm_mm;
+        unsigned long nr_pages = (end - start) / PAGE_SIZE;
+        int gup_flags;
+        long i = 0;
+        unsigned int page_mask;
+
+        VM_BUG_ON(start & ~PAGE_MASK);
+        VM_BUG_ON(end & ~PAGE_MASK);
+        VM_BUG_ON_VMA(start < vma->vm_start, vma);
+        VM_BUG_ON_VMA(end > vma->vm_end, vma);
+        VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_sem), mm);
+
+        if (!nr_pages)
+                return 0;
+
+        gup_flags = get_gup_flags(vma->vm_flags);
+
+        /*
+         * If FOLL_FORCE is set then do not force a full fault as the hinting
+         * fault information is unrelated to the reference behaviour of a task
+         * using the address space
+         */
+        if (!(gup_flags & FOLL_FORCE))
+                gup_flags |= FOLL_NUMA;
+
+        vma = NULL;
+
+        do {
+                struct page *page;
+                unsigned int foll_flags = gup_flags;
+                unsigned int page_increm;
+
+                /* first iteration or cross vma bound */
+                if (!vma || start >= vma->vm_end) {
+                        vma = find_extend_vma(mm, start);
+                        if (!vma && in_gate_area(mm, start)) {
+                                int ret;
+                                ret = get_gate_page(mm, start & PAGE_MASK,
+                                                gup_flags, &vma, NULL);
+                                if (ret)
+                                        return i ? : ret;
+                                page_mask = 0;
+                                goto next_page;
+                        }
+
+                        if (!vma)
+                                return i ? : -EFAULT;
+                        if (is_vm_hugetlb_page(vma)) {
+                                i = follow_hugetlb_page(mm, vma, NULL, NULL,
+                                                &start, &nr_pages, i,
+                                                gup_flags);
+                                continue;
+                        }
+                }
+
+                /*
+                 * If we have a pending SIGKILL, don't keep pinning pages
+                 */
+                if (unlikely(fatal_signal_pending(current)))
+                        return i ? i : -ERESTARTSYS;
+                cond_resched();
+                page = follow_page_mask(vma, start, foll_flags, &page_mask);
+                if (!page)
+                        goto next_page;
+                if (IS_ERR(page))
+                        return i ? i : PTR_ERR(page);
+next_page:
+                page_increm = 1 + (~(start >> PAGE_SHIFT) & page_mask);
+                if (page_increm > nr_pages)
+                        page_increm = nr_pages;
+                i += page_increm;
+                start += page_increm * PAGE_SIZE;
+                nr_pages -= page_increm;
+        } while (nr_pages);
+        return i;
+}
+
+/*
+ * mm_lock_present - lock present pages within a range of address space.
+ *
+ * This is used to implement mlock2(MLOCK_LOCKONFAULT).  VMAs must be already
+ * marked with the desired vm_flags, and mmap_sem must not be held.
+ */
+int mm_lock_present(unsigned long start, unsigned long len)
+{
+        struct mm_struct *mm = current->mm;
+        unsigned long end, nstart, nend;
+        struct vm_area_struct *vma = NULL;
+        int locked = 0;
+        long ret = 0;
+
+        VM_BUG_ON(start & ~PAGE_MASK);
+        VM_BUG_ON(len != PAGE_ALIGN(len));
+        end = start + len;
+
+        for (nstart = start; nstart < end; nstart = nend) {
+                /*
+                 * We want to fault in pages for [nstart; end) address range.
+                 * Find first corresponding VMA.
+                 */
+                if (!locked) {
+                        locked = 1;
+                        down_read(&mm->mmap_sem);
+                        vma = find_vma(mm, nstart);
+                } else if (nstart >= vma->vm_end)
+                        vma = vma->vm_next;
+                if (!vma || vma->vm_start >= end)
+                        break;
+                /*
+                 * Set [nstart; nend) to intersection of desired address
+                 * range with the first VMA.  Also, skip undesirable VMA types.
+                 */
+                nend = min(end, vma->vm_end);
+                if (vma->vm_flags & (VM_IO | VM_PFNMAP))
+                        continue;
+                if (nstart < vma->vm_start)
+                        nstart = vma->vm_start;
+
+                ret = pin_user_pages(vma, nstart, nend, &locked);
+                if (ret < 0)
+                        break;
+                nend = nstart + ret * PAGE_SIZE;
+                ret = 0;
+        }
+        if (locked)
+                up_read(&mm->mmap_sem);
+        return ret;     /* 0 or negative error code */
+}
+
 /*
  * __mm_populate - populate and/or mlock pages within a range of address space.
  *
_

Patches currently in -mm which might be from emunson@xxxxxxxxxx are

mm-mlock-refactor-mlock-munlock-and-munlockall-code.patch
mm-mlock-add-new-mlock-munlock-and-munlockall-system-calls.patch
mm-mlock-add-new-mlock-munlock-and-munlockall-system-calls-v4.patch
mm-mlock-add-new-mlock-munlock-and-munlockall-system-calls-fix.patch
mm-mlock-add-new-mlock-munlock-and-munlockall-system-calls-fix-2.patch
mm-gup-add-mm_lock_present.patch
mm-mlock-introduce-vm_lockonfault-and-add-mlock-flags-to-enable-it.patch
mm-mlock-introduce-vm_lockonfault-and-add-mlock-flags-to-enable-it-v4.patch
mm-mmap-add-mmap-flag-to-request-vm_lockonfault.patch
mm-mmap-add-mmap-flag-to-request-vm_lockonfault-v4.patch
selftests-vm-add-tests-for-lock-on-fault.patch
selftests-vm-add-tests-for-lock-on-fault-v4.patch
--
To unsubscribe from this list: send the line "unsubscribe mm-commits" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html
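The changelog above says a follow-up patch, not included in this mail, will call
mm_lock_present() from do_mlock() when MLOCK_ONFAULT is requested.  The sketch
below is only an illustration of what such a caller has to guarantee according
to the comment block in the patch: a page-aligned range, VMAs already carrying
the desired vm_flags, and mmap_sem not held on entry.  The function name, the
declaration placement, and the flag-setting step are assumptions for
illustration, not part of the series as posted.

    #include <linux/mm.h>

    /*
     * Assumed declaration site: the patch only defines mm_lock_present()
     * in mm/gup.c and does not show a header change (e.g. mm/internal.h).
     */
    extern int mm_lock_present(unsigned long start, unsigned long len);

    /* Illustrative MLOCK_ONFAULT-style caller; the name is hypothetical. */
    int lock_onfault_sketch(unsigned long start, size_t len)
    {
            /* mm_lock_present() VM_BUG_ONs on an unaligned range, so align first. */
            len = PAGE_ALIGN(len + (start & ~PAGE_MASK));
            start &= PAGE_MASK;

            /*
             * Step not shown here: with mmap_sem held for writing, set VM_LOCKED
             * (plus the series' lock-on-fault flag) on every VMA in the range,
             * then release mmap_sem; mm_lock_present() must be entered without it.
             */

            /* Lock only the pages that are already present; returns 0 or -errno. */
            return mm_lock_present(start, len);
    }

Pages that are absent when this runs are simply skipped; under the lock-on-fault
scheme described in the changelog they are meant to be locked later, when they
first fault in.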