The patch titled Subject: userfaultfd: call handle_userfault() for userfaultfd_missing() faults has been added to the -mm tree. Its filename is userfaultfd-call-handle_userfault-for-userfaultfd_missing-faults.patch This patch should soon appear at http://ozlabs.org/~akpm/mmots/broken-out/userfaultfd-call-handle_userfault-for-userfaultfd_missing-faults.patch and later at http://ozlabs.org/~akpm/mmotm/broken-out/userfaultfd-call-handle_userfault-for-userfaultfd_missing-faults.patch Before you just go and hit "reply", please: a) Consider who else should be cc'ed b) Prefer to cc a suitable mailing list as well c) Ideally: find the original patch on the mailing list and do a reply-to-all to that, adding suitable additional cc's *** Remember to use Documentation/SubmitChecklist when testing your code *** The -mm tree is included into linux-next and is updated there every 3-4 working days ------------------------------------------------------ From: Andrea Arcangeli <aarcange@xxxxxxxxxx> Subject: userfaultfd: call handle_userfault() for userfaultfd_missing() faults This is where the page faults must be modified to call handle_userfault() if userfaultfd_missing() is true (so if the vma->vm_flags had VM_UFFD_MISSING set). handle_userfault() then takes care of blocking the page fault and delivering it to userland. The fault flags must also be passed as parameter so the "read|write" kind of fault can be passed to userland. Signed-off-by: Andrea Arcangeli <aarcange@xxxxxxxxxx> Acked-by: Pavel Emelyanov <xemul@xxxxxxxxxxxxx> Cc: Sanidhya Kashyap <sanidhya.gatech@xxxxxxxxx> Cc: zhang.zhanghailiang@xxxxxxxxxx Cc: "Kirill A. Shutemov" <kirill@xxxxxxxxxxxxx> Cc: Andres Lagar-Cavilla <andreslc@xxxxxxxxxx> Cc: Dave Hansen <dave.hansen@xxxxxxxxx> Cc: Paolo Bonzini <pbonzini@xxxxxxxxxx> Cc: Rik van Riel <riel@xxxxxxxxxx> Cc: Mel Gorman <mgorman@xxxxxxx> Cc: Andy Lutomirski <luto@xxxxxxxxxxxxxx> Cc: Hugh Dickins <hughd@xxxxxxxxxx> Cc: Peter Feiner <pfeiner@xxxxxxxxxx> Cc: "Dr. David Alan Gilbert" <dgilbert@xxxxxxxxxx> Cc: Johannes Weiner <hannes@xxxxxxxxxxx> Cc: "Huangpeng (Peter)" <peter.huangpeng@xxxxxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> --- mm/huge_memory.c | 69 ++++++++++++++++++++++++++++++--------------- mm/memory.c | 16 ++++++++++ 2 files changed, 63 insertions(+), 22 deletions(-) diff -puN mm/huge_memory.c~userfaultfd-call-handle_userfault-for-userfaultfd_missing-faults mm/huge_memory.c --- a/mm/huge_memory.c~userfaultfd-call-handle_userfault-for-userfaultfd_missing-faults +++ a/mm/huge_memory.c @@ -23,6 +23,7 @@ #include <linux/pagemap.h> #include <linux/migrate.h> #include <linux/hashtable.h> +#include <linux/userfaultfd_k.h> #include <asm/tlb.h> #include <asm/pgalloc.h> @@ -717,7 +718,8 @@ static inline pmd_t mk_huge_pmd(struct p static int __do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long haddr, pmd_t *pmd, - struct page *page, gfp_t gfp) + struct page *page, gfp_t gfp, + unsigned int flags) { struct mem_cgroup *memcg; pgtable_t pgtable; @@ -725,12 +727,16 @@ static int __do_huge_pmd_anonymous_page( VM_BUG_ON_PAGE(!PageCompound(page), page); - if (mem_cgroup_try_charge(page, mm, gfp, &memcg)) - return VM_FAULT_OOM; + if (mem_cgroup_try_charge(page, mm, gfp, &memcg)) { + put_page(page); + count_vm_event(THP_FAULT_FALLBACK); + return VM_FAULT_FALLBACK; + } pgtable = pte_alloc_one(mm, haddr); if (unlikely(!pgtable)) { mem_cgroup_cancel_charge(page, memcg); + put_page(page); return VM_FAULT_OOM; } @@ -750,6 +756,21 @@ static int __do_huge_pmd_anonymous_page( pte_free(mm, pgtable); } else { pmd_t entry; + + /* Deliver the page fault to userland */ + if (userfaultfd_missing(vma)) { + int ret; + + spin_unlock(ptl); + mem_cgroup_cancel_charge(page, memcg); + put_page(page); + pte_free(mm, pgtable); + ret = handle_userfault(vma, haddr, flags, + VM_UFFD_MISSING); + VM_BUG_ON(ret & VM_FAULT_FALLBACK); + return ret; + } + entry = mk_huge_pmd(page, vma->vm_page_prot); entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); page_add_new_anon_rmap(page, vma, haddr); @@ -760,6 +781,7 @@ static int __do_huge_pmd_anonymous_page( add_mm_counter(mm, MM_ANONPAGES, HPAGE_PMD_NR); atomic_long_inc(&mm->nr_ptes); spin_unlock(ptl); + count_vm_event(THP_FAULT_ALLOC); } return 0; @@ -771,19 +793,16 @@ static inline gfp_t alloc_hugepage_gfpma } /* Caller must hold page table lock. */ -static bool set_huge_zero_page(pgtable_t pgtable, struct mm_struct *mm, +static void set_huge_zero_page(pgtable_t pgtable, struct mm_struct *mm, struct vm_area_struct *vma, unsigned long haddr, pmd_t *pmd, struct page *zero_page) { pmd_t entry; - if (!pmd_none(*pmd)) - return false; entry = mk_pmd(zero_page, vma->vm_page_prot); entry = pmd_mkhuge(entry); pgtable_trans_huge_deposit(mm, pmd, pgtable); set_pmd_at(mm, haddr, pmd, entry); atomic_long_inc(&mm->nr_ptes); - return true; } int do_huge_pmd_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, @@ -806,6 +825,7 @@ int do_huge_pmd_anonymous_page(struct mm pgtable_t pgtable; struct page *zero_page; bool set; + int ret; pgtable = pte_alloc_one(mm, haddr); if (unlikely(!pgtable)) return VM_FAULT_OOM; @@ -816,14 +836,28 @@ int do_huge_pmd_anonymous_page(struct mm return VM_FAULT_FALLBACK; } ptl = pmd_lock(mm, pmd); - set = set_huge_zero_page(pgtable, mm, vma, haddr, pmd, - zero_page); - spin_unlock(ptl); + ret = 0; + set = false; + if (pmd_none(*pmd)) { + if (userfaultfd_missing(vma)) { + spin_unlock(ptl); + ret = handle_userfault(vma, haddr, flags, + VM_UFFD_MISSING); + VM_BUG_ON(ret & VM_FAULT_FALLBACK); + } else { + set_huge_zero_page(pgtable, mm, vma, + haddr, pmd, + zero_page); + spin_unlock(ptl); + set = true; + } + } else + spin_unlock(ptl); if (!set) { pte_free(mm, pgtable); put_huge_zero_page(); } - return 0; + return ret; } gfp = alloc_hugepage_gfpmask(transparent_hugepage_defrag(vma), 0); page = alloc_hugepage_vma(gfp, vma, haddr, HPAGE_PMD_ORDER); @@ -831,14 +865,7 @@ int do_huge_pmd_anonymous_page(struct mm count_vm_event(THP_FAULT_FALLBACK); return VM_FAULT_FALLBACK; } - if (unlikely(__do_huge_pmd_anonymous_page(mm, vma, haddr, pmd, page, gfp))) { - put_page(page); - count_vm_event(THP_FAULT_FALLBACK); - return VM_FAULT_FALLBACK; - } - - count_vm_event(THP_FAULT_ALLOC); - return 0; + return __do_huge_pmd_anonymous_page(mm, vma, haddr, pmd, page, gfp, flags); } int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, @@ -873,16 +900,14 @@ int copy_huge_pmd(struct mm_struct *dst_ */ if (is_huge_zero_pmd(pmd)) { struct page *zero_page; - bool set; /* * get_huge_zero_page() will never allocate a new page here, * since we already have a zero page to copy. It just takes a * reference. */ zero_page = get_huge_zero_page(); - set = set_huge_zero_page(pgtable, dst_mm, vma, addr, dst_pmd, + set_huge_zero_page(pgtable, dst_mm, vma, addr, dst_pmd, zero_page); - BUG_ON(!set); /* unexpected !pmd_none(dst_pmd) */ ret = 0; goto out_unlock; } diff -puN mm/memory.c~userfaultfd-call-handle_userfault-for-userfaultfd_missing-faults mm/memory.c --- a/mm/memory.c~userfaultfd-call-handle_userfault-for-userfaultfd_missing-faults +++ a/mm/memory.c @@ -61,6 +61,7 @@ #include <linux/string.h> #include <linux/dma-debug.h> #include <linux/debugfs.h> +#include <linux/userfaultfd_k.h> #include <asm/io.h> #include <asm/pgalloc.h> @@ -2680,6 +2681,12 @@ static int do_anonymous_page(struct mm_s page_table = pte_offset_map_lock(mm, pmd, address, &ptl); if (!pte_none(*page_table)) goto unlock; + /* Deliver the page fault to userland, check inside PT lock */ + if (userfaultfd_missing(vma)) { + pte_unmap_unlock(page_table, ptl); + return handle_userfault(vma, address, flags, + VM_UFFD_MISSING); + } goto setpte; } @@ -2707,6 +2714,15 @@ static int do_anonymous_page(struct mm_s if (!pte_none(*page_table)) goto release; + /* Deliver the page fault to userland, check inside PT lock */ + if (userfaultfd_missing(vma)) { + pte_unmap_unlock(page_table, ptl); + mem_cgroup_cancel_charge(page, memcg); + page_cache_release(page); + return handle_userfault(vma, address, flags, + VM_UFFD_MISSING); + } + inc_mm_counter_fast(mm, MM_ANONPAGES); page_add_new_anon_rmap(page, vma, address); mem_cgroup_commit_charge(page, memcg, false); _ Patches currently in -mm which might be from aarcange@xxxxxxxxxx are thp-cleanup-how-khugepaged-enters-freezer.patch mm-drop-bogus-vm_bug_on_page-assert-in-put_page-codepath.patch mm-avoid-tail-page-refcounting-on-non-thp-compound-pages.patch mm-thp-split-out-pmd-collpase-flush-into-a-separate-functions.patch mm-thp-split-out-pmd-collpase-flush-into-a-separate-functions-fix.patch powerpc-mm-use-generic-version-of-pmdp_clear_flush.patch mm-clarify-that-the-function-operateds-on-hugepage-pte.patch userfaultfd-linux-documentation-vm-userfaultfdtxt.patch userfaultfd-waitqueue-add-nr-wake-parameter-to-__wake_up_locked_key.patch userfaultfd-uapi.patch userfaultfd-linux-userfaultfd_kh.patch userfaultfd-add-vm_userfaultfd_ctx-to-the-vm_area_struct.patch userfaultfd-add-vm_uffd_missing-and-vm_uffd_wp.patch userfaultfd-call-handle_userfault-for-userfaultfd_missing-faults.patch userfaultfd-teach-vma_merge-to-merge-across-vma-vm_userfaultfd_ctx.patch userfaultfd-prevent-khugepaged-to-merge-if-userfaultfd-is-armed.patch userfaultfd-add-new-syscall-to-provide-memory-externalization.patch userfaultfd-add-new-syscall-to-provide-memory-externalization-fix.patch userfaultfd-rename-uffd_apibits-into-features.patch userfaultfd-rename-uffd_apibits-into-features-fixup.patch userfaultfd-change-the-read-api-to-return-a-uffd_msg.patch userfaultfd-wake-pending-userfaults.patch userfaultfd-optimize-read-and-poll-to-be-o1.patch userfaultfd-allocate-the-userfaultfd_ctx-cacheline-aligned.patch userfaultfd-solve-the-race-between-uffdio_copyzeropage-and-read.patch userfaultfd-buildsystem-activation.patch userfaultfd-activate-syscall.patch userfaultfd-uffdio_copyuffdio_zeropage-uapi.patch userfaultfd-mcopy_atomicmfill_zeropage-uffdio_copyuffdio_zeropage-preparation.patch userfaultfd-avoid-mmap_sem-read-recursion-in-mcopy_atomic.patch userfaultfd-uffdio_copy-and-uffdio_zeropage.patch page-flags-trivial-cleanup-for-pagetrans-helpers.patch page-flags-introduce-page-flags-policies-wrt-compound-pages.patch page-flags-define-pg_locked-behavior-on-compound-pages.patch page-flags-define-behavior-of-fs-io-related-flags-on-compound-pages.patch page-flags-define-behavior-of-lru-related-flags-on-compound-pages.patch page-flags-define-behavior-slb-related-flags-on-compound-pages.patch page-flags-define-behavior-of-xen-related-flags-on-compound-pages.patch page-flags-define-pg_reserved-behavior-on-compound-pages.patch page-flags-define-pg_swapbacked-behavior-on-compound-pages.patch page-flags-define-pg_swapcache-behavior-on-compound-pages.patch page-flags-define-pg_mlocked-behavior-on-compound-pages.patch page-flags-define-pg_uncached-behavior-on-compound-pages.patch page-flags-define-pg_uptodate-behavior-on-compound-pages.patch page-flags-look-on-head-page-if-the-flag-is-encoded-in-page-mapping.patch mm-sanitize-page-mapping-for-tail-pages.patch -- To unsubscribe from this list: send the line "unsubscribe mm-commits" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html