From: Peter Zijlstra <peterz@xxxxxxxxxxxxx>

When speculating faults (without holding mmap_sem) we need to validate
that the vma against which we loaded pages is still valid when we're
ready to install the new PTE.

Therefore, replace the pte_offset_map_lock() calls that (re)take the
PTL with pte_map_lock(), which can fail if we find that the VMA has
changed since we started the fault.

Instead of passing around the endless list of function arguments,
replace the lot with a single structure so we can change context
without endless function signature changes.

Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
[port to 4.10 kernel]
Signed-off-by: Laurent Dufour <ldufour@xxxxxxxxxxxxxxxxxx>
---
 include/linux/mm.h |  1 +
 mm/memory.c        | 57 +++++++++++++++++++++++++++++++++++++++---------------
 2 files changed, 42 insertions(+), 16 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index b84615b0f64c..555ac9ac7202 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -280,6 +280,7 @@ extern pgprot_t protection_map[16];
 #define FAULT_FLAG_USER		0x40	/* The fault originated in userspace */
 #define FAULT_FLAG_REMOTE	0x80	/* faulting for non current tsk/mm */
 #define FAULT_FLAG_INSTRUCTION  0x100	/* The fault was during an instruction fetch */
+#define FAULT_FLAG_SPECULATIVE	0x200	/* Speculative fault, not holding mmap_sem */

 /*
  * vm_fault is filled by the the pagefault handler and passed to the vma's
diff --git a/mm/memory.c b/mm/memory.c
index 374b99de75a5..bce32c9d73c2 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2100,6 +2100,12 @@ static inline void wp_page_reuse(struct vm_fault *vmf)
 	pte_unmap_unlock(vmf->pte, vmf->ptl);
 }

+static bool pte_map_lock(struct vm_fault *vmf)
+{
+	vmf->pte = pte_offset_map_lock(vmf->vma->vm_mm, vmf->pmd, vmf->address, &vmf->ptl);
+	return true;
+}
+
 /*
  * Handle the case of a page which we actually need to copy to a new page.
  *
@@ -2127,6 +2133,7 @@ static int wp_page_copy(struct vm_fault *vmf)
 	const unsigned long mmun_start = vmf->address & PAGE_MASK;
 	const unsigned long mmun_end = mmun_start + PAGE_SIZE;
 	struct mem_cgroup *memcg;
+	int ret = VM_FAULT_OOM;

 	if (unlikely(anon_vma_prepare(vma)))
 		goto oom;
@@ -2154,7 +2161,11 @@ static int wp_page_copy(struct vm_fault *vmf)
 	/*
 	 * Re-check the pte - we dropped the lock
 	 */
-	vmf->pte = pte_offset_map_lock(mm, vmf->pmd, vmf->address, &vmf->ptl);
+	if (!pte_map_lock(vmf)) {
+		mem_cgroup_cancel_charge(new_page, memcg, false);
+		ret = VM_FAULT_RETRY;
+		goto oom_free_new;
+	}
 	if (likely(pte_same(*vmf->pte, vmf->orig_pte))) {
 		if (old_page) {
 			if (!PageAnon(old_page)) {
@@ -2242,7 +2253,7 @@ static int wp_page_copy(struct vm_fault *vmf)
 oom:
 	if (old_page)
 		put_page(old_page);
-	return VM_FAULT_OOM;
+	return ret;
 }

 /**
@@ -2263,8 +2274,8 @@ static int wp_page_copy(struct vm_fault *vmf)
 int finish_mkwrite_fault(struct vm_fault *vmf)
 {
 	WARN_ON_ONCE(!(vmf->vma->vm_flags & VM_SHARED));
-	vmf->pte = pte_offset_map_lock(vmf->vma->vm_mm, vmf->pmd, vmf->address,
-				       &vmf->ptl);
+	if (!pte_map_lock(vmf))
+		return VM_FAULT_RETRY;
 	/*
 	 * We might have raced with another page fault while we released the
 	 * pte_offset_map_lock.
@@ -2382,8 +2393,11 @@ static int do_wp_page(struct vm_fault *vmf)
 		get_page(vmf->page);
 		pte_unmap_unlock(vmf->pte, vmf->ptl);
 		lock_page(vmf->page);
-		vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
-				vmf->address, &vmf->ptl);
+		if (!pte_map_lock(vmf)) {
+			unlock_page(vmf->page);
+			put_page(vmf->page);
+			return VM_FAULT_RETRY;
+		}
 		if (!pte_same(*vmf->pte, vmf->orig_pte)) {
 			unlock_page(vmf->page);
 			pte_unmap_unlock(vmf->pte, vmf->ptl);
@@ -2541,8 +2555,10 @@ int do_swap_page(struct vm_fault *vmf)
 			 * Back out if somebody else faulted in this pte
 			 * while we released the pte lock.
 			 */
-			vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
-					vmf->address, &vmf->ptl);
+			if (!pte_map_lock(vmf)) {
+				delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
+				return VM_FAULT_RETRY;
+			}
 			if (likely(pte_same(*vmf->pte, vmf->orig_pte)))
 				ret = VM_FAULT_OOM;
 			delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
@@ -2598,8 +2614,11 @@ int do_swap_page(struct vm_fault *vmf)
 	/*
 	 * Back out if somebody else already faulted in this pte.
 	 */
-	vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address,
-			&vmf->ptl);
+	if (!pte_map_lock(vmf)) {
+		ret = VM_FAULT_RETRY;
+		mem_cgroup_cancel_charge(page, memcg, false);
+		goto out_page;
+	}
 	if (unlikely(!pte_same(*vmf->pte, vmf->orig_pte)))
 		goto out_nomap;

@@ -2763,8 +2782,8 @@ static int do_anonymous_page(struct vm_fault *vmf)
 			!mm_forbids_zeropage(vma->vm_mm)) {
 		entry = pte_mkspecial(pfn_pte(my_zero_pfn(vmf->address),
 						vma->vm_page_prot));
-		vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
-				vmf->address, &vmf->ptl);
+		if (!pte_map_lock(vmf))
+			return VM_FAULT_RETRY;
 		if (!pte_none(*vmf->pte))
 			goto unlock;
 		/* Deliver the page fault to userland, check inside PT lock */
@@ -2796,8 +2815,12 @@ static int do_anonymous_page(struct vm_fault *vmf)
 	if (vma->vm_flags & VM_WRITE)
 		entry = pte_mkwrite(pte_mkdirty(entry));

-	vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address,
-			&vmf->ptl);
+	if (!pte_map_lock(vmf)) {
+		/* XXX: should be factorized */
+		mem_cgroup_cancel_charge(page, memcg, false);
+		put_page(page);
+		return VM_FAULT_RETRY;
+	}
 	if (!pte_none(*vmf->pte))
 		goto release;

@@ -2897,8 +2920,9 @@ static int pte_alloc_one_map(struct vm_fault *vmf)
 	if (pmd_trans_unstable(vmf->pmd) || pmd_devmap(*vmf->pmd))
 		return VM_FAULT_NOPAGE;

-	vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address,
-			&vmf->ptl);
+	if (!pte_map_lock(vmf))
+		return VM_FAULT_RETRY;
+
 	return 0;
 }

@@ -3218,6 +3242,7 @@ static int do_read_fault(struct vm_fault *vmf)
 	 * something).
 	 */
 	if (vma->vm_ops->map_pages && fault_around_bytes >> PAGE_SHIFT > 1) {
+		/* XXX: is a call to pte_map_lock(fe) required here ? */
 		ret = do_fault_around(vmf);
 		if (ret)
 			return ret;
-- 
2.7.4
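
Note for readers following the series: as posted here, pte_map_lock() still
always returns true, so the new VM_FAULT_RETRY paths cannot trigger yet; per
the changelog, a later change is expected to make it fail when the VMA has
changed under a speculative fault. The standalone userspace sketch below is
not kernel code: struct vma_model, vma_write_begin()/vma_write_end(),
vma_read_begin()/vma_read_retry() and speculative_fault() are invented names,
and the memory-ordering details of a real seqlock are elided. It only models
the speculate-then-revalidate control flow the callers are being prepared
for: snapshot a sequence count, do the work, and bail out for a retry if a
writer ran in the meantime.

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct vma_model {
	atomic_uint seq;	/* even: stable, odd: writer in progress */
	int prot;		/* stand-in for state the fault depends on */
};

/* Writer side: mark the structure unstable while it is being changed. */
static void vma_write_begin(struct vma_model *v)
{
	atomic_fetch_add_explicit(&v->seq, 1, memory_order_acq_rel);
}

static void vma_write_end(struct vma_model *v)
{
	atomic_fetch_add_explicit(&v->seq, 1, memory_order_acq_rel);
}

/* Reader side: snapshot a stable (even) sequence value. */
static unsigned int vma_read_begin(struct vma_model *v)
{
	unsigned int s;

	do {
		s = atomic_load_explicit(&v->seq, memory_order_acquire);
	} while (s & 1);
	return s;
}

/* True if a writer ran since the snapshot: the caller must retry. */
static bool vma_read_retry(struct vma_model *v, unsigned int snap)
{
	return atomic_load_explicit(&v->seq, memory_order_acquire) != snap;
}

/* The pte_map_lock() analogue: refuse to commit a possibly stale result. */
static bool speculative_fault(struct vma_model *v, int *out)
{
	unsigned int snap = vma_read_begin(v);
	int result = v->prot * 2;	/* stand-in for the real fault work */

	if (vma_read_retry(v, snap))
		return false;		/* caller maps this to VM_FAULT_RETRY */
	*out = result;			/* only now install the result */
	return true;
}

int main(void)
{
	struct vma_model v = { .prot = 3 };
	int out;

	atomic_init(&v.seq, 0);

	/* A writer updates the state with the sequence count held odd. */
	vma_write_begin(&v);
	v.prot = 4;
	vma_write_end(&v);

	/*
	 * No writer runs concurrently with the speculation here, so it
	 * always commits, much like pte_map_lock() in this patch, which
	 * cannot fail yet.
	 */
	if (speculative_fault(&v, &out))
		printf("committed %d\n", out);
	else
		printf("raced with a writer, retrying\n");
	return 0;
}

Built with e.g. "cc -std=c11" this prints "committed 8"; the retry branch
stays dead until a writer can race with the reader, mirroring the retry
paths added above.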