The patch titled Subject: mm: wrap calls to set_pte_at_notify with invalidate_range_start and invalidate_range_end has been added to the -mm tree. Its filename is mm-wrap-calls-to-set_pte_at_notify-with-invalidate_range_start-and-invalidate_range_end.patch Before you just go and hit "reply", please: a) Consider who else should be cc'ed b) Prefer to cc a suitable mailing list as well c) Ideally: find the original patch on the mailing list and do a reply-to-all to that, adding suitable additional cc's *** Remember to use Documentation/SubmitChecklist when testing your code *** The -mm tree is included in linux-next and is updated there every 3-4 working days ------------------------------------------------------ From: Haggai Eran <haggaie@xxxxxxxxxxxx> Subject: mm: wrap calls to set_pte_at_notify with invalidate_range_start and invalidate_range_end In order to allow sleeping during invalidate_page mmu notifier calls, we need to avoid calling them while holding the PT lock. In addition to its direct calls, invalidate_page can also be called as a substitute for a change_pte call, in case the notifier client hasn't implemented change_pte. This patch drops the invalidate_page call from change_pte, and instead wraps all calls to change_pte with invalidate_range_start and invalidate_range_end calls. Note that change_pte still cannot sleep after this patch, and that clients implementing change_pte should not act on it when the number of outstanding invalidate_range_start calls is larger than one; otherwise they might miss a later invalidation. 
Signed-off-by: Haggai Eran <haggaie@xxxxxxxxxxxx> Cc: Andrea Arcangeli <andrea@xxxxxxxxxxxx> Cc: Sagi Grimberg <sagig@xxxxxxxxxxxx> Cc: Peter Zijlstra <a.p.zijlstra@xxxxxxxxx> Cc: Xiao Guangrong <xiaoguangrong@xxxxxxxxxxxxxxxxxx> Cc: Or Gerlitz <ogerlitz@xxxxxxxxxxxx> Cc: Haggai Eran <haggaie@xxxxxxxxxxxx> Cc: Shachar Raindel <raindel@xxxxxxxxxxxx> Cc: Liran Liss <liranl@xxxxxxxxxxxx> Cc: Christoph Lameter <cl@xxxxxxxxxxxxxxxxxxxx> Cc: Avi Kivity <avi@xxxxxxxxxx> Cc: Hugh Dickins <hughd@xxxxxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> --- kernel/events/uprobes.c | 5 +++++ mm/ksm.c | 21 +++++++++++++++++++-- mm/memory.c | 17 +++++++++++------ mm/mmu_notifier.c | 6 ------ 4 files changed, 35 insertions(+), 14 deletions(-) diff -puN kernel/events/uprobes.c~mm-wrap-calls-to-set_pte_at_notify-with-invalidate_range_start-and-invalidate_range_end kernel/events/uprobes.c --- a/kernel/events/uprobes.c~mm-wrap-calls-to-set_pte_at_notify-with-invalidate_range_start-and-invalidate_range_end +++ a/kernel/events/uprobes.c @@ -141,10 +141,14 @@ static int __replace_page(struct vm_area spinlock_t *ptl; pte_t *ptep; int err; + /* For mmu_notifiers */ + const unsigned long mmun_start = addr; + const unsigned long mmun_end = addr + PAGE_SIZE; /* For try_to_free_swap() and munlock_vma_page() below */ lock_page(page); + mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end); err = -EAGAIN; ptep = page_check_address(page, mm, addr, &ptl, 0); if (!ptep) @@ -173,6 +177,7 @@ static int __replace_page(struct vm_area err = 0; unlock: + mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); unlock_page(page); return err; } diff -puN mm/ksm.c~mm-wrap-calls-to-set_pte_at_notify-with-invalidate_range_start-and-invalidate_range_end mm/ksm.c --- a/mm/ksm.c~mm-wrap-calls-to-set_pte_at_notify-with-invalidate_range_start-and-invalidate_range_end +++ a/mm/ksm.c @@ -709,15 +709,22 @@ static int write_protect_page(struct vm_ spinlock_t *ptl; int swapped; int err = 
-EFAULT; + unsigned long mmun_start; /* For mmu_notifiers */ + unsigned long mmun_end; /* For mmu_notifiers */ addr = page_address_in_vma(page, vma); if (addr == -EFAULT) goto out; BUG_ON(PageTransCompound(page)); + + mmun_start = addr; + mmun_end = addr + PAGE_SIZE; + mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end); + ptep = page_check_address(page, mm, addr, &ptl, 0); if (!ptep) - goto out; + goto out_mn; if (pte_write(*ptep) || pte_dirty(*ptep)) { pte_t entry; @@ -752,6 +759,8 @@ static int write_protect_page(struct vm_ out_unlock: pte_unmap_unlock(ptep, ptl); +out_mn: + mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); out: return err; } @@ -776,6 +785,8 @@ static int replace_page(struct vm_area_s spinlock_t *ptl; unsigned long addr; int err = -EFAULT; + unsigned long mmun_start; /* For mmu_notifiers */ + unsigned long mmun_end; /* For mmu_notifiers */ addr = page_address_in_vma(page, vma); if (addr == -EFAULT) @@ -794,10 +805,14 @@ static int replace_page(struct vm_area_s if (!pmd_present(*pmd)) goto out; + mmun_start = addr; + mmun_end = addr + PAGE_SIZE; + mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end); + ptep = pte_offset_map_lock(mm, pmd, addr, &ptl); if (!pte_same(*ptep, orig_pte)) { pte_unmap_unlock(ptep, ptl); - goto out; + goto out_mn; } get_page(kpage); @@ -814,6 +829,8 @@ static int replace_page(struct vm_area_s pte_unmap_unlock(ptep, ptl); err = 0; +out_mn: + mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); out: return err; } diff -puN mm/memory.c~mm-wrap-calls-to-set_pte_at_notify-with-invalidate_range_start-and-invalidate_range_end mm/memory.c --- a/mm/memory.c~mm-wrap-calls-to-set_pte_at_notify-with-invalidate_range_start-and-invalidate_range_end +++ a/mm/memory.c @@ -2527,6 +2527,8 @@ static int do_wp_page(struct mm_struct * int ret = 0; int page_mkwrite = 0; struct page *dirty_page = NULL; + unsigned long mmun_start; /* For mmu_notifiers */ + unsigned long mmun_end; /* For mmu_notifiers */ 
old_page = vm_normal_page(vma, address, orig_pte); if (!old_page) { @@ -2704,6 +2706,10 @@ gotten: if (mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL)) goto oom_free_new; + mmun_start = address & PAGE_MASK; + mmun_end = (address & PAGE_MASK) + PAGE_SIZE; + mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end); + /* * Re-check the pte - we dropped the lock */ @@ -2766,14 +2772,13 @@ gotten: } else mem_cgroup_uncharge_page(new_page); + if (new_page) + page_cache_release(new_page); unlock: pte_unmap_unlock(page_table, ptl); - if (new_page) { - if (new_page == old_page) - /* cow happened, notify before releasing old_page */ - mmu_notifier_invalidate_page(mm, address); - page_cache_release(new_page); - } + if (new_page) + /* Only call the end notifier if the begin was called. */ + mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); if (old_page) { /* * Don't let another task, with possibly unlocked vma, diff -puN mm/mmu_notifier.c~mm-wrap-calls-to-set_pte_at_notify-with-invalidate_range_start-and-invalidate_range_end mm/mmu_notifier.c --- a/mm/mmu_notifier.c~mm-wrap-calls-to-set_pte_at_notify-with-invalidate_range_start-and-invalidate_range_end +++ a/mm/mmu_notifier.c @@ -137,12 +137,6 @@ void __mmu_notifier_change_pte(struct mm hlist_for_each_entry_rcu(mn, n, &mm->mmu_notifier_mm->list, hlist) { if (mn->ops->change_pte) mn->ops->change_pte(mn, mm, address, pte); - /* - * Some drivers don't have change_pte, - * so we must call invalidate_page in that case. 
- */ - else if (mn->ops->invalidate_page) - mn->ops->invalidate_page(mn, mm, address); } srcu_read_unlock(&srcu, id); } _ Patches currently in -mm which might be from haggaie@xxxxxxxxxxxx are linux-next.patch mm-mmu_notifier-have-mmu_notifiers-use-a-global-srcu-so-they-may-safely-schedule.patch mm-mmu_notifier-init-notifier-if-necessary.patch mm-mmu_notifier-make-the-mmu_notifier-srcu-static.patch mm-move-all-mmu-notifier-invocations-to-be-done-outside-the-pt-lock.patch mm-wrap-calls-to-set_pte_at_notify-with-invalidate_range_start-and-invalidate_range_end.patch mm-wrap-calls-to-set_pte_at_notify-with-invalidate_range_start-and-invalidate_range_end-fix.patch -- To unsubscribe from this list: send the line "unsubscribe mm-commits" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html