Update the way arch_[enter|leave]_lazy_mmu_mode() is called in
pagemap_scan_pmd_entry() to follow the normal pattern of holding the ptl
for user space mappings. As a result the scope is reduced to only the pte
table, but that's where most of the performance win is. While I believe
there wasn't technically a bug here, the original scope made it easier to
accidentally nest or, worse, to accidentally call something like kmap(),
which expects an immediate-mode pte modification but would instead end up
deferred.

Acked-by: David Hildenbrand <david@xxxxxxxxxx>
Signed-off-by: Ryan Roberts <ryan.roberts@xxxxxxx>
---
 fs/proc/task_mmu.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index c17615e21a5d..b0f189815512 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -2459,22 +2459,19 @@ static int pagemap_scan_pmd_entry(pmd_t *pmd, unsigned long start,
 	spinlock_t *ptl;
 	int ret;
 
-	arch_enter_lazy_mmu_mode();
-
 	ret = pagemap_scan_thp_entry(pmd, start, end, walk);
-	if (ret != -ENOENT) {
-		arch_leave_lazy_mmu_mode();
+	if (ret != -ENOENT)
 		return ret;
-	}
 
 	ret = 0;
 	start_pte = pte = pte_offset_map_lock(vma->vm_mm, pmd, start, &ptl);
 	if (!pte) {
-		arch_leave_lazy_mmu_mode();
 		walk->action = ACTION_AGAIN;
 		return 0;
 	}
 
+	arch_enter_lazy_mmu_mode();
+
 	if ((p->arg.flags & PM_SCAN_WP_MATCHING) && !p->vec_out) {
 		/* Fast path for performing exclusive WP */
 		for (addr = start; addr != end; pte++, addr += PAGE_SIZE) {
@@ -2543,8 +2540,8 @@ static int pagemap_scan_pmd_entry(pmd_t *pmd, unsigned long start,
 	if (flush_end)
 		flush_tlb_range(vma, start, addr);
 
-	pte_unmap_unlock(start_pte, ptl);
 	arch_leave_lazy_mmu_mode();
+	pte_unmap_unlock(start_pte, ptl);
 
 	cond_resched();
 	return ret;
-- 
2.43.0
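
For reference, a minimal sketch (not part of the patch) of the pte-table
walk shape the commit message refers to, where lazy MMU mode is entered
and left only while the ptl is held; do_something_with_pte() is a
hypothetical placeholder for the per-entry work:

static int walk_pte_table(struct vm_area_struct *vma, pmd_t *pmd,
			  unsigned long start, unsigned long end)
{
	pte_t *start_pte, *pte;
	unsigned long addr;
	spinlock_t *ptl;

	start_pte = pte = pte_offset_map_lock(vma->vm_mm, pmd, start, &ptl);
	if (!pte)
		return -EAGAIN;

	/* Enter lazy MMU mode only once the ptl is held. */
	arch_enter_lazy_mmu_mode();

	for (addr = start; addr != end; pte++, addr += PAGE_SIZE)
		do_something_with_pte(vma, addr, pte);

	/* Leave lazy MMU mode before dropping the ptl. */
	arch_leave_lazy_mmu_mode();
	pte_unmap_unlock(start_pte, ptl);

	return 0;
}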