force_flush is used in two cases: a full TLB batch, and unmapping a shared dirty page. Only the latter requires the TLB to be flushed under the page table lock, because the problem there is page_mkclean() returning while there are still writable TLB entries through which the page can be modified.

We are encountering soft lockups due to high TLB flush latency with very large guests. There is probably some contention in the hypervisor and some interconnect tuning to be done, and it's actually a hash MMU guest which has a whole other set of issues, but I'm looking for general ways to reduce TLB flushing under locks.
---
 mm/memory.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/mm/memory.c b/mm/memory.c
index 773d588b371d..1161ed3f1d0b 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1281,6 +1281,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 {
 	struct mm_struct *mm = tlb->mm;
 	int force_flush = 0;
+	int locked_flush = 0;
 	int rss[NR_MM_COUNTERS];
 	spinlock_t *ptl;
 	pte_t *start_pte;
@@ -1322,6 +1323,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 			if (!PageAnon(page)) {
 				if (pte_dirty(ptent)) {
 					force_flush = 1;
+					locked_flush = 1;
 					set_page_dirty(page);
 				}
 				if (pte_young(ptent) &&
@@ -1384,7 +1386,7 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 	arch_leave_lazy_mmu_mode();
 
 	/* Do the actual TLB flush before dropping ptl */
-	if (force_flush)
+	if (locked_flush)
 		tlb_flush_mmu_tlbonly(tlb);
 	pte_unmap_unlock(start_pte, ptl);
 
@@ -1395,8 +1397,12 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
 	 * memory too. Restart if we didn't do everything.
 	 */
 	if (force_flush) {
-		force_flush = 0;
+		if (!locked_flush)
+			tlb_flush_mmu_tlbonly(tlb);
 		tlb_flush_mmu_free(tlb);
+
+		force_flush = 0;
+		locked_flush = 0;
 		if (addr != end)
 			goto again;
 	}
-- 
2.17.0
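
P.S. To make the intended flush placement explicit, here is a condensed, annotated sketch of how the end of zap_pte_range() behaves with this patch applied (not the actual function body, just the two flush paths pulled out):

	/*
	 * Under ptl: only the dirty-shared-page case needs a flush here,
	 * so page_mkclean() cannot return while writable TLB entries
	 * still exist for the page.
	 */
	if (locked_flush)
		tlb_flush_mmu_tlbonly(tlb);
	pte_unmap_unlock(start_pte, ptl);

	/*
	 * After dropping ptl: the batch-full case can flush the TLB
	 * here instead, outside the lock, before freeing the batched
	 * pages and possibly restarting.
	 */
	if (force_flush) {
		if (!locked_flush)
			tlb_flush_mmu_tlbonly(tlb);
		tlb_flush_mmu_free(tlb);

		force_flush = 0;
		locked_flush = 0;
		if (addr != end)
			goto again;
	}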