[ Added linux-arch, for totally untested patch to prepare for some TLB
  shootdown fixes ]

On Mon, Apr 21, 2014 at 1:52 PM, H. Peter Anvin <hpa@xxxxxxxxx> wrote:
>
> So yes, we need to (re-)mark the page dirty some way or another after
> the TLB shootdown has completed, or somehow hold the I/O until after
> that point.

So how about this preparatory patch. NOTE! This doesn't actually fix
anything, but it moves the dirty bit into the mmu_gather domain, so
that eventually things _can_ be fixed.

Right now every implementation of __tlb_remove_page() just ends up
doing that synchronous

	if (dirty)
		set_page_dirty(page);

without batching it up. But at least it should make it possible to
start fixing things.

Actually moving the dirty bit information into the batching is
somewhat painful, because free_pages_and_swap_cache() really wants an
array of 'struct page *', rather than some array of "page and dirty
info". And that in turn is mainly because it then passes it down to
release_pages(), rather than anything else. But it looks doable.

Comments? This compiles on x86; does it compile on other architectures
(in particular, the ones that don't use asm-generic/tlb.h and thus
don't have HAVE_GENERIC_MMU_GATHER)?

                  Linus
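PS: to show where this could eventually go, here is one possible shape
for the batching itself - purely a sketch, and the helper names
(encode_page_dirty/decode_page) are invented for illustration, not
part of this patch. Since 'struct page *' is always at least
word-aligned, bit 0 of the pointer is free, so the dirty state could
ride along inside the existing 'struct page *' arrays without changing
their layout:

	/*
	 * Sketch only: smuggle the dirty bit into bit 0 of the
	 * 'struct page *' stored in the mmu_gather batch.  struct page
	 * is word-aligned, so bit 0 is otherwise always zero.
	 */
	#define TLB_PAGE_DIRTY	1UL

	static inline struct page *encode_page_dirty(struct page *page, bool dirty)
	{
		unsigned long v = (unsigned long)page;

		return (struct page *)(v | (dirty ? TLB_PAGE_DIRTY : 0));
	}

	static inline struct page *decode_page(struct page *encoded, bool *dirty)
	{
		unsigned long v = (unsigned long)encoded;

		*dirty = v & TLB_PAGE_DIRTY;
		return (struct page *)(v & ~TLB_PAGE_DIRTY);
	}

That would keep free_pages_and_swap_cache() and release_pages()
working on a plain 'struct page *' array, at the cost of one masking
pass over the batch before they see it.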
---
 arch/arm/include/asm/tlb.h  |  6 ++++--
 arch/ia64/include/asm/tlb.h |  6 ++++--
 arch/s390/include/asm/tlb.h |  4 +++-
 arch/sh/include/asm/tlb.h   |  6 ++++--
 arch/um/include/asm/tlb.h   |  6 ++++--
 include/asm-generic/tlb.h   |  4 ++--
 mm/hugetlb.c                |  4 +---
 mm/memory.c                 | 15 +++++++++------
 8 files changed, 31 insertions(+), 20 deletions(-)

diff --git a/arch/arm/include/asm/tlb.h b/arch/arm/include/asm/tlb.h
index 0baf7f0d9394..ac9c16af8e63 100644
--- a/arch/arm/include/asm/tlb.h
+++ b/arch/arm/include/asm/tlb.h
@@ -165,8 +165,10 @@ tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma)
 		tlb_flush(tlb);
 }
 
-static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
+static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page, bool dirty)
 {
+	if (dirty)
+		set_page_dirty(page);
 	tlb->pages[tlb->nr++] = page;
 	VM_BUG_ON(tlb->nr > tlb->max);
 	return tlb->max - tlb->nr;
@@ -174,7 +176,7 @@ static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
 
 static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
 {
-	if (!__tlb_remove_page(tlb, page))
+	if (!__tlb_remove_page(tlb, page, 0))
 		tlb_flush_mmu(tlb);
 }
 
diff --git a/arch/ia64/include/asm/tlb.h b/arch/ia64/include/asm/tlb.h
index bc5efc7c3f3f..9049a7d6427d 100644
--- a/arch/ia64/include/asm/tlb.h
+++ b/arch/ia64/include/asm/tlb.h
@@ -193,8 +193,10 @@ tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
  * must be delayed until after the TLB has been flushed (see comments at the beginning of
  * this file).
  */
-static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
+static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page, bool dirty)
 {
+	if (dirty)
+		set_page_dirty(page);
 	tlb->need_flush = 1;
 
 	if (!tlb->nr && tlb->pages == tlb->local)
@@ -213,7 +215,7 @@ static inline void tlb_flush_mmu(struct mmu_gather *tlb)
 
 static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
 {
-	if (!__tlb_remove_page(tlb, page))
+	if (!__tlb_remove_page(tlb, page, 0))
 		tlb_flush_mmu(tlb);
 }
 
diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h
index c544b6f05d95..8107a8c53985 100644
--- a/arch/s390/include/asm/tlb.h
+++ b/arch/s390/include/asm/tlb.h
@@ -76,8 +76,10 @@ static inline void tlb_finish_mmu(struct mmu_gather *tlb,
  * tlb_ptep_clear_flush. In both flush modes the tlb for a page cache page
  * has already been freed, so just do free_page_and_swap_cache.
  */
-static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
+static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page, bool dirty)
 {
+	if (dirty)
+		set_page_dirty(page);
 	free_page_and_swap_cache(page);
 	return 1; /* avoid calling tlb_flush_mmu */
 }
 
diff --git a/arch/sh/include/asm/tlb.h b/arch/sh/include/asm/tlb.h
index 362192ed12fe..428de7858d27 100644
--- a/arch/sh/include/asm/tlb.h
+++ b/arch/sh/include/asm/tlb.h
@@ -90,15 +90,17 @@ static inline void tlb_flush_mmu(struct mmu_gather *tlb)
 {
 }
 
-static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
+static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page, bool dirty)
 {
+	if (dirty)
+		set_page_dirty(page);
 	free_page_and_swap_cache(page);
 	return 1; /* avoid calling tlb_flush_mmu */
 }
 
 static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
 {
-	__tlb_remove_page(tlb, page);
+	__tlb_remove_page(tlb, page, 0);
 }
 
 #define pte_free_tlb(tlb, ptep, addr)	pte_free((tlb)->mm, ptep)
diff --git a/arch/um/include/asm/tlb.h b/arch/um/include/asm/tlb.h
index 29b0301c18aa..df557f987819 100644
--- a/arch/um/include/asm/tlb.h
+++ b/arch/um/include/asm/tlb.h
@@ -86,8 +86,10 @@ tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
  * while handling the additional races in SMP caused by other CPUs
  * caching valid mappings in their TLBs.
  */
-static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
+static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page, bool dirty)
 {
+	if (dirty)
+		set_page_dirty(page);
 	tlb->need_flush = 1;
 	free_page_and_swap_cache(page);
 	return 1; /* avoid calling tlb_flush_mmu */
@@ -95,7 +97,7 @@ static inline int __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
 
 static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
 {
-	__tlb_remove_page(tlb, page);
+	__tlb_remove_page(tlb, page, 0);
 }
 
 /**
diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h
index 5672d7ea1fa0..541c563f0ff9 100644
--- a/include/asm-generic/tlb.h
+++ b/include/asm-generic/tlb.h
@@ -116,7 +116,7 @@ void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, unsigned long
 void tlb_flush_mmu(struct mmu_gather *tlb);
 void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start,
 							unsigned long end);
-int __tlb_remove_page(struct mmu_gather *tlb, struct page *page);
+int __tlb_remove_page(struct mmu_gather *tlb, struct page *page, bool dirty);
 
 /* tlb_remove_page
  *	Similar to __tlb_remove_page but will call tlb_flush_mmu() itself when
@@ -124,7 +124,7 @@ int __tlb_remove_page(struct mmu_gather *tlb, struct page *page);
  */
 static inline void tlb_remove_page(struct mmu_gather *tlb, struct page *page)
 {
-	if (!__tlb_remove_page(tlb, page))
+	if (!__tlb_remove_page(tlb, page, 0))
 		tlb_flush_mmu(tlb);
 }
 
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 246192929a2d..dfbe23c0c200 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2516,11 +2516,9 @@ again:
 
 		pte = huge_ptep_get_and_clear(mm, address, ptep);
 		tlb_remove_tlb_entry(tlb, ptep, address);
-		if (huge_pte_dirty(pte))
-			set_page_dirty(page);
 
 		page_remove_rmap(page);
-		force_flush = !__tlb_remove_page(tlb, page);
+		force_flush = !__tlb_remove_page(tlb, page, huge_pte_dirty(pte));
 		if (force_flush) {
 			spin_unlock(ptl);
 			break;
diff --git a/mm/memory.c b/mm/memory.c
index d0f0bef3be48..62fdcd1995f4 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -277,12 +277,15 @@ void tlb_finish_mmu(struct mmu_gather *tlb, unsigned long start, unsigned long end)
  * mappings in their TLBs. Returns the number of free page slots left.
  * When out of page slots we must call tlb_flush_mmu().
  */
-int __tlb_remove_page(struct mmu_gather *tlb, struct page *page)
+int __tlb_remove_page(struct mmu_gather *tlb, struct page *page, bool dirty)
 {
 	struct mmu_gather_batch *batch;
 
 	VM_BUG_ON(!tlb->need_flush);
 
+	/* FIXME! This needs to be batched too */
+	if (dirty)
+		set_page_dirty(page);
 	batch = tlb->active;
 	batch->pages[batch->nr++] = page;
 	if (batch->nr == batch->max) {
@@ -1124,11 +1127,11 @@ again:
 					pte_file_mksoft_dirty(ptfile);
 				set_pte_at(mm, addr, pte, ptfile);
 			}
-			if (PageAnon(page))
+			if (PageAnon(page)) {
+				/* We don't bother saving dirty state for anonymous pages */
+				ptent = pte_mkclean(ptent);
 				rss[MM_ANONPAGES]--;
-			else {
-				if (pte_dirty(ptent))
-					set_page_dirty(page);
+			} else {
 				if (pte_young(ptent) &&
 				    likely(!(vma->vm_flags & VM_SEQ_READ)))
 					mark_page_accessed(page);
@@ -1137,7 +1140,7 @@ again:
 			page_remove_rmap(page);
 			if (unlikely(page_mapcount(page) < 0))
 				print_bad_pte(vma, addr, ptent, page);
-			force_flush = !__tlb_remove_page(tlb, page);
+			force_flush = !__tlb_remove_page(tlb, page, pte_dirty(ptent));
 			if (force_flush)
 				break;
 			continue;
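PS2: continuing the earlier sketch, the flush side (again with invented
names, not part of this patch) would then re-mark the dirty pages only
after the TLB shootdown has actually completed, which is the whole
point of batching it:

	/*
	 * Sketch only: runs after tlb_flush_mmu() has done the real
	 * TLB invalidation, so set_page_dirty() can no longer race
	 * with another CPU still writing through a stale TLB entry.
	 */
	static void tlb_flush_mmu_free_pages(struct mmu_gather_batch *batch)
	{
		int i;

		for (i = 0; i < batch->nr; i++) {
			bool dirty;
			struct page *page = decode_page(batch->pages[i], &dirty);

			if (dirty)
				set_page_dirty(page);
			/* strip the flag so release_pages() sees a real pointer */
			batch->pages[i] = page;
		}
		free_pages_and_swap_cache(batch->pages, batch->nr);
	}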