For the architectures that do not implement their own tlb_flush() but do already use the generic mmu_gather, there are two options: 1) the platform has an efficient flush_tlb_range() and asm-generic/tlb.h doesn't need any overrides at all. 2) the platform lacks an efficient flush_tlb_range() and we select MMU_GATHER_NO_RANGE to minimize full invalidates. Convert all 'simple' architectures to one of these two forms. alpha: has no range invalidate -> 2 arc: already used flush_tlb_range() -> 1 c6x: has no range invalidate -> 2 hexagon: has an efficient flush_tlb_range() -> 1 (flush_tlb_mm() is in fact a full range invalidate, so no need to shoot down everything) m68k: has inefficient flush_tlb_range() -> 2 microblaze: has no flush_tlb_range() -> 2 mips: has efficient flush_tlb_range() -> 1 (even though it currently seems to use flush_tlb_mm()) nds32: already uses flush_tlb_range() -> 1 nios2: has inefficient flush_tlb_range() -> 2 (no limit on range iteration) openrisc: has inefficient flush_tlb_range() -> 2 (no limit on range iteration) parisc: already uses flush_tlb_range() -> 1 sparc32: already uses flush_tlb_range() -> 1 unicore32: has inefficient flush_tlb_range() -> 2 (no limit on range iteration) xtensa: has efficient flush_tlb_range() -> 1 Note this also fixes a bug in the existing code for a number platforms. Those platforms that did: tlb_end_vma() -> if (!full_mm) flush_tlb_*() tlb_flush -> if (full_mm) flush_tlb_mm() missed the case of shift_arg_pages(), which doesn't have @fullmm set, nor calls into tlb_*vma(), but still frees page-tables and thus needs an invalidate. The new code handles this by detecting a non-empty range, and either issuing the matching range invalidate or a full invalidate, depending on the capabilities. Cc: Nick Piggin <npiggin@xxxxxxxxx> Cc: "David S. Miller" <davem@xxxxxxxxxxxxx> Cc: Michal Simek <monstr@xxxxxxxxx> Cc: Helge Deller <deller@xxxxxx> Cc: Greentime Hu <green.hu@xxxxxxxxx> Cc: Richard Henderson <rth@xxxxxxxxxxx> Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> Cc: "Aneesh Kumar K.V" <aneesh.kumar@xxxxxxxxxxxxxxxxxx> Cc: Will Deacon <will.deacon@xxxxxxx> Cc: Ley Foon Tan <lftan@xxxxxxxxxx> Cc: Jonas Bonn <jonas@xxxxxxxxxxxx> Cc: Mark Salter <msalter@xxxxxxxxxx> Cc: Richard Kuo <rkuo@xxxxxxxxxxxxxx> Cc: Vineet Gupta <vgupta@xxxxxxxxxxxx> Cc: Paul Burton <paul.burton@xxxxxxxx> Cc: Max Filippov <jcmvbkbc@xxxxxxxxx> Cc: Guan Xuetao <gxt@xxxxxxxxxx> Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx> --- arch/alpha/Kconfig | 1 + arch/alpha/include/asm/tlb.h | 6 ------ arch/arc/include/asm/tlb.h | 23 ----------------------- arch/c6x/Kconfig | 1 + arch/c6x/include/asm/tlb.h | 2 -- arch/h8300/include/asm/tlb.h | 2 -- arch/hexagon/include/asm/tlb.h | 12 ------------ arch/m68k/Kconfig | 1 + arch/m68k/include/asm/tlb.h | 14 -------------- arch/microblaze/Kconfig | 1 + arch/microblaze/include/asm/tlb.h | 9 --------- arch/mips/include/asm/tlb.h | 8 -------- arch/nds32/include/asm/tlb.h | 10 ---------- arch/nios2/Kconfig | 1 + arch/nios2/include/asm/tlb.h | 8 ++++---- arch/openrisc/Kconfig | 1 + arch/openrisc/include/asm/tlb.h | 8 ++------ arch/parisc/include/asm/tlb.h | 13 ------------- arch/sparc/include/asm/tlb_32.h | 13 ------------- arch/unicore32/Kconfig | 1 + arch/unicore32/include/asm/tlb.h | 7 +++---- arch/xtensa/include/asm/tlb.h | 17 ----------------- 22 files changed, 16 insertions(+), 143 deletions(-) --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -36,6 +36,7 @@ config ALPHA select ODD_RT_SIGACTION select OLD_SIGSUSPEND select CPU_NO_EFFICIENT_FFS if !ALPHA_EV67 + select MMU_GATHER_NO_RANGE help The Alpha is a 64-bit general-purpose processor designed and marketed by the Digital Equipment Corporation of blessed memory, --- a/arch/alpha/include/asm/tlb.h +++ b/arch/alpha/include/asm/tlb.h @@ -2,12 +2,6 @@ #ifndef _ALPHA_TLB_H #define _ALPHA_TLB_H -#define tlb_start_vma(tlb, vma) do { } while (0) -#define tlb_end_vma(tlb, vma) do { } while (0) -#define __tlb_remove_tlb_entry(tlb, pte, addr) do { } while (0) - -#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm) - #include <asm-generic/tlb.h> #define __pte_free_tlb(tlb, pte, address) pte_free((tlb)->mm, pte) --- a/arch/arc/include/asm/tlb.h +++ b/arch/arc/include/asm/tlb.h @@ -9,29 +9,6 @@ #ifndef _ASM_ARC_TLB_H #define _ASM_ARC_TLB_H -#define tlb_flush(tlb) \ -do { \ - if (tlb->fullmm) \ - flush_tlb_mm((tlb)->mm); \ -} while (0) - -/* - * This pair is called at time of munmap/exit to flush cache and TLB entries - * for mappings being torn down. - * 1) cache-flush part -implemented via tlb_start_vma( ) for VIPT aliasing D$ - * 2) tlb-flush part - implemted via tlb_end_vma( ) flushes the TLB range - * - * Note, read http://lkml.org/lkml/2004/1/15/6 - */ - -#define tlb_end_vma(tlb, vma) \ -do { \ - if (!tlb->fullmm) \ - flush_tlb_range(vma, vma->vm_start, vma->vm_end); \ -} while (0) - -#define __tlb_remove_tlb_entry(tlb, ptep, address) - #include <linux/pagemap.h> #include <asm-generic/tlb.h> --- a/arch/c6x/Kconfig +++ b/arch/c6x/Kconfig @@ -19,6 +19,7 @@ config C6X select GENERIC_CLOCKEVENTS select MODULES_USE_ELF_RELA select ARCH_NO_COHERENT_DMA_MMAP + select MMU_GATHER_NO_RANGE if MMU config MMU def_bool n --- a/arch/c6x/include/asm/tlb.h +++ b/arch/c6x/include/asm/tlb.h @@ -2,8 +2,6 @@ #ifndef _ASM_C6X_TLB_H #define _ASM_C6X_TLB_H -#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm) - #include <asm-generic/tlb.h> #endif /* _ASM_C6X_TLB_H */ --- a/arch/h8300/include/asm/tlb.h +++ b/arch/h8300/include/asm/tlb.h @@ -2,8 +2,6 @@ #ifndef __H8300_TLB_H__ #define __H8300_TLB_H__ -#define tlb_flush(tlb) do { } while (0) - #include <asm-generic/tlb.h> #endif --- a/arch/hexagon/include/asm/tlb.h +++ b/arch/hexagon/include/asm/tlb.h @@ -22,18 +22,6 @@ #include <linux/pagemap.h> #include <asm/tlbflush.h> -/* - * We don't need any special per-pte or per-vma handling... - */ -#define tlb_start_vma(tlb, vma) do { } while (0) -#define tlb_end_vma(tlb, vma) do { } while (0) -#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0) - -/* - * .. because we flush the whole mm when it fills up - */ -#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm) - #include <asm-generic/tlb.h> #endif --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -27,6 +27,7 @@ config M68K select OLD_SIGSUSPEND3 select OLD_SIGACTION select ARCH_DISCARD_MEMBLOCK + select MMU_GATHER_NO_RANGE if MMU config CPU_BIG_ENDIAN def_bool y --- a/arch/m68k/include/asm/tlb.h +++ b/arch/m68k/include/asm/tlb.h @@ -2,20 +2,6 @@ #ifndef _M68K_TLB_H #define _M68K_TLB_H -/* - * m68k doesn't need any special per-pte or - * per-vma handling.. - */ -#define tlb_start_vma(tlb, vma) do { } while (0) -#define tlb_end_vma(tlb, vma) do { } while (0) -#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0) - -/* - * .. because we flush the whole mm when it - * fills up. - */ -#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm) - #include <asm-generic/tlb.h> #endif /* _M68K_TLB_H */ --- a/arch/microblaze/Kconfig +++ b/arch/microblaze/Kconfig @@ -40,6 +40,7 @@ config MICROBLAZE select TRACING_SUPPORT select VIRT_TO_BUS select CPU_NO_EFFICIENT_FFS + select MMU_GATHER_NO_RANGE if MMU # Endianness selection choice --- a/arch/microblaze/include/asm/tlb.h +++ b/arch/microblaze/include/asm/tlb.h @@ -11,16 +11,7 @@ #ifndef _ASM_MICROBLAZE_TLB_H #define _ASM_MICROBLAZE_TLB_H -#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm) - #include <linux/pagemap.h> - -#ifdef CONFIG_MMU -#define tlb_start_vma(tlb, vma) do { } while (0) -#define tlb_end_vma(tlb, vma) do { } while (0) -#define __tlb_remove_tlb_entry(tlb, pte, address) do { } while (0) -#endif - #include <asm-generic/tlb.h> #endif /* _ASM_MICROBLAZE_TLB_H */ --- a/arch/mips/include/asm/tlb.h +++ b/arch/mips/include/asm/tlb.h @@ -5,14 +5,6 @@ #include <asm/cpu-features.h> #include <asm/mipsregs.h> -#define tlb_end_vma(tlb, vma) do { } while (0) -#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0) - -/* - * .. because we flush the whole mm when it fills up. - */ -#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm) - #define _UNIQUE_ENTRYHI(base, idx) \ (((base) + ((idx) << (PAGE_SHIFT + 1))) | \ (cpu_has_tlbinv ? MIPS_ENTRYHI_EHINV : 0)) --- a/arch/nds32/include/asm/tlb.h +++ b/arch/nds32/include/asm/tlb.h @@ -4,16 +4,6 @@ #ifndef __ASMNDS32_TLB_H #define __ASMNDS32_TLB_H -#define tlb_end_vma(tlb,vma) \ - do { \ - if(!tlb->fullmm) \ - flush_tlb_range(vma, vma->vm_start, vma->vm_end); \ - } while (0) - -#define __tlb_remove_tlb_entry(tlb, pte, addr) do { } while (0) - -#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm) - #include <asm-generic/tlb.h> #define __pte_free_tlb(tlb, pte, addr) pte_free((tlb)->mm, pte) --- a/arch/nios2/Kconfig +++ b/arch/nios2/Kconfig @@ -23,6 +23,7 @@ config NIOS2 select USB_ARCH_HAS_HCD if USB_SUPPORT select CPU_NO_EFFICIENT_FFS select ARCH_DISCARD_MEMBLOCK + select MMU_GATHER_NO_RANGE if MMU config GENERIC_CSUM def_bool y --- a/arch/nios2/include/asm/tlb.h +++ b/arch/nios2/include/asm/tlb.h @@ -11,12 +11,12 @@ #ifndef _ASM_NIOS2_TLB_H #define _ASM_NIOS2_TLB_H -#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm) - extern void set_mmu_pid(unsigned long pid); -#define tlb_end_vma(tlb, vma) do { } while (0) -#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0) +/* + * NIOS32 does have flush_tlb_range(), but it lacks a limit and fallback to + * full mm invalidation. So use flush_tlb_mm() for everything. + */ #include <linux/pagemap.h> #include <asm-generic/tlb.h> --- a/arch/openrisc/Kconfig +++ b/arch/openrisc/Kconfig @@ -35,6 +35,7 @@ config OPENRISC select OMPIC if SMP select ARCH_WANT_FRAME_POINTERS select GENERIC_IRQ_MULTI_HANDLER + select MMU_GATHER_NO_RANGE if MMU config CPU_BIG_ENDIAN def_bool y --- a/arch/openrisc/include/asm/tlb.h +++ b/arch/openrisc/include/asm/tlb.h @@ -20,14 +20,10 @@ #define __ASM_OPENRISC_TLB_H__ /* - * or32 doesn't need any special per-pte or - * per-vma handling.. + * OpenRISC doesn't have an efficient flush_tlb_range() so use flush_tlb_mm() + * for everything. */ -#define tlb_start_vma(tlb, vma) do { } while (0) -#define tlb_end_vma(tlb, vma) do { } while (0) -#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0) -#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm) #include <linux/pagemap.h> #include <asm-generic/tlb.h> --- a/arch/parisc/include/asm/tlb.h +++ b/arch/parisc/include/asm/tlb.h @@ -2,19 +2,6 @@ #ifndef _PARISC_TLB_H #define _PARISC_TLB_H -#define tlb_flush(tlb) \ -do { if ((tlb)->fullmm) \ - flush_tlb_mm((tlb)->mm);\ -} while (0) - -#define tlb_end_vma(tlb, vma) \ -do { if (!(tlb)->fullmm) \ - flush_tlb_range(vma, vma->vm_start, vma->vm_end); \ -} while (0) - -#define __tlb_remove_tlb_entry(tlb, pte, address) \ - do { } while (0) - #include <asm-generic/tlb.h> #define __pmd_free_tlb(tlb, pmd, addr) pmd_free((tlb)->mm, pmd) --- a/arch/sparc/include/asm/tlb_32.h +++ b/arch/sparc/include/asm/tlb_32.h @@ -2,19 +2,6 @@ #ifndef _SPARC_TLB_H #define _SPARC_TLB_H -#define tlb_end_vma(tlb, vma) \ -do { \ - flush_tlb_range(vma, vma->vm_start, vma->vm_end); \ -} while (0) - -#define __tlb_remove_tlb_entry(tlb, pte, address) \ - do { } while (0) - -#define tlb_flush(tlb) \ -do { \ - flush_tlb_mm((tlb)->mm); \ -} while (0) - #include <asm-generic/tlb.h> #endif /* _SPARC_TLB_H */ --- a/arch/unicore32/Kconfig +++ b/arch/unicore32/Kconfig @@ -20,6 +20,7 @@ config UNICORE32 select GENERIC_IOMAP select MODULES_USE_ELF_REL select NEED_DMA_MAP_STATE + select MMU_GATHER_NO_RANGE if MMU help UniCore-32 is 32-bit Instruction Set Architecture, including a series of low-power-consumption RISC chip --- a/arch/unicore32/include/asm/tlb.h +++ b/arch/unicore32/include/asm/tlb.h @@ -12,10 +12,9 @@ #ifndef __UNICORE_TLB_H__ #define __UNICORE_TLB_H__ -#define tlb_start_vma(tlb, vma) do { } while (0) -#define tlb_end_vma(tlb, vma) do { } while (0) -#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0) -#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm) +/* + * unicore32 lacks an efficient flush_tlb_range(), use flush_tlb_mm(). + */ #define __pte_free_tlb(tlb, pte, addr) \ do { \ --- a/arch/xtensa/include/asm/tlb.h +++ b/arch/xtensa/include/asm/tlb.h @@ -14,23 +14,6 @@ #include <asm/cache.h> #include <asm/page.h> -#if (DCACHE_WAY_SIZE <= PAGE_SIZE) - -# define tlb_end_vma(tlb,vma) do { } while (0) - -#else - -# define tlb_end_vma(tlb, vma) \ - do { \ - if (!tlb->fullmm) \ - flush_tlb_range(vma, vma->vm_start, vma->vm_end); \ - } while(0) - -#endif - -#define __tlb_remove_tlb_entry(tlb,pte,addr) do { } while (0) -#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm) - #include <asm-generic/tlb.h> #define __pte_free_tlb(tlb, pte, address) pte_free((tlb)->mm, pte)