This is a note to let you know that I've just added the patch titled x86/mm: Remove the UP asm/tlbflush.h code, always use the (formerly) SMP code to the 4.9-stable tree which can be found at: http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary The filename of the patch is: x86-mm-remove-the-up-asm-tlbflush.h-code-always-use-the-formerly-smp-code.patch and it can be found in the queue-4.9 subdirectory. If you, or anyone else, feels it should not be added to the stable tree, please let <stable@xxxxxxxxxxxxxxx> know about it. >From ce4a4e565f5264909a18c733b864c3f74467f69e Mon Sep 17 00:00:00 2001 From: Andy Lutomirski <luto@xxxxxxxxxx> Date: Sun, 28 May 2017 10:00:14 -0700 Subject: x86/mm: Remove the UP asm/tlbflush.h code, always use the (formerly) SMP code From: Andy Lutomirski <luto@xxxxxxxxxx> commit ce4a4e565f5264909a18c733b864c3f74467f69e upstream. The UP asm/tlbflush.h generates somewhat nicer code than the SMP version. Aside from that, it's fallen quite a bit behind the SMP code: - flush_tlb_mm_range() didn't flush individual pages if the range was small. - The lazy TLB code was much weaker. This usually wouldn't matter, but, if a kernel thread flushed its lazy "active_mm" more than once (due to reclaim or similar), it wouldn't be unlazied and would instead pointlessly flush repeatedly. - Tracepoints were missing. Aside from that, simply having the UP code around was a maintenance burden, since it meant that any change to the TLB flush code had to make sure not to break it. Simplify everything by deleting the UP code. 
Signed-off-by: Andy Lutomirski <luto@xxxxxxxxxx> Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> Cc: Arjan van de Ven <arjan@xxxxxxxxxxxxxxx> Cc: Borislav Petkov <bpetkov@xxxxxxx> Cc: Dave Hansen <dave.hansen@xxxxxxxxx> Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx> Cc: Mel Gorman <mgorman@xxxxxxx> Cc: Michal Hocko <mhocko@xxxxxxxx> Cc: Nadav Amit <nadav.amit@xxxxxxxxx> Cc: Nadav Amit <namit@xxxxxxxxxx> Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx> Cc: Rik van Riel <riel@xxxxxxxxxx> Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx> Cc: linux-mm@xxxxxxxxx Signed-off-by: Ingo Molnar <mingo@xxxxxxxxxx> Signed-off-by: Eduardo Valentin <eduval@xxxxxxxxxx> Signed-off-by: Eduardo Valentin <edubezval@xxxxxxxxx> Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx> --- arch/x86/Kconfig | 2 arch/x86/include/asm/hardirq.h | 2 arch/x86/include/asm/mmu.h | 6 -- arch/x86/include/asm/mmu_context.h | 2 arch/x86/include/asm/tlbbatch.h | 2 arch/x86/include/asm/tlbflush.h | 81 ------------------------------------- arch/x86/mm/init.c | 2 arch/x86/mm/tlb.c | 17 ------- 8 files changed, 5 insertions(+), 109 deletions(-) --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -45,7 +45,7 @@ config X86 select ARCH_USE_CMPXCHG_LOCKREF if X86_64 select ARCH_USE_QUEUED_RWLOCKS select ARCH_USE_QUEUED_SPINLOCKS - select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH if SMP + select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH select ARCH_WANTS_DYNAMIC_TASK_STRUCT select ARCH_WANT_FRAME_POINTERS select ARCH_WANT_IPC_PARSE_VERSION if X86_32 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h @@ -22,8 +22,8 @@ typedef struct { #ifdef CONFIG_SMP unsigned int irq_resched_count; unsigned int irq_call_count; - unsigned int irq_tlb_count; #endif + unsigned int irq_tlb_count; #ifdef CONFIG_X86_THERMAL_VECTOR unsigned int irq_thermal_count; #endif --- a/arch/x86/include/asm/mmu.h +++ b/arch/x86/include/asm/mmu.h @@ -33,12 +33,6 @@ typedef struct { #endif } mm_context_t; -#ifdef CONFIG_SMP void leave_mm(int cpu); 
-#else -static inline void leave_mm(int cpu) -{ -} -#endif #endif /* _ASM_X86_MMU_H */ --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -99,10 +99,8 @@ static inline void load_mm_ldt(struct mm static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { -#ifdef CONFIG_SMP if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK) this_cpu_write(cpu_tlbstate.state, TLBSTATE_LAZY); -#endif } static inline int init_new_context(struct task_struct *tsk, --- a/arch/x86/include/asm/tlbbatch.h +++ b/arch/x86/include/asm/tlbbatch.h @@ -3,7 +3,6 @@ #include <linux/cpumask.h> -#ifdef CONFIG_SMP struct arch_tlbflush_unmap_batch { /* * Each bit set is a CPU that potentially has a TLB entry for one of @@ -11,6 +10,5 @@ struct arch_tlbflush_unmap_batch { */ struct cpumask cpumask; }; -#endif #endif /* _ARCH_X86_TLBBATCH_H */ --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -7,6 +7,7 @@ #include <asm/processor.h> #include <asm/cpufeature.h> #include <asm/special_insns.h> +#include <asm/smp.h> static inline void __invpcid(unsigned long pcid, unsigned long addr, unsigned long type) @@ -65,10 +66,8 @@ static inline void invpcid_flush_all_non #endif struct tlb_state { -#ifdef CONFIG_SMP struct mm_struct *active_mm; int state; -#endif /* * Access to this CR4 shadow and to H/W CR4 is protected by @@ -222,82 +221,6 @@ struct flush_tlb_info { unsigned long end; }; -#ifndef CONFIG_SMP - -/* "_up" is for UniProcessor. - * - * This is a helper for other header functions. *Not* intended to be called - * directly. All global TLB flushes need to either call this, or to bump the - * vm statistics themselves. 
- */ -static inline void __flush_tlb_up(void) -{ - count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); - __flush_tlb(); -} - -static inline void flush_tlb_all(void) -{ - count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL); - __flush_tlb_all(); -} - -static inline void flush_tlb(void) -{ - __flush_tlb_up(); -} - -static inline void local_flush_tlb(void) -{ - __flush_tlb_up(); -} - -static inline void flush_tlb_mm(struct mm_struct *mm) -{ - if (mm == current->active_mm) - __flush_tlb_up(); -} - -static inline void flush_tlb_page(struct vm_area_struct *vma, - unsigned long addr) -{ - if (vma->vm_mm == current->active_mm) - __flush_tlb_one(addr); -} - -static inline void flush_tlb_range(struct vm_area_struct *vma, - unsigned long start, unsigned long end) -{ - if (vma->vm_mm == current->active_mm) - __flush_tlb_up(); -} - -static inline void flush_tlb_mm_range(struct mm_struct *mm, - unsigned long start, unsigned long end, unsigned long vmflag) -{ - if (mm == current->active_mm) - __flush_tlb_up(); -} - -static inline void native_flush_tlb_others(const struct cpumask *cpumask, - const struct flush_tlb_info *info) -{ -} - -static inline void reset_lazy_tlbstate(void) -{ -} - -static inline void flush_tlb_kernel_range(unsigned long start, - unsigned long end) -{ - flush_tlb_all(); -} - -#else /* SMP */ - -#include <asm/smp.h> - #define local_flush_tlb() __flush_tlb() #define flush_tlb_mm(mm) flush_tlb_mm_range(mm, 0UL, TLB_FLUSH_ALL, 0UL) @@ -337,8 +260,6 @@ static inline void arch_tlbbatch_add_mm( extern void arch_tlbbatch_flush(struct arch_tlbflush_unmap_batch *batch); -#endif /* SMP */ - #ifndef CONFIG_PARAVIRT #define flush_tlb_others(mask, info) \ native_flush_tlb_others(mask, info) --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -764,10 +764,8 @@ void __init zone_sizes_init(void) } DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = { -#ifdef CONFIG_SMP .active_mm = &init_mm, .state = 0, -#endif .cr4 = ~0UL, /* fail hard if we screw up cr4 shadow initialization */ 
}; EXPORT_SYMBOL_GPL(cpu_tlbstate); --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -15,7 +15,7 @@ #include <linux/debugfs.h> /* - * Smarter SMP flushing macros. + * TLB flushing, formerly SMP-only * c/o Linus Torvalds. * * These mean you can really definitely utterly forget about @@ -28,8 +28,6 @@ * Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi */ -#ifdef CONFIG_SMP - /* * We cannot call mmdrop() because we are in interrupt context, * instead update mm->cpu_vm_mask. @@ -53,8 +51,6 @@ void leave_mm(int cpu) } EXPORT_SYMBOL_GPL(leave_mm); -#endif /* CONFIG_SMP */ - void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) { @@ -85,10 +81,8 @@ void switch_mm_irqs_off(struct mm_struct set_pgd(pgd, init_mm.pgd[stack_pgd_index]); } -#ifdef CONFIG_SMP this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK); this_cpu_write(cpu_tlbstate.active_mm, next); -#endif cpumask_set_cpu(cpu, mm_cpumask(next)); @@ -146,9 +140,7 @@ void switch_mm_irqs_off(struct mm_struct if (unlikely(prev->context.ldt != next->context.ldt)) load_mm_ldt(next); #endif - } -#ifdef CONFIG_SMP - else { + } else { this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK); BUG_ON(this_cpu_read(cpu_tlbstate.active_mm) != next); @@ -175,11 +167,8 @@ void switch_mm_irqs_off(struct mm_struct load_mm_ldt(next); } } -#endif } -#ifdef CONFIG_SMP - /* * The flush IPI assumes that a thread switch happens in this order: * [cpu0: the cpu that switches] @@ -459,5 +448,3 @@ static int __init create_tlb_single_page return 0; } late_initcall(create_tlb_single_page_flush_ceiling); - -#endif /* CONFIG_SMP */ Patches currently in stable-queue which might be from luto@xxxxxxxxxx are queue-4.9/x86-mm-refactor-flush_tlb_mm_range-to-merge-local-and-remote-cases.patch queue-4.9/x86-mm-pass-flush_tlb_info-to-flush_tlb_others-etc.patch queue-4.9/x86-mm-rework-lazy-tlb-to-track-the-actual-loaded-mm.patch queue-4.9/x86-mm-kvm-teach-kvm-s-vmx-code-that-cr3-isn-t-a-constant.patch 
queue-4.9/x86-mm-use-new-merged-flush-logic-in-arch_tlbbatch_flush.patch queue-4.9/x86-kvm-vmx-simplify-segment_base.patch queue-4.9/x86-entry-unwind-create-stack-frames-for-saved-interrupt-registers.patch queue-4.9/x86-mm-reduce-indentation-in-flush_tlb_func.patch queue-4.9/x86-mm-remove-the-up-asm-tlbflush.h-code-always-use-the-formerly-smp-code.patch queue-4.9/x86-mm-reimplement-flush_tlb_page-using-flush_tlb_mm_range.patch queue-4.9/mm-x86-mm-make-the-batched-unmap-tlb-flush-api-more-generic.patch queue-4.9/x86-kvm-vmx-defer-tr-reload-after-vm-exit.patch queue-4.9/x86-mm-change-the-leave_mm-condition-for-local-tlb-flushes.patch queue-4.9/x86-mm-be-more-consistent-wrt-page_shift-vs-page_size-in-tlb-flush-code.patch