Nick has a valid point that the sched_in() hook is a fast-path compared
to switch_mm(). Adding an extra TIF test in a fast-path to save a
barrier in a comparatively slow-path is therefore not such a good idea
overall. Move the architecture hook to switch_mm() instead.

[ This patch is aimed at Paul's tree. It applies on top of
  "membarrier: Provide register expedited private command (v4)" and
  "membarrier: Document scheduler barrier requirements (v4)". ]

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@xxxxxxxxxxxx>
CC: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
CC: Paul E. McKenney <paulmck@xxxxxxxxxxxxxxxxxx>
CC: Nicholas Piggin <npiggin@xxxxxxxxx>
CC: Boqun Feng <boqun.feng@xxxxxxxxx>
CC: Andrew Hunter <ahh@xxxxxxxxxx>
CC: Maged Michael <maged.michael@xxxxxxxxx>
CC: gromer@xxxxxxxxxx
CC: Avi Kivity <avi@xxxxxxxxxxxx>
CC: Benjamin Herrenschmidt <benh@xxxxxxxxxxxxxxxxxxx>
CC: Paul Mackerras <paulus@xxxxxxxxx>
CC: Michael Ellerman <mpe@xxxxxxxxxxxxxx>
CC: Dave Watson <davejwatson@xxxxxx>
CC: Alan Stern <stern@xxxxxxxxxxxxxxxxxxx>
CC: Will Deacon <will.deacon@xxxxxxx>
CC: Andy Lutomirski <luto@xxxxxxxxxx>
CC: Ingo Molnar <mingo@xxxxxxxxxx>
CC: Alexander Viro <viro@xxxxxxxxxxxxxxxxxx>
CC: linuxppc-dev@xxxxxxxxxxxxxxxx
CC: linux-arch@xxxxxxxxxxxxxxx
---
 arch/powerpc/include/asm/membarrier.h |  9 ++++-----
 arch/powerpc/mm/mmu_context.c         |  7 +++++++
 include/linux/sched/mm.h              | 13 ++++---------
 kernel/sched/core.c                   |  6 ++----
 4 files changed, 17 insertions(+), 18 deletions(-)

diff --git a/arch/powerpc/include/asm/membarrier.h b/arch/powerpc/include/asm/membarrier.h
index 588154c1cf57..61152a7a3cf9 100644
--- a/arch/powerpc/include/asm/membarrier.h
+++ b/arch/powerpc/include/asm/membarrier.h
@@ -1,8 +1,8 @@
 #ifndef _ASM_POWERPC_MEMBARRIER_H
 #define _ASM_POWERPC_MEMBARRIER_H
 
-static inline void membarrier_arch_sched_in(struct task_struct *prev,
-		struct task_struct *next)
+static inline void membarrier_arch_switch_mm(struct mm_struct *prev,
+		struct mm_struct *next, struct task_struct *tsk)
 {
 	/*
 	 * Only need the full barrier when switching between processes.
@@ -11,9 +11,8 @@ static inline void membarrier_arch_sched_in(struct task_struct *prev,
 	 * when switching from userspace to kernel is not needed after
 	 * store to rq->curr.
 	 */
-	if (likely(!test_ti_thread_flag(task_thread_info(next),
-			TIF_MEMBARRIER_PRIVATE_EXPEDITED)
-			|| !prev->mm || prev->mm == next->mm))
+	if (likely(!test_ti_thread_flag(task_thread_info(tsk),
+			TIF_MEMBARRIER_PRIVATE_EXPEDITED) || !prev))
 		return;
 
 	/*
diff --git a/arch/powerpc/mm/mmu_context.c b/arch/powerpc/mm/mmu_context.c
index 0f613bc63c50..22f5c91cdc38 100644
--- a/arch/powerpc/mm/mmu_context.c
+++ b/arch/powerpc/mm/mmu_context.c
@@ -12,6 +12,7 @@
 
 #include <linux/mm.h>
 #include <linux/cpu.h>
+#include <linux/sched/mm.h>
 
 #include <asm/mmu_context.h>
 
@@ -67,6 +68,10 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 		 *
 		 * On the read side the barrier is in pte_xchg(), which orders
 		 * the store to the PTE vs the load of mm_cpumask.
+		 *
+		 * This full barrier is needed by membarrier when switching
+		 * between processes after store to rq->curr, before user-space
+		 * memory accesses.
 		 */
 		smp_mb();
 
@@ -89,6 +94,8 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
 
 	if (new_on_cpu)
 		radix_kvm_prefetch_workaround(next);
+	else
+		membarrier_arch_switch_mm(prev, next, tsk);
 
 	/*
 	 * The actual HW switching method differs between the various
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index 1bd10c2c0893..d5a9ab8f3836 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -215,8 +215,8 @@ static inline void memalloc_noreclaim_restore(unsigned int flags)
 #ifdef CONFIG_ARCH_HAS_MEMBARRIER_HOOKS
 #include <asm/membarrier.h>
 #else
-static inline void membarrier_arch_sched_in(struct task_struct *prev,
-		struct task_struct *next)
+static inline void membarrier_arch_switch_mm(struct mm_struct *prev,
+		struct mm_struct *next, struct task_struct *tsk)
 {
 }
 static inline void membarrier_arch_fork(struct task_struct *t,
@@ -232,11 +232,6 @@ static inline void membarrier_arch_register_private_expedited(
 }
 #endif
 
-static inline void membarrier_sched_in(struct task_struct *prev,
-		struct task_struct *next)
-{
-	membarrier_arch_sched_in(prev, next);
-}
 static inline void membarrier_fork(struct task_struct *t,
 		unsigned long clone_flags)
 {
@@ -252,8 +247,8 @@ static inline void membarrier_execve(struct task_struct *t)
 	membarrier_arch_execve(t);
 }
 #else
-static inline void membarrier_sched_in(struct task_struct *prev,
-		struct task_struct *next)
+static inline void membarrier_arch_switch_mm(struct mm_struct *prev,
+		struct mm_struct *next, struct task_struct *tsk)
 {
 }
 static inline void membarrier_fork(struct task_struct *t,
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 6254f87645de..8585abebe32b 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2643,7 +2643,6 @@ static struct rq *finish_task_switch(struct task_struct *prev)
 	 */
 	prev_state = prev->state;
 	vtime_task_switch(prev);
-	membarrier_sched_in(prev, current);
 	perf_event_task_sched_in(prev, current);
 	finish_lock_switch(rq, prev);
 	finish_arch_post_lock_switch();
@@ -3357,13 +3356,12 @@ static void __sched notrace __schedule(bool preempt)
 	 *
 	 * Here are the schemes providing that barrier on the
	 * various architectures:
-	 * - mm ? switch_mm() : mmdrop() for x86, s390, sparc,
+	 * - mm ? switch_mm() : mmdrop() for x86, s390, sparc, PowerPC.
+	 *   switch_mm() rely on membarrier_arch_switch_mm() on PowerPC.
 	 * - finish_lock_switch() for weakly-ordered
 	 *   architectures where spin_unlock is a full barrier,
 	 * - switch_to() for arm64 (weakly-ordered, spin_unlock
 	 *   is a RELEASE barrier),
-	 * - membarrier_arch_sched_in() for PowerPC,
-	 *   (weakly-ordered, spin_unlock is a RELEASE barrier).
 	 */
 	++*switch_count;
 
-- 
2.11.0
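
For readers skimming the thread, a minimal user-space sketch of the control
flow this patch establishes follows. It is an illustration only, not kernel
code: struct mm_struct, struct task_struct, full_barrier() and the
new_on_cpu flag below are simplified stand-ins for the kernel's types and
helpers, and the TIF_MEMBARRIER_PRIVATE_EXPEDITED test is modelled as a
plain boolean. The point it demonstrates is that the TIF test and the extra
full barrier now sit in the mm-switch slow path rather than in the
scheduler's per-context-switch fast path.

/*
 * Illustration only -- NOT kernel code.  Simplified user-space model of
 * where the hook sits after this patch.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct mm_struct { int id; };
struct task_struct {
	bool membarrier_private_expedited;	/* models the TIF flag */
};

static void full_barrier(void)
{
	atomic_thread_fence(memory_order_seq_cst);	/* stand-in for smp_mb() */
}

/* Mirrors the shape of membarrier_arch_switch_mm() in the patch. */
static void membarrier_arch_switch_mm(struct mm_struct *prev,
		struct mm_struct *next, struct task_struct *tsk)
{
	(void)next;
	/* Skip the barrier unless the incoming task opted in and we are
	 * actually leaving another user mm behind. */
	if (!tsk->membarrier_private_expedited || !prev)
		return;
	full_barrier();
}

/* Mirrors the call site added to switch_mm_irqs_off(): when the mm was
 * already active on this CPU (new_on_cpu == false), no full barrier has
 * been issued on this path yet, so the membarrier hook supplies it. */
static void switch_mm(struct mm_struct *prev, struct mm_struct *next,
		struct task_struct *tsk, bool new_on_cpu)
{
	if (!new_on_cpu)
		membarrier_arch_switch_mm(prev, next, tsk);
	/* ... the real function then performs the HW context switch ... */
}

int main(void)
{
	struct mm_struct a = { .id = 1 }, b = { .id = 2 };
	struct task_struct t = { .membarrier_private_expedited = true };

	switch_mm(&a, &b, &t, false);
	printf("switched mm %d -> %d (barrier issued in slow path)\n",
			a.id, b.id);
	return 0;
}

The sketch builds with any C11 compiler (e.g. gcc -std=c11 sketch.c).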