Commit-ID: d89e588ca4081615216cc25f2489b0281ac0bfe9 Gitweb: http://git.kernel.org/tip/d89e588ca4081615216cc25f2489b0281ac0bfe9 Author: Peter Zijlstra <peterz@xxxxxxxxxxxxx> AuthorDate: Mon, 5 Sep 2016 11:37:53 +0200 Committer: Ingo Molnar <mingo@xxxxxxxxxx> CommitDate: Thu, 10 Aug 2017 12:29:02 +0200 locking: Introduce smp_mb__after_spinlock() Since its inception, our understanding of ACQUIRE, esp. as applied to spinlocks, has changed somewhat. Also, I wonder if, with a simple change, we cannot make it provide more. The problem with the comment is that the STORE done by spin_lock isn't itself ordered by the ACQUIRE, and therefore a later LOAD can pass over it and cross with any prior STORE, rendering the default WMB insufficient (pointed out by Alan). Now, this is only really a problem on PowerPC and ARM64, both of which already defined smp_mb__before_spinlock() as a smp_mb(). At the same time, we can get a much stronger construct if we place that same barrier _inside_ the spin_lock(). In that case we upgrade the RCpc spinlock to an RCsc. That would make all schedule() calls fully transitive against one another. Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx> Acked-by: Will Deacon <will.deacon@xxxxxxx> Cc: Alan Stern <stern@xxxxxxxxxxxxxxxxxxx> Cc: Benjamin Herrenschmidt <benh@xxxxxxxxxxxxxxxxxxx> Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx> Cc: Michael Ellerman <mpe@xxxxxxxxxxxxxx> Cc: Nicholas Piggin <npiggin@xxxxxxxxx> Cc: Oleg Nesterov <oleg@xxxxxxxxxx> Cc: Paul McKenney <paulmck@xxxxxxxxxxxxxxxxxx> Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx> Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx> Signed-off-by: Ingo Molnar <mingo@xxxxxxxxxx> --- arch/arm64/include/asm/spinlock.h | 2 ++ arch/powerpc/include/asm/spinlock.h | 3 +++ include/linux/atomic.h | 3 +++ include/linux/spinlock.h | 36 ++++++++++++++++++++++++++++++++++++ kernel/sched/core.c | 4 ++-- 5 files changed, 46 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h index cae331d..b103888 100644 --- a/arch/arm64/include/asm/spinlock.h +++ b/arch/arm64/include/asm/spinlock.h @@ -367,5 +367,7 @@ static inline int arch_read_trylock(arch_rwlock_t *rw) * smp_mb__before_spinlock() can restore the required ordering. */ #define smp_mb__before_spinlock() smp_mb() +/* See include/linux/spinlock.h */ +#define smp_mb__after_spinlock() smp_mb() #endif /* __ASM_SPINLOCK_H */ diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h index 8c1b913..c1b1ec9 100644 --- a/arch/powerpc/include/asm/spinlock.h +++ b/arch/powerpc/include/asm/spinlock.h @@ -342,5 +342,8 @@ static inline void arch_write_unlock(arch_rwlock_t *rw) #define arch_read_relax(lock) __rw_yield(lock) #define arch_write_relax(lock) __rw_yield(lock) +/* See include/linux/spinlock.h */ +#define smp_mb__after_spinlock() smp_mb() + #endif /* __KERNEL__ */ #endif /* __ASM_SPINLOCK_H */ diff --git a/include/linux/atomic.h b/include/linux/atomic.h index c56be74..40d6bfe 100644 --- a/include/linux/atomic.h +++ b/include/linux/atomic.h @@ -38,6 +38,9 @@ * Besides, if an arch has a special barrier for acquire/release, it could * implement its own __atomic_op_* and use the same framework for building * variants + * + * If an architecture overrides __atomic_op_acquire() it will probably want + * to define smp_mb__after_spinlock(). */ #ifndef __atomic_op_acquire #define __atomic_op_acquire(op, args...) \ diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index d9510e8..8402810 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -130,6 +130,42 @@ do { \ #define smp_mb__before_spinlock() smp_wmb() #endif +/* + * This barrier must provide two things: + * + * - it must guarantee a STORE before the spin_lock() is ordered against a + * LOAD after it, see the comments at its two usage sites. + * + * - it must ensure the critical section is RCsc. + * + * The latter is important for cases where we observe values written by other + * CPUs in spin-loops, without barriers, while being subject to scheduling. + * + * CPU0 CPU1 CPU2 + * + * for (;;) { + * if (READ_ONCE(X)) + * break; + * } + * X=1 + * <sched-out> + * <sched-in> + * r = X; + * + * without transitivity it could be that CPU1 observes X!=0 breaks the loop, + * we get migrated and CPU2 sees X==0. + * + * Since most load-store architectures implement ACQUIRE with an smp_mb() after + * the LL/SC loop, they need no further barriers. Similarly all our TSO + * architectures imply an smp_mb() for each atomic instruction and equally don't + * need more. + * + * Architectures that can implement ACQUIRE better need to take care. + */ +#ifndef smp_mb__after_spinlock +#define smp_mb__after_spinlock() do { } while (0) +#endif + /** * raw_spin_unlock_wait - wait until the spinlock gets unlocked * @lock: the spinlock in question. diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 0869b20..9fece58 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1967,8 +1967,8 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags) * reordered with p->state check below. This pairs with mb() in * set_current_state() the waiting thread does. */ - smp_mb__before_spinlock(); raw_spin_lock_irqsave(&p->pi_lock, flags); + smp_mb__after_spinlock(); if (!(p->state & state)) goto out; @@ -3281,8 +3281,8 @@ static void __sched notrace __schedule(bool preempt) * can't be reordered with __set_current_state(TASK_INTERRUPTIBLE) * done by the caller to avoid the race with signal_wake_up(). */ - smp_mb__before_spinlock(); rq_lock(rq, &rf); + smp_mb__after_spinlock(); /* Promote REQ to ACT */ rq->clock_update_flags <<= 1; -- To unsubscribe from this list: send the line "unsubscribe linux-tip-commits" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html
![]() |