Will Deacon wrote on March 2, 2019 12:03 am:
> @@ -177,6 +178,7 @@ do { \
>  static inline void do_raw_spin_lock(raw_spinlock_t *lock) __acquires(lock)
>  {
>  	__acquire(lock);
> +	mmiowb_spin_lock();
>  	arch_spin_lock(&lock->raw_lock);
>  }
>
> @@ -188,16 +190,23 @@ static inline void
>  do_raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long *flags) __acquires(lock)
>  {
>  	__acquire(lock);
> +	mmiowb_spin_lock();
>  	arch_spin_lock_flags(&lock->raw_lock, *flags);
>  }

You'd be better off putting these inside the spin lock, i.e. after
arch_spin_lock() / arch_spin_lock_flags(), to match your trylock.

It also means the mmiowb state can be used inside a lock/unlock pair
without a compiler barrier forcing it to be reloaded. That should give
better code generation for very small critical sections on archs which
inline lock and unlock.

>
>  static inline int do_raw_spin_trylock(raw_spinlock_t *lock)
>  {
> -	return arch_spin_trylock(&(lock)->raw_lock);
> +	int ret = arch_spin_trylock(&(lock)->raw_lock);
> +
> +	if (ret)
> +		mmiowb_spin_lock();
> +
> +	return ret;
>  }
>
>  static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock)
>  {
> +	mmiowb_spin_unlock();
>  	arch_spin_unlock(&lock->raw_lock);
>  	__release(lock);
>  }

Thanks,
Nick