On Mon, Feb 28, 2022 at 05:04:10PM -0800, Namhyung Kim wrote:

> @@ -171,9 +172,12 @@ bool __sched __percpu_down_read(struct percpu_rw_semaphore *sem, bool try)
> 	if (try)
> 		return false;
>
> +	trace_contention_begin(sem, _RET_IP_,
> +			       LCB_F_READ | LCB_F_PERCPU | TASK_UNINTERRUPTIBLE);

That is a bit unwieldy, isn't it?

> 	preempt_enable();
> 	percpu_rwsem_wait(sem, /* .reader = */ true);
> 	preempt_disable();
> +	trace_contention_end(sem);
>
> 	return true;
> }
> @@ -224,8 +228,13 @@ void __sched percpu_down_write(struct percpu_rw_semaphore *sem)
> 	 * Try set sem->block; this provides writer-writer exclusion.
> 	 * Having sem->block set makes new readers block.
> 	 */
> -	if (!__percpu_down_write_trylock(sem))
> +	if (!__percpu_down_write_trylock(sem)) {
> +		unsigned int flags = LCB_F_WRITE | LCB_F_PERCPU | TASK_UNINTERRUPTIBLE;
> +
> +		trace_contention_begin(sem, _RET_IP_, flags);
> 		percpu_rwsem_wait(sem, /* .reader = */ false);
> +		trace_contention_end(sem);
> +	}
>
> 	/* smp_mb() implied by __percpu_down_write_trylock() on success -- D matches A */
>

Wouldn't it be easier to stick all that inside percpu_rwsem_wait() and
have it only once?

You can even re-frob the wait loop such that the tracepoint can use
current->__state or something.

diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c
index c9fdae94e098..ca01f8ff88e5 100644
--- a/kernel/locking/percpu-rwsem.c
+++ b/kernel/locking/percpu-rwsem.c
@@ -154,13 +154,16 @@ static void percpu_rwsem_wait(struct percpu_rw_semaphore *sem, bool reader)
 	}
 	spin_unlock_irq(&sem->waiters.lock);
 
+	set_current_state(TASK_UNINTERRUPTIBLE);
+	trace_contention_begin(sem, _RET_IP_, LCB_F_PERCPU | LCB_F_WRITE*!reader);
 	while (wait) {
-		set_current_state(TASK_UNINTERRUPTIBLE);
 		if (!smp_load_acquire(&wq_entry.private))
 			break;
 		schedule();
+		set_current_state(TASK_UNINTERRUPTIBLE);
 	}
 	__set_current_state(TASK_RUNNING);
+	trace_contention_end(sem);
 }
 
 bool __sched __percpu_down_read(struct percpu_rw_semaphore *sem, bool try)

> diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
> index 8555c4efe97c..e49f5d2a232b 100644
> --- a/kernel/locking/rtmutex.c
> +++ b/kernel/locking/rtmutex.c
> @@ -24,6 +24,8 @@
> #include <linux/sched/wake_q.h>
> #include <linux/ww_mutex.h>
>
> +#include <trace/events/lock.h>
> +
> #include "rtmutex_common.h"
>
> #ifndef WW_RT
> @@ -1652,10 +1654,16 @@ static int __sched rt_mutex_slowlock(struct rt_mutex_base *lock,
> static __always_inline int __rt_mutex_lock(struct rt_mutex_base *lock,
> 					   unsigned int state)
> {
> +	int ret;
> +
> 	if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
> 		return 0;
>
> -	return rt_mutex_slowlock(lock, NULL, state);
> +	trace_contention_begin(lock, _RET_IP_, LCB_F_RT | state);
> +	ret = rt_mutex_slowlock(lock, NULL, state);
> +	trace_contention_end(lock);
> +
> +	return ret;
> }
> #endif /* RT_MUTEX_BUILD_MUTEX */
>
> @@ -1718,9 +1726,11 @@ static __always_inline void __sched rtlock_slowlock(struct rt_mutex_base *lock)
> {
> 	unsigned long flags;
>
> +	trace_contention_begin(lock, _RET_IP_, LCB_F_RT | TASK_RTLOCK_WAIT);
> 	raw_spin_lock_irqsave(&lock->wait_lock, flags);
> 	rtlock_slowlock_locked(lock);
> 	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
> +	trace_contention_end(lock);
> }

Same, if you do it one level in, you can have the tracepoint itself look
at current->__state.

Also, you seem to have forgotten to trace the return value. Now you
can't tell if the lock was acquired, or was denied (ww_mutex) or we
were interrupted.
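For the end event, something like the sketch below (completely untested, the
field layout is only an assumption, not something the series has to adopt)
would carry that; the rtmutex diff that follows then simply feeds the slowpath
result into it:

/*
 * Untested sketch only -- one possible layout for the end event, next
 * to the other events in include/trace/events/lock.h (inside the usual
 * trace header boilerplate).  Carrying the slowpath return value lets a
 * consumer distinguish an acquired lock (0) from -EINTR/-ETIMEDOUT or a
 * ww_mutex -EDEADLK.
 */
TRACE_EVENT(contention_end,

	TP_PROTO(void *lock, int ret),

	TP_ARGS(lock, ret),

	TP_STRUCT__entry(
		__field(void *,	lock)
		__field(int,	ret)
	),

	TP_fast_assign(
		__entry->lock	= lock;
		__entry->ret	= ret;
	),

	TP_printk("%p (ret=%d)", __entry->lock, __entry->ret)
);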
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 8555c4efe97c..18b9f4bf6f34 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -1579,6 +1579,8 @@ static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock,
 
 	set_current_state(state);
 
+	trace_contention_begin(lock, _RET_IP_, LCB_F_RT);
+
 	ret = task_blocks_on_rt_mutex(lock, waiter, current, ww_ctx, chwalk);
 	if (likely(!ret))
 		ret = rt_mutex_slowlock_block(lock, ww_ctx, state, NULL, waiter);
@@ -1601,6 +1603,9 @@ static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock,
 	 * unconditionally. We might have to fix that up.
 	 */
 	fixup_rt_mutex_waiters(lock);
+
+	trace_contention_end(lock, ret);
+
 	return ret;
 }
 
@@ -1683,6 +1688,8 @@ static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock)
 	/* Save current state and set state to TASK_RTLOCK_WAIT */
 	current_save_and_set_rtlock_wait_state();
 
+	trace_contention_begin(lock, _RET_IP_, LCB_F_RT);
+
 	task_blocks_on_rt_mutex(lock, &waiter, current, NULL, RT_MUTEX_MIN_CHAINWALK);
 
 	for (;;) {
@@ -1703,6 +1710,8 @@ static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock)
 		set_current_state(TASK_RTLOCK_WAIT);
 	}
 
+	trace_contention_end(lock, 0);
+
 	/* Restore the task state */
 	current_restore_rtlock_saved_state();
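And as for having the tracepoint itself look at current->__state, the begin
event could sample the task state in its fast-assign; again only an untested
sketch with made-up field names, meant to sit next to the other events in
include/trace/events/lock.h:

/*
 * Untested sketch for the "one level in" variant: sample
 * current->__state in the event itself, so the LCB_F_* argument only
 * has to describe the lock type and the callers don't need to pass
 * TASK_UNINTERRUPTIBLE/TASK_RTLOCK_WAIT around.  Field names and
 * layout here are illustrative, not the series'.
 */
TRACE_EVENT(contention_begin,

	TP_PROTO(void *lock, unsigned long ip, unsigned int flags),

	TP_ARGS(lock, ip, flags),

	TP_STRUCT__entry(
		__field(void *,		lock)
		__field(unsigned long,	ip)
		__field(unsigned int,	flags)
		__field(unsigned int,	state)
	),

	TP_fast_assign(
		__entry->lock	= lock;
		__entry->ip	= ip;
		__entry->flags	= flags;
		/* READ_ONCE(current->__state) */
		__entry->state	= get_current_state();
	),

	TP_printk("%ps: lock=%p flags=0x%x state=0x%x",
		  (void *)__entry->ip, __entry->lock,
		  __entry->flags, __entry->state)
);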