On Tue, Mar 1, 2022 at 1:04 AM Peter Zijlstra <peterz@xxxxxxxxxxxxx> wrote:
>
> On Mon, Feb 28, 2022 at 05:04:10PM -0800, Namhyung Kim wrote:
>
> > @@ -171,9 +172,12 @@ bool __sched __percpu_down_read(struct percpu_rw_semaphore *sem, bool try)
> >  	if (try)
> >  		return false;
> >
> > +	trace_contention_begin(sem, _RET_IP_,
> > +			       LCB_F_READ | LCB_F_PERCPU | TASK_UNINTERRUPTIBLE);
>
> That is a bit unwieldy, isn't it ?
>
> >  	preempt_enable();
> >  	percpu_rwsem_wait(sem, /* .reader = */ true);
> >  	preempt_disable();
> > +	trace_contention_end(sem);
> >
> >  	return true;
> >  }
> > @@ -224,8 +228,13 @@ void __sched percpu_down_write(struct percpu_rw_semaphore *sem)
> >  	 * Try set sem->block; this provides writer-writer exclusion.
> >  	 * Having sem->block set makes new readers block.
> >  	 */
> > -	if (!__percpu_down_write_trylock(sem))
> > +	if (!__percpu_down_write_trylock(sem)) {
> > +		unsigned int flags = LCB_F_WRITE | LCB_F_PERCPU | TASK_UNINTERRUPTIBLE;
> > +
> > +		trace_contention_begin(sem, _RET_IP_, flags);
> >  		percpu_rwsem_wait(sem, /* .reader = */ false);
> > +		trace_contention_end(sem);
> > +	}
> >
> >  	/* smp_mb() implied by __percpu_down_write_trylock() on success -- D matches A */
> >
>
> Wouldn't it be easier to stick all that inside percpu_rwsem_wait() and
> have it only once? You can even re-frob the wait loop such that the
> tracepoint can use current->__state or something.
>
> diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c
> index c9fdae94e098..ca01f8ff88e5 100644
> --- a/kernel/locking/percpu-rwsem.c
> +++ b/kernel/locking/percpu-rwsem.c
> @@ -154,13 +154,16 @@ static void percpu_rwsem_wait(struct percpu_rw_semaphore *sem, bool reader)
>  	}
>  	spin_unlock_irq(&sem->waiters.lock);
>
> +	set_current_state(TASK_UNINTERRUPTIBLE);
> +	trace_contention_begin(sem, _RET_IP_, LCB_F_PERCPU | LCB_F_WRITE*!reader);
>  	while (wait) {
> -		set_current_state(TASK_UNINTERRUPTIBLE);
>  		if (!smp_load_acquire(&wq_entry.private))
>  			break;
>  		schedule();
> +		set_current_state(TASK_UNINTERRUPTIBLE);
>  	}
>  	__set_current_state(TASK_RUNNING);
> +	trace_contention_end(sem);
>  }

Looks good.  I'll make similar changes in other places too.

One general concern with moving it inside is that it makes _RET_IP_
useless.  If we can pass the caller ip down to the slowpath function,
it'd be fine.
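For illustration, passing the caller ip down could look like the tiny
userspace-only sketch below (my own toy example, not kernel code and not
part of either patch).  In the kernel, _RET_IP_ expands to
__builtin_return_address(0), which the sketch uses directly:

#include <stdio.h>

/* stand-in for the tracepoint: just report the lock and the call site */
static void trace_contention_begin(void *lock, unsigned long ip, unsigned int flags)
{
	printf("contention_begin: lock=%p caller_ip=%#lx flags=%#x\n",
	       lock, ip, flags);
}

/* out-of-line slow path, analogous to percpu_rwsem_wait() */
static void my_lock_slowpath(void *lock, unsigned long caller_ip)
{
	/*
	 * Using the return address here would point back into my_lock(),
	 * so the caller ip is handed down from the API entry point instead.
	 */
	trace_contention_begin(lock, caller_ip, 0);
	/* ... would block here until the lock becomes available ... */
}

/* lock API entry point, analogous to percpu_down_write(); kept out of
 * line so its return address really is the lock caller's call site */
static void __attribute__((noinline)) my_lock(void *lock)
{
	my_lock_slowpath(lock, (unsigned long)__builtin_return_address(0));
}

int main(void)
{
	int dummy_lock;

	my_lock(&dummy_lock);	/* the reported caller_ip is this call site */
	return 0;
}

Built with a plain gcc or clang invocation, this prints the address of the
call site in main() rather than an address inside my_lock_slowpath().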
>
> > diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
> > index 8555c4efe97c..e49f5d2a232b 100644
> > --- a/kernel/locking/rtmutex.c
> > +++ b/kernel/locking/rtmutex.c
> > @@ -24,6 +24,8 @@
> >  #include <linux/sched/wake_q.h>
> >  #include <linux/ww_mutex.h>
> >
> > +#include <trace/events/lock.h>
> > +
> >  #include "rtmutex_common.h"
> >
> >  #ifndef WW_RT
> > @@ -1652,10 +1654,16 @@ static int __sched rt_mutex_slowlock(struct rt_mutex_base *lock,
> >  static __always_inline int __rt_mutex_lock(struct rt_mutex_base *lock,
> >  					    unsigned int state)
> >  {
> > +	int ret;
> > +
> >  	if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current)))
> >  		return 0;
> >
> > -	return rt_mutex_slowlock(lock, NULL, state);
> > +	trace_contention_begin(lock, _RET_IP_, LCB_F_RT | state);
> > +	ret = rt_mutex_slowlock(lock, NULL, state);
> > +	trace_contention_end(lock);
> > +
> > +	return ret;
> >  }
> >  #endif /* RT_MUTEX_BUILD_MUTEX */
> >
> > @@ -1718,9 +1726,11 @@ static __always_inline void __sched rtlock_slowlock(struct rt_mutex_base *lock)
> >  {
> >  	unsigned long flags;
> >
> > +	trace_contention_begin(lock, _RET_IP_, LCB_F_RT | TASK_RTLOCK_WAIT);
> >  	raw_spin_lock_irqsave(&lock->wait_lock, flags);
> >  	rtlock_slowlock_locked(lock);
> >  	raw_spin_unlock_irqrestore(&lock->wait_lock, flags);
> > +	trace_contention_end(lock);
> >  }
>
> Same, if you do it one level in, you can have the tracepoint itself look
> at current->__state. Also, you seem to have forgotten to trace the
> return value. Now you can't tell if the lock was acquired, or was denied
> (ww_mutex) or we were interrupted.

Right, thanks for pointing that out.  Will add that.

Thanks,
Namhyung


> diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
> index 8555c4efe97c..18b9f4bf6f34 100644
> --- a/kernel/locking/rtmutex.c
> +++ b/kernel/locking/rtmutex.c
> @@ -1579,6 +1579,8 @@ static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock,
>
>  	set_current_state(state);
>
> +	trace_contention_begin(lock, _RET_IP_, LCB_F_RT);
> +
>  	ret = task_blocks_on_rt_mutex(lock, waiter, current, ww_ctx, chwalk);
>  	if (likely(!ret))
>  		ret = rt_mutex_slowlock_block(lock, ww_ctx, state, NULL, waiter);
> @@ -1601,6 +1603,9 @@ static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock,
>  	 * unconditionally. We might have to fix that up.
>  	 */
>  	fixup_rt_mutex_waiters(lock);
> +
> +	trace_contention_end(lock, ret);
> +
>  	return ret;
>  }
>
> @@ -1683,6 +1688,8 @@ static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock)
>  	/* Save current state and set state to TASK_RTLOCK_WAIT */
>  	current_save_and_set_rtlock_wait_state();
>
> +	trace_contention_begin(lock, _RET_IP_, LCB_F_RT);
> +
>  	task_blocks_on_rt_mutex(lock, &waiter, current, NULL, RT_MUTEX_MIN_CHAINWALK);
>
>  	for (;;) {
> @@ -1703,6 +1710,8 @@ static void __sched rtlock_slowlock_locked(struct rt_mutex_base *lock)
>  		set_current_state(TASK_RTLOCK_WAIT);
>  	}
>
> +	trace_contention_end(lock, 0);
> +
>  	/* Restore the task state */
>  	current_restore_rtlock_saved_state();
>
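As for recording the return value: that would mean trace_contention_end()
takes a second argument and the event stores it, so ret == 0 means the
lock was acquired while e.g. -EINTR or -EDEADLK show up directly in the
trace.  A rough sketch of what such a TRACE_EVENT in
include/trace/events/lock.h could look like (only a guess at the shape for
illustration, not the definition from this series):

/* SPDX-License-Identifier: GPL-2.0 */
#undef TRACE_SYSTEM
#define TRACE_SYSTEM lock

#if !defined(_TRACE_LOCK_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_LOCK_H

#include <linux/tracepoint.h>

/* end of lock contention; ret tells whether the lock was actually taken */
TRACE_EVENT(contention_end,

	TP_PROTO(void *lock, int ret),

	TP_ARGS(lock, ret),

	TP_STRUCT__entry(
		__field(void *, lock_addr)
		__field(int, ret)
	),

	TP_fast_assign(
		__entry->lock_addr = lock;
		__entry->ret = ret;
	),

	TP_printk("%p (ret=%d)", __entry->lock_addr, __entry->ret)
);

#endif /* _TRACE_LOCK_H */

/* This part must be outside protection */
#include <trace/define_trace.h>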