On 28 Mar 2023 16:02:59 +0200 Peter Zijlstra <peterz@xxxxxxxxxxxxx>
> On Mon, Mar 27, 2023 at 04:24:13PM -0400, Waiman Long wrote:
> >  kernel/locking/rwsem.c | 34 +++++++++++++++++++++++++++++++---
> >  1 file changed, 31 insertions(+), 3 deletions(-)
> > 
> > diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
> > index 7bd26e64827a..cf9dc1e250e0 100644
> > --- a/kernel/locking/rwsem.c
> > +++ b/kernel/locking/rwsem.c
> > @@ -426,6 +426,7 @@ rwsem_waiter_wake(struct rwsem_waiter *waiter, struct wake_q_head *wake_q)
> >  static inline bool rwsem_try_write_lock(struct rw_semaphore *sem,
> >  					struct rwsem_waiter *waiter)
> >  {
> > +	bool first = (rwsem_first_waiter(sem) == waiter);
> >  	long count, new;
> > 
> >  	lockdep_assert_held(&sem->wait_lock);
> > @@ -434,6 +435,9 @@ static inline bool rwsem_try_write_lock(struct rw_semaphore *sem,
> >  	do {
> >  		new = count;
> > 
> > +		if (!first && (count & (RWSEM_FLAG_HANDOFF | RWSEM_LOCK_MASK)))
> > +			return false;
> > +
> >  		if (count & RWSEM_LOCK_MASK) {
> >  			/*
> >  			 * A waiter (first or not) can set the handoff bit
> 
> I couldn't immediately make sense of the above, and I think there's a
> case you're missing where a not-first waiter would still want to set
> handoff.
> 
> After a few detours, I ended up with the below; does that make sense or
> did I just make a bigger mess? (entirely possible due to insufficient
> sleep etc..)
> 
> 
> --- a/kernel/locking/rwsem.c
> +++ b/kernel/locking/rwsem.c
> @@ -426,12 +426,27 @@ rwsem_waiter_wake(struct rwsem_waiter *w
>  static inline bool rwsem_try_write_lock(struct rw_semaphore *sem,
>  					struct rwsem_waiter *waiter)
>  {
> +	bool first = (rwsem_first_waiter(sem) == waiter);
>  	long count, new;
> 
>  	lockdep_assert_held(&sem->wait_lock);
> 
>  	count = atomic_long_read(&sem->count);
>  	do {
> +		/*
> +		 *	first	handoff
> +		 *
> +		 *	0	0	| take
> +		 *	0	1	| not take
> +		 *	1	0	| take
> +		 *	1	1	| take
> +		 *
> +		 */
> +		bool handoff = count & RWSEM_FLAG_HANDOFF;
> +
> +		if (!first && handoff)
> +			return false;
> +
>  		new = count;
> 
>  		if (count & RWSEM_LOCK_MASK) {
> @@ -440,7 +455,7 @@ static inline bool rwsem_try_write_lock(
>  			 * if it is an RT task or wait in the wait queue
>  			 * for too long.
>  			 */
> -			if ((count & RWSEM_FLAG_HANDOFF) ||
> +			if (handoff ||
>  			    (!rt_task(waiter->task) &&
>  			     !time_after(jiffies, waiter->timeout)))
>  				return false;
> @@ -501,11 +516,19 @@ static void rwsem_writer_wake(struct rw_
>  		 */
>  		list_del(&waiter->list);
>  		atomic_long_set(&sem->owner, (long)waiter->task);
> -
> -	} else if (!rwsem_try_write_lock(sem, waiter))
> +		rwsem_waiter_wake(waiter, wake_q);
>  		return;
> +	}
> 
> -	rwsem_waiter_wake(waiter, wake_q);
> +	/*
> +	 * Mark writer at the front of the queue for wakeup.
> +	 *
> +	 * Until the task is actually awoken later by the caller, other writers
> +	 * are able to steal it. Readers, on the other hand, will block as they
> +	 * will notice the queued writer.
> +	 */
> +	wake_q_add(wake_q, waiter->task);
> +	lockevent_inc(rwsem_wake_writer);
>  }
> 
>  static void rwsem_reader_wake(struct rw_semaphore *sem,
> @@ -1038,6 +1061,25 @@ rwsem_waiter_wait(struct rw_semaphore *s
>  			/* Matches rwsem_waiter_wake()'s smp_store_release(). */
>  			break;
>  		}
> +		if (!reader) {
> +			/*
> +			 * If rwsem_writer_wake() did not take the lock, we must
> +			 * rwsem_try_write_lock() here.
> +			 */
> +			raw_spin_lock_irq(&sem->wait_lock);
> +			if (waiter->task && rwsem_try_write_lock(sem, waiter)) {
> +				waiter->task = NULL;
> +				raw_spin_unlock_irq(&sem->wait_lock);
> +				break;
> +			}
> +			raw_spin_unlock_irq(&sem->wait_lock);
> +
> +			if (waiter->handoff_set)
> +				rwsem_spin_on_owner(sem);

This spin makes no sense unless waiter is the first one in the queue,
if the comment quoted below is correct.

> +
> +			if (!smp_load_acquire(&waiter->task))
> +				break;
> +		}
>  		if (signal_pending_state(state, current)) {
>  			raw_spin_lock_irq(&sem->wait_lock);
>  			if (waiter->task)

https://git.kernel.org/pub/scm/linux/kernel/git/peterz/queue.git/tree/kernel/locking/rwsem.c?h=locking/rwsem&id=91deffc826935#n459

	/*
	 * We have either acquired the lock with handoff bit cleared or set
	 * the handoff bit. Only the first waiter can have its handoff_set
	 * set here to enable optimistic spinning in slowpath loop.
	 */
	if (new & RWSEM_FLAG_HANDOFF) {
		waiter->handoff_set = true;
		lockevent_inc(rwsem_wlock_handoff);
		return false;
	}
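
For reference, the "not take" row in the truth table further up is the
only one where the attempt is abandoned outright: a not-first waiter
that observes the handoff bit must back off, because handoff reserves
the lock for the first waiter. A minimal stand-alone sketch of that
predicate (plain C; the RWSEM_FLAG_HANDOFF value is copied from
mainline kernel/locking/rwsem.c, and may_attempt_write_lock() is a toy
model of mine, not the kernel code):

	#include <stdbool.h>
	#include <stdio.h>

	#define RWSEM_FLAG_HANDOFF	(1UL << 2)	/* as in mainline rwsem.c */

	/* Encodes the first/handoff truth table from the hunk above. */
	static bool may_attempt_write_lock(bool first, unsigned long count)
	{
		bool handoff = count & RWSEM_FLAG_HANDOFF;

		return first || !handoff;	/* only "0 1" is "not take" */
	}

	int main(void)
	{
		printf("%d\n", may_attempt_write_lock(false, 0));			/* 1: take */
		printf("%d\n", may_attempt_write_lock(false, RWSEM_FLAG_HANDOFF));	/* 0: not take */
		printf("%d\n", may_attempt_write_lock(true,  0));			/* 1: take */
		printf("%d\n", may_attempt_write_lock(true,  RWSEM_FLAG_HANDOFF));	/* 1: take */
		return 0;
	}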
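
As an aside, the waiter->task handshake the wait loop relies on
("Matches rwsem_waiter_wake()'s smp_store_release()") is a plain
release/acquire pairing: the waker publishes the grant by storing NULL
with release semantics, and the waiter polls with acquire semantics so
it also observes everything the waker wrote beforehand. A toy model in
C11 atomics (the toy_* names are made up for illustration; the kernel
uses smp_store_release()/smp_load_acquire()):

	#include <stdatomic.h>
	#include <stdbool.h>
	#include <stddef.h>

	struct task { int dummy; };

	struct toy_waiter {
		_Atomic(struct task *) task;	/* NULL means "woken, lock granted" */
	};

	/*
	 * Waker side: everything written before this store is visible to
	 * the waiter once it observes task == NULL.
	 */
	static void toy_waiter_wake(struct toy_waiter *w)
	{
		atomic_store_explicit(&w->task, NULL, memory_order_release);
	}

	/* Waiter side: poll for the grant. */
	static bool toy_waiter_granted(struct toy_waiter *w)
	{
		return atomic_load_explicit(&w->task, memory_order_acquire) == NULL;
	}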