Add lock event counting calls so that we can track the number of lock
events happening in the rwsem code.

With CONFIG_LOCK_EVENT_COUNTS on and booting a 1-socket 22-core
44-thread x86-64 system, the non-zero rwsem counts after system bootup
were as follows:

  rwsem_opt_fail=113
  rwsem_opt_wlock=13647
  rwsem_rlock=176
  rwsem_rlock_fast=10
  rwsem_wake_reader=153
  rwsem_wake_writer=139
  rwsem_wlock=113

It can be seen that most of the lock acquisitions in the slowpath were
writer-locks in the optimistic spinning code path with no sleeping at
all. Only about 4% of locks were acquired after sleeping.

Signed-off-by: Waiman Long <longman@xxxxxxxxxx>
---
 arch/Kconfig                      |  2 +-
 kernel/locking/lock_events_list.h | 17 +++++++++++++++++
 kernel/locking/rwsem-xadd.c       | 12 ++++++++++++
 3 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index af147c2..7471791 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -891,7 +891,7 @@ config ARCH_USE_MEMREMAP_PROT
 config LOCK_EVENT_COUNTS
 	bool "Locking event counts collection"
 	depends on DEBUG_FS
-	depends on QUEUED_SPINLOCKS
+	depends on (QUEUED_SPINLOCKS || RWSEM_XCHGADD_ALGORITHM)
 	---help---
 	  Enable light-weight counting of various locking related events
 	  in the system with minimal performance impact. This reduces
diff --git a/kernel/locking/lock_events_list.h b/kernel/locking/lock_events_list.h
index 8b4d2e1..c33c5df 100644
--- a/kernel/locking/lock_events_list.h
+++ b/kernel/locking/lock_events_list.h
@@ -48,3 +48,20 @@
 LOCK_EVENT(lock_use_node4)	/* # of locking ops that use 4th percpu node */
 LOCK_EVENT(lock_no_node)	/* # of locking ops w/o using percpu node */
 #endif /* CONFIG_QUEUED_SPINLOCKS */
+
+#ifdef CONFIG_RWSEM_XCHGADD_ALGORITHM
+/*
+ * Locking events for rwsem
+ */
+LOCK_EVENT(rwsem_sleep_reader)	/* # of reader sleeps			*/
+LOCK_EVENT(rwsem_sleep_writer)	/* # of writer sleeps			*/
+LOCK_EVENT(rwsem_wake_reader)	/* # of reader wakeups			*/
+LOCK_EVENT(rwsem_wake_writer)	/* # of writer wakeups			*/
+LOCK_EVENT(rwsem_opt_wlock)	/* # of write locks opt-spin acquired	*/
+LOCK_EVENT(rwsem_opt_fail)	/* # of failed opt-spinnings		*/
+LOCK_EVENT(rwsem_rlock)		/* # of read locks acquired		*/
+LOCK_EVENT(rwsem_rlock_fast)	/* # of fast read locks acquired	*/
+LOCK_EVENT(rwsem_rlock_fail)	/* # of failed read lock acquisitions	*/
+LOCK_EVENT(rwsem_wlock)		/* # of write locks acquired		*/
+LOCK_EVENT(rwsem_wlock_fail)	/* # of failed write lock acquisitions	*/
+#endif /* CONFIG_RWSEM_XCHGADD_ALGORITHM */
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
index 62422a6..fff231a 100644
--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c
@@ -20,6 +20,7 @@
 #include <linux/osq_lock.h>
 
 #include "rwsem-xadd.h"
+#include "lock_events.h"
 
 /*
  * Guide to the rw_semaphore's count field for common values.
@@ -147,6 +148,7 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem,
 			 * will notice the queued writer.
 			 */
 			wake_q_add(wake_q, waiter->task);
+			lockevent_inc(rwsem_wake_writer);
 		}
 
 		return;
@@ -214,6 +216,7 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem,
 	}
 
 	adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment;
+	lockevent_cond_inc(rwsem_wake_reader, woken);
 	if (list_empty(&sem->wait_list)) {
 		/* hit end of list above */
 		adjustment -= RWSEM_WAITING_BIAS;
@@ -269,6 +272,7 @@ static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem)
 					count + RWSEM_ACTIVE_WRITE_BIAS);
 		if (old == count) {
 			rwsem_set_owner(sem);
+			lockevent_inc(rwsem_opt_wlock);
 			return true;
 		}
 
@@ -394,6 +398,7 @@ static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
 	osq_unlock(&sem->osq);
done:
 	preempt_enable();
+	lockevent_cond_inc(rwsem_opt_fail, !taken);
 	return taken;
 }
 
@@ -441,6 +446,7 @@ static inline bool rwsem_has_spinner(struct rw_semaphore *sem)
 		if (atomic_long_read(&sem->count) >= 0) {
 			raw_spin_unlock_irq(&sem->wait_lock);
 			rwsem_set_reader_owned(sem);
+			lockevent_inc(rwsem_rlock_fast);
 			return sem;
 		}
 		adjustment += RWSEM_WAITING_BIAS;
@@ -477,9 +483,11 @@ static inline bool rwsem_has_spinner(struct rw_semaphore *sem)
 			break;
 		}
 		schedule();
+		lockevent_inc(rwsem_sleep_reader);
 	}
 
 	__set_current_state(TASK_RUNNING);
+	lockevent_inc(rwsem_rlock);
 	return sem;
 out_nolock:
 	list_del(&waiter.list);
@@ -487,6 +495,7 @@ static inline bool rwsem_has_spinner(struct rw_semaphore *sem)
 		atomic_long_add(-RWSEM_WAITING_BIAS, &sem->count);
 	raw_spin_unlock_irq(&sem->wait_lock);
 	__set_current_state(TASK_RUNNING);
+	lockevent_inc(rwsem_rlock_fail);
 	return ERR_PTR(-EINTR);
 }
 
@@ -580,6 +589,7 @@ static inline bool rwsem_has_spinner(struct rw_semaphore *sem)
 				goto out_nolock;
 
 			schedule();
+			lockevent_inc(rwsem_sleep_writer);
 			set_current_state(state);
 		} while ((count = atomic_long_read(&sem->count)) & RWSEM_ACTIVE_MASK);
 
@@ -588,6 +598,7 @@ static inline bool rwsem_has_spinner(struct rw_semaphore *sem)
 	__set_current_state(TASK_RUNNING);
 	list_del(&waiter.list);
 	raw_spin_unlock_irq(&sem->wait_lock);
+	lockevent_inc(rwsem_wlock);
 
 	return ret;
 
@@ -601,6 +612,7 @@ static inline bool rwsem_has_spinner(struct rw_semaphore *sem)
 		__rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
 	raw_spin_unlock_irq(&sem->wait_lock);
 	wake_up_q(&wake_q);
+	lockevent_inc(rwsem_wlock_fail);
 	return ERR_PTR(-EINTR);
 }
-- 
1.8.3.1
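
For reference, the lockevent_inc()/lockevent_cond_inc() calls added above
are intended to compile down to a single per-CPU counter increment, which
is what keeps the overhead minimal. A rough sketch of the mechanism,
assuming the x-macro scheme used by the existing qspinlock event counting
that lock_events.h generalizes (lock_events_list.h is re-included with
different LOCK_EVENT() definitions to generate both the LOCKEVENT_* enum
and the debugfs name table); the macro bodies below are illustrative, not
copied verbatim from the tree:

	/*
	 * Each LOCK_EVENT(name) in lock_events_list.h becomes a
	 * LOCKEVENT_name enum value, with lockevent_num generated as the
	 * enum terminator, indexing one counter per event per CPU.
	 */
	DECLARE_PER_CPU(unsigned long, lockevents[lockevent_num]);

	/* Unconditional increment: one per-CPU add, no atomics or locks. */
	#define lockevent_inc(ev)					\
		this_cpu_inc(lockevents[LOCKEVENT_ ## ev])

	/* Count the event only when cond is nonzero,			\
	 * e.g. rwsem_opt_fail is counted only when !taken.		\
	 */
	#define lockevent_cond_inc(ev, cond)				\
	do {								\
		if (cond)						\
			lockevent_inc(ev);				\
	} while (0)

Reading the corresponding debugfs file (under <debugfs>/lock_event_counts/
with the existing lock event counting code) sums the counters across CPUs,
which is how the per-event totals quoted in the changelog were obtained.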