[tip:locking/core] locking/mutex: Optimize __mutex_trylock_fast()

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Commit-ID:  c427f69564e2a844c5fcf2804042609342513da0
Gitweb:     https://git.kernel.org/tip/c427f69564e2a844c5fcf2804042609342513da0
Author:     Peter Zijlstra <peterz@xxxxxxxxxxxxx>
AuthorDate: Thu, 5 Apr 2018 11:05:35 +0200
Committer:  Ingo Molnar <mingo@xxxxxxxxxx>
CommitDate: Fri, 4 May 2018 10:02:39 +0200

locking/mutex: Optimize __mutex_trylock_fast()

Use try_cmpxchg to avoid the pointless TEST instruction..
And add the (missing) atomic_long_try_cmpxchg*() wrappery.

On x86_64 this gives:

0000000000000710 <mutex_lock>:						0000000000000710 <mutex_lock>:
 710:   65 48 8b 14 25 00 00    mov    %gs:0x0,%rdx                      710:   65 48 8b 14 25 00 00    mov    %gs:0x0,%rdx
 717:   00 00                                                            717:   00 00
                        715: R_X86_64_32S       current_task                                    715: R_X86_64_32S       current_task
 719:   31 c0                   xor    %eax,%eax                         719:   31 c0                   xor    %eax,%eax
 71b:   f0 48 0f b1 17          lock cmpxchg %rdx,(%rdi)                 71b:   f0 48 0f b1 17          lock cmpxchg %rdx,(%rdi)
 720:   48 85 c0                test   %rax,%rax                         720:   75 02                   jne    724 <mutex_lock+0x14>
 723:   75 02                   jne    727 <mutex_lock+0x17>             722:   f3 c3                   repz retq
 725:   f3 c3                   repz retq                                724:   eb da                   jmp    700 <__mutex_lock_slowpath>
 727:   eb d7                   jmp    700 <__mutex_lock_slowpath>       726:   66 2e 0f 1f 84 00 00    nopw   %cs:0x0(%rax,%rax,1)
 729:   0f 1f 80 00 00 00 00    nopl   0x0(%rax)                         72d:   00 00 00

On ARM64 this gives:

000000000000638 <mutex_lock>:						0000000000000638 <mutex_lock>:
     638:       d5384101        mrs     x1, sp_el0                           638:       d5384101        mrs     x1, sp_el0
     63c:       d2800002        mov     x2, #0x0                             63c:       d2800002        mov     x2, #0x0
     640:       f9800011        prfm    pstl1strm, [x0]                      640:       f9800011        prfm    pstl1strm, [x0]
     644:       c85ffc03        ldaxr   x3, [x0]                             644:       c85ffc03        ldaxr   x3, [x0]
     648:       ca020064        eor     x4, x3, x2                           648:       ca020064        eor     x4, x3, x2
     64c:       b5000064        cbnz    x4, 658 <mutex_lock+0x20>            64c:       b5000064        cbnz    x4, 658 <mutex_lock+0x20>
     650:       c8047c01        stxr    w4, x1, [x0]                         650:       c8047c01        stxr    w4, x1, [x0]
     654:       35ffff84        cbnz    w4, 644 <mutex_lock+0xc>             654:       35ffff84        cbnz    w4, 644 <mutex_lock+0xc>
     658:       b40000c3        cbz     x3, 670 <mutex_lock+0x38>            658:       b5000043        cbnz    x3, 660 <mutex_lock+0x28>
     65c:       a9bf7bfd        stp     x29, x30, [sp,#-16]!                 65c:       d65f03c0        ret
     660:       910003fd        mov     x29, sp                              660:       a9bf7bfd        stp     x29, x30, [sp,#-16]!
     664:       97ffffef        bl      620 <__mutex_lock_slowpath>          664:       910003fd        mov     x29, sp
     668:       a8c17bfd        ldp     x29, x30, [sp],#16                   668:       97ffffee        bl      620 <__mutex_lock_slowpath>
     66c:       d65f03c0        ret                                          66c:       a8c17bfd        ldp     x29, x30, [sp],#16
     670:       d65f03c0        ret                                          670:       d65f03c0        ret

Reported-by: Matthew Wilcox <mawilcox@xxxxxxxxxxxxx>
Acked-by: Will Deacon <will.deacon@xxxxxxx>
Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Signed-off-by: Ingo Molnar <mingo@xxxxxxxxxx>
---
 include/asm-generic/atomic-long.h | 17 +++++++++++++++++
 kernel/locking/mutex.c            |  3 ++-
 2 files changed, 19 insertions(+), 1 deletion(-)

diff --git a/include/asm-generic/atomic-long.h b/include/asm-generic/atomic-long.h
index 5b2b0b5ea06d..87d14476edc2 100644
--- a/include/asm-generic/atomic-long.h
+++ b/include/asm-generic/atomic-long.h
@@ -25,6 +25,7 @@ typedef atomic64_t atomic_long_t;
 
 #define ATOMIC_LONG_INIT(i)	ATOMIC64_INIT(i)
 #define ATOMIC_LONG_PFX(x)	atomic64 ## x
+#define ATOMIC_LONG_TYPE	s64
 
 #else
 
@@ -32,6 +33,7 @@ typedef atomic_t atomic_long_t;
 
 #define ATOMIC_LONG_INIT(i)	ATOMIC_INIT(i)
 #define ATOMIC_LONG_PFX(x)	atomic ## x
+#define ATOMIC_LONG_TYPE	int
 
 #endif
 
@@ -90,6 +92,21 @@ ATOMIC_LONG_ADD_SUB_OP(sub, _release)
 #define atomic_long_cmpxchg(l, old, new) \
 	(ATOMIC_LONG_PFX(_cmpxchg)((ATOMIC_LONG_PFX(_t) *)(l), (old), (new)))
 
+
+#define atomic_long_try_cmpxchg_relaxed(l, old, new) \
+	(ATOMIC_LONG_PFX(_try_cmpxchg_relaxed)((ATOMIC_LONG_PFX(_t) *)(l), \
+					   (ATOMIC_LONG_TYPE *)(old), (ATOMIC_LONG_TYPE)(new)))
+#define atomic_long_try_cmpxchg_acquire(l, old, new) \
+	(ATOMIC_LONG_PFX(_try_cmpxchg_acquire)((ATOMIC_LONG_PFX(_t) *)(l), \
+					   (ATOMIC_LONG_TYPE *)(old), (ATOMIC_LONG_TYPE)(new)))
+#define atomic_long_try_cmpxchg_release(l, old, new) \
+	(ATOMIC_LONG_PFX(_try_cmpxchg_release)((ATOMIC_LONG_PFX(_t) *)(l), \
+					   (ATOMIC_LONG_TYPE *)(old), (ATOMIC_LONG_TYPE)(new)))
+#define atomic_long_try_cmpxchg(l, old, new) \
+	(ATOMIC_LONG_PFX(_try_cmpxchg)((ATOMIC_LONG_PFX(_t) *)(l), \
+				       (ATOMIC_LONG_TYPE *)(old), (ATOMIC_LONG_TYPE)(new)))
+
+
 #define atomic_long_xchg_relaxed(v, new) \
 	(ATOMIC_LONG_PFX(_xchg_relaxed)((ATOMIC_LONG_PFX(_t) *)(v), (new)))
 #define atomic_long_xchg_acquire(v, new) \
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index 2048359f33d2..f44f658ae629 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
@@ -139,8 +139,9 @@ static inline bool __mutex_trylock(struct mutex *lock)
 static __always_inline bool __mutex_trylock_fast(struct mutex *lock)
 {
 	unsigned long curr = (unsigned long)current;
+	unsigned long zero = 0UL;
 
-	if (!atomic_long_cmpxchg_acquire(&lock->owner, 0UL, curr))
+	if (atomic_long_try_cmpxchg_acquire(&lock->owner, &zero, curr))
 		return true;
 
 	return false;
--
To unsubscribe from this list: send the line "unsubscribe linux-tip-commits" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [Linux Stable Commits]     [Linux Stable Kernel]     [Linux Kernel]     [Linux USB Devel]     [Linux Video &Media]     [Linux Audio Users]     [Yosemite News]     [Linux SCSI]

  Powered by Linux