[PATCH V3] x86 spinlock: Fix memory corruption on completing completions

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Paravirt spinlock clears slowpath flag after doing unlock.
As explained by Linus currently it does:
                prev = *lock;
                add_smp(&lock->tickets.head, TICKET_LOCK_INC);

                /* add_smp() is a full mb() */

                if (unlikely(lock->tickets.tail & TICKET_SLOWPATH_FLAG))
                        __ticket_unlock_slowpath(lock, prev);


which is *exactly* the kind of things you cannot do with spinlocks,
because after you've done the "add_smp()" and released the spinlock
for the fast-path, you can't access the spinlock any more.  Exactly
because a fast-path lock might come in, and release the whole data
structure.

Linus suggested that we should not do any writes to lock after unlock(),
and we can move slowpath clearing to fastpath lock.

So this patch implements the fix with:
1. Moving slowpath flag to head (Oleg).
2. use xadd to avoid read/write after unlock that checks the need for
unlock_kick (Linus).

Result:
 setup: 16core (32 cpu +ht sandy bridge 8GB 16vcpu guest)
 benchmark overcommit %improve
 kernbench  1x           -0.13
 kernbench  2x            0.02
 dbench     1x           -1.77
 dbench     2x           -0.63

[Jeremy: hinted missing TICKET_LOCK_INC for kick]
[Oleg: Moving slowpath flag to head, ticket_equals idea]

Reported-by: Sasha Levin <sasha.levin@xxxxxxxxxx>
Suggested-by: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Signed-off-by: Raghavendra K T <raghavendra.kt@xxxxxxxxxxxxxxxxxx>
---
 arch/x86/include/asm/spinlock.h | 87 ++++++++++++++++++++---------------------
 arch/x86/kernel/kvm.c           |  4 +-
 2 files changed, 45 insertions(+), 46 deletions(-)

potential TODO:
 * The whole patch be splitted into, 1. move slowpath flag
     2. fix memory corruption in completion problem ??
 * May be we could directly pass inc for __ticket_check_and_clear_slowpath
   but I hope current code is more readable.

Changes since V2:
  - Move the slowpath flag to head, this enables xadd usage in unlock code
    and inturn we can get rid of read/write after  unlock (Oleg)
  - usage of ticket_equals (Oleg)

Changes since V1:
  - Add missing TICKET_LOCK_INC before unlock kick (fixes hang in overcommit: Jeremy).
  - Remove SLOWPATH_FLAG clearing in fast lock. (Jeremy)
  - clear SLOWPATH_FLAG in arch_spin_value_unlocked during comparison.
 Note: The current implementation is still based on avoid writing after unlock.
  we could still have potential invalid memory read. (Sasha)

 Result: 
 setup: 16core (32 cpu +ht sandy bridge 8GB 16vcpu guest)

base = 3_19_rc7

3_19_rc7_spinfix_v3
+-----------+-----------+-----------+------------+-----------+
     kernbench (Time taken in sec lower is better)
+-----------+-----------+-----------+------------+-----------+
     base       %stdev    patched      %stdev      %improve
+-----------+-----------+-----------+------------+-----------+
1x   54.2300     3.0652	    54.3008     4.0366	  -0.13056
2x   90.1883     5.5509	    90.1650     6.4336	   0.02583
+-----------+-----------+-----------+------------+-----------+
+-----------+-----------+-----------+------------+-----------+
    dbench (Throughput higher is better) 
+-----------+-----------+-----------+------------+-----------+
     base       %stdev    patched      %stdev      %improve
+-----------+-----------+-----------+------------+-----------+
1x 7029.9188     2.5952	  6905.0712     4.4737	  -1.77595
2x 3254.2075    14.8291	  3233.7137    26.8784	  -0.62976
+-----------+-----------+-----------+------------+-----------+

 (here is the result I got from the patches, I believe there may
 be some small overhead from xadd etc, but overall looks fine but
 a thorough test may be needed)

diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index 625660f..9697b45 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -46,7 +46,7 @@ static __always_inline bool static_key_false(struct static_key *key);
 
 static inline void __ticket_enter_slowpath(arch_spinlock_t *lock)
 {
-	set_bit(0, (volatile unsigned long *)&lock->tickets.tail);
+	set_bit(0, (volatile unsigned long *)&lock->tickets.head);
 }
 
 #else  /* !CONFIG_PARAVIRT_SPINLOCKS */
@@ -60,10 +60,30 @@ static inline void __ticket_unlock_kick(arch_spinlock_t *lock,
 }
 
 #endif /* CONFIG_PARAVIRT_SPINLOCKS */
+static inline int  __tickets_equal(__ticket_t one, __ticket_t two)
+{
+	return !((one ^ two) & ~TICKET_SLOWPATH_FLAG);
+}
+
+static inline void __ticket_check_and_clear_slowpath(arch_spinlock_t *lock,
+							__ticket_t head)
+{
+	if (head & TICKET_SLOWPATH_FLAG) {
+		arch_spinlock_t old, new;
+
+		old.tickets.head = head;
+		new.tickets.head = head & ~TICKET_SLOWPATH_FLAG;
+		old.tickets.tail = new.tickets.head + TICKET_LOCK_INC;
+		new.tickets.tail = old.tickets.tail;
+
+		/* try to clear slowpath flag when there are no contenders */
+		cmpxchg(&lock->head_tail, old.head_tail, new.head_tail);
+	}
+}
 
 static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock)
 {
-	return lock.tickets.head == lock.tickets.tail;
+	return __tickets_equal(lock.tickets.head, lock.tickets.tail);
 }
 
 /*
@@ -87,18 +107,22 @@ static __always_inline void arch_spin_lock(arch_spinlock_t *lock)
 	if (likely(inc.head == inc.tail))
 		goto out;
 
-	inc.tail &= ~TICKET_SLOWPATH_FLAG;
 	for (;;) {
 		unsigned count = SPIN_THRESHOLD;
 
 		do {
-			if (READ_ONCE(lock->tickets.head) == inc.tail)
-				goto out;
+			inc.head = READ_ONCE(lock->tickets.head);
+			if (__tickets_equal(inc.head, inc.tail))
+				goto clear_slowpath;
 			cpu_relax();
 		} while (--count);
 		__ticket_lock_spinning(lock, inc.tail);
 	}
-out:	barrier();	/* make sure nothing creeps before the lock is taken */
+clear_slowpath:
+	if (TICKET_SLOWPATH_FLAG)
+		__ticket_check_and_clear_slowpath(lock, inc.head);
+out:
+	barrier();	/* make sure nothing creeps before the lock is taken */
 }
 
 static __always_inline int arch_spin_trylock(arch_spinlock_t *lock)
@@ -106,56 +130,30 @@ static __always_inline int arch_spin_trylock(arch_spinlock_t *lock)
 	arch_spinlock_t old, new;
 
 	old.tickets = READ_ONCE(lock->tickets);
-	if (old.tickets.head != (old.tickets.tail & ~TICKET_SLOWPATH_FLAG))
+	if (!__tickets_equal(old.tickets.head, old.tickets.tail))
 		return 0;
 
 	new.head_tail = old.head_tail + (TICKET_LOCK_INC << TICKET_SHIFT);
+	new.head_tail &= ~TICKET_SLOWPATH_FLAG;
 
 	/* cmpxchg is a full barrier, so nothing can move before it */
 	return cmpxchg(&lock->head_tail, old.head_tail, new.head_tail) == old.head_tail;
 }
 
-static inline void __ticket_unlock_slowpath(arch_spinlock_t *lock,
-					    arch_spinlock_t old)
-{
-	arch_spinlock_t new;
-
-	BUILD_BUG_ON(((__ticket_t)NR_CPUS) != NR_CPUS);
-
-	/* Perform the unlock on the "before" copy */
-	old.tickets.head += TICKET_LOCK_INC;
-
-	/* Clear the slowpath flag */
-	new.head_tail = old.head_tail & ~(TICKET_SLOWPATH_FLAG << TICKET_SHIFT);
-
-	/*
-	 * If the lock is uncontended, clear the flag - use cmpxchg in
-	 * case it changes behind our back though.
-	 */
-	if (new.tickets.head != new.tickets.tail ||
-	    cmpxchg(&lock->head_tail, old.head_tail,
-					new.head_tail) != old.head_tail) {
-		/*
-		 * Lock still has someone queued for it, so wake up an
-		 * appropriate waiter.
-		 */
-		__ticket_unlock_kick(lock, old.tickets.head);
-	}
-}
-
 static __always_inline void arch_spin_unlock(arch_spinlock_t *lock)
 {
 	if (TICKET_SLOWPATH_FLAG &&
-	    static_key_false(&paravirt_ticketlocks_enabled)) {
-		arch_spinlock_t prev;
+		static_key_false(&paravirt_ticketlocks_enabled)) {
+		__ticket_t head;
 
-		prev = *lock;
-		add_smp(&lock->tickets.head, TICKET_LOCK_INC);
+		BUILD_BUG_ON(((__ticket_t)NR_CPUS) != NR_CPUS);
 
-		/* add_smp() is a full mb() */
+		head = xadd(&lock->tickets.head, TICKET_LOCK_INC);
 
-		if (unlikely(lock->tickets.tail & TICKET_SLOWPATH_FLAG))
-			__ticket_unlock_slowpath(lock, prev);
+		if (unlikely(head & TICKET_SLOWPATH_FLAG)) {
+			head &= ~TICKET_SLOWPATH_FLAG;
+			__ticket_unlock_kick(lock, (head + TICKET_LOCK_INC));
+		}
 	} else
 		__add(&lock->tickets.head, TICKET_LOCK_INC, UNLOCK_LOCK_PREFIX);
 }
@@ -164,7 +162,7 @@ static inline int arch_spin_is_locked(arch_spinlock_t *lock)
 {
 	struct __raw_tickets tmp = READ_ONCE(lock->tickets);
 
-	return tmp.tail != tmp.head;
+	return tmp.tail != (tmp.head & ~TICKET_SLOWPATH_FLAG);
 }
 
 static inline int arch_spin_is_contended(arch_spinlock_t *lock)
@@ -191,8 +189,7 @@ static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
 		 * We need to check "unlocked" in a loop, tmp.head == head
 		 * can be false positive because of overflow.
 		 */
-		if (tmp.head == (tmp.tail & ~TICKET_SLOWPATH_FLAG) ||
-		    tmp.head != head)
+		if (__tickets_equal(tmp.head, tmp.tail) || tmp.head != head)
 			break;
 
 		cpu_relax();
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 94f6434..e758b46 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -727,6 +727,7 @@ __visible void kvm_lock_spinning(struct arch_spinlock *lock, __ticket_t want)
 	int cpu;
 	u64 start;
 	unsigned long flags;
+	__ticket_t head;
 
 	if (in_nmi())
 		return;
@@ -772,7 +773,8 @@ __visible void kvm_lock_spinning(struct arch_spinlock *lock, __ticket_t want)
 	 * check again make sure it didn't become free while
 	 * we weren't looking.
 	 */
-	if (ACCESS_ONCE(lock->tickets.head) == want) {
+	head = ACCESS_ONCE(lock->tickets.head);
+	if (__tickets_equal(head, want)) {
 		add_stats(TAKEN_SLOW_PICKUP, 1);
 		goto out;
 	}
-- 
1.7.11.7

_______________________________________________
Virtualization mailing list
Virtualization@xxxxxxxxxxxxxxxxxxxxxxxxxx
https://lists.linuxfoundation.org/mailman/listinfo/virtualization




[Index of Archives]     [KVM Development]     [Libvirt Development]     [Libvirt Users]     [CentOS Virtualization]     [Netdev]     [Ethernet Bridging]     [Linux Wireless]     [Kernel Newbies]     [Security]     [Linux for Hams]     [Netfilter]     [Bugtraq]     [Yosemite Forum]     [MIPS Linux]     [ARM Linux]     [Linux RAID]     [Linux Admin]     [Samba]

  Powered by Linux