On Wednesday, 03 November 2010 at 10:59 -0400, Jeremy Fitzhardinge wrote:
> From: Jeremy Fitzhardinge <jeremy.fitzhardinge@xxxxxxxxxx>
>
> The inner loop of __ticket_spin_lock isn't doing anything very special,
> so reimplement it in C.
>
> For the 8 bit ticket lock variant, we use a register union to get direct
> access to the lower and upper bytes in the tickets, but unfortunately gcc
> won't generate a direct comparison between the two halves of the register,
> so the generated asm isn't quite as pretty as the hand-coded version.
> However benchmarking shows that this is actually a small improvement in
> runtime performance on some benchmarks, and never a slowdown.
>
> We also need to make sure there's a barrier at the end of the lock loop
> to make sure that the compiler doesn't move any instructions from within
> the locked region into the region where we don't yet own the lock.
>
> Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@xxxxxxxxxx>
> ---
>  arch/x86/include/asm/spinlock.h |   58 +++++++++++++++++++-------------------
>  1 files changed, 29 insertions(+), 29 deletions(-)
>
> diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
> index d6d5784..6711d36 100644
> --- a/arch/x86/include/asm/spinlock.h
> +++ b/arch/x86/include/asm/spinlock.h
> @@ -58,21 +58,21 @@
>  #if (NR_CPUS < 256)
>  static __always_inline void __ticket_spin_lock(arch_spinlock_t *lock)
>  {
> -	unsigned short inc = 1 << TICKET_SHIFT;
> -
> -	asm volatile (
> -		LOCK_PREFIX "xaddw %w0, %1\n"
> -		"1:\t"
> -		"cmpb %h0, %b0\n\t"
> -		"je 2f\n\t"
> -		"rep ; nop\n\t"
> -		"movb %1, %b0\n\t"
> -		/* don't need lfence here, because loads are in-order */
> -		"jmp 1b\n"
> -		"2:"
> -		: "+Q" (inc), "+m" (lock->slock)
> -		:
> -		: "memory", "cc");
> +	register union {
> +		struct __raw_tickets tickets;
> +		unsigned short slock;
> +	} inc = { .slock = 1 << TICKET_SHIFT };
> +
> +	asm volatile (LOCK_PREFIX "xaddw %w0, %1\n"
> +		      : "+Q" (inc), "+m" (lock->slock) : : "memory", "cc");
> +
> +	for (;;) {
> +		if (inc.tickets.head == inc.tickets.tail)
> +			return;
> +		cpu_relax();
> +		inc.tickets.head = ACCESS_ONCE(lock->tickets.head);
> +	}
> +	barrier();		/* make sure nothing creeps before the lock is taken */

Isn't this barrier() never reached?
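If that barrier() is meant to keep the compiler from pulling critical-section
accesses above the point where the lock is taken, one way to make it reachable
would be to leave the loop with break instead of return, so the function falls
through to barrier() once head == tail. Just an untested sketch on top of the
quoted code, reusing its types and helpers:

	for (;;) {
		if (inc.tickets.head == inc.tickets.tail)
			break;			/* we own the lock */
		cpu_relax();
		inc.tickets.head = ACCESS_ONCE(lock->tickets.head);
	}
	barrier();	/* now reached: compiler can't hoist critical-section
			 * accesses above this point */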
>  }
>
>  static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock)
> @@ -105,22 +105,22 @@ static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock)
>  static __always_inline void __ticket_spin_lock(arch_spinlock_t *lock)
>  {
>  	unsigned inc = 1 << TICKET_SHIFT;
> -	unsigned tmp;
> +	__ticket_t tmp;
>
> -	asm volatile(LOCK_PREFIX "xaddl %0, %1\n"
> -		     "movzwl %w0, %2\n\t"
> -		     "shrl $16, %0\n\t"
> -		     "1:\t"
> -		     "cmpl %0, %2\n\t"
> -		     "je 2f\n\t"
> -		     "rep ; nop\n\t"
> -		     "movzwl %1, %2\n\t"
> -		     /* don't need lfence here, because loads are in-order */
> -		     "jmp 1b\n"
> -		     "2:"
> -		     : "+r" (inc), "+m" (lock->slock), "=&r" (tmp)
> -		     :
> -		     : "memory", "cc");
> +	asm volatile(LOCK_PREFIX "xaddl %0, %1\n\t"
> +		     : "+r" (inc), "+m" (lock->slock)
> +		     : : "memory", "cc");
> +
> +	tmp = inc;
> +	inc >>= TICKET_SHIFT;
> +
> +	for (;;) {
> +		if ((__ticket_t)inc == tmp)
> +			return;
> +		cpu_relax();
> +		tmp = ACCESS_ONCE(lock->tickets.head);
> +	}
> +	barrier();		/* make sure nothing creeps before the lock is taken */

Same here; a similar rearrangement is sketched below.

>  }
>
>  static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock)
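For the NR_CPUS >= 256 variant the same change would work (again only an
untested sketch against the quoted code): break out of the loop so barrier()
is executed once our ticket comes up.

	tmp = inc;
	inc >>= TICKET_SHIFT;

	for (;;) {
		if ((__ticket_t)inc == tmp)
			break;			/* we own the lock */
		cpu_relax();
		tmp = ACCESS_ONCE(lock->tickets.head);
	}
	barrier();	/* now reached */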