You don't happen to have a proper state diagram for this thing do you? I suppose I'm going to have to make one; this is all getting a bit unwieldy, and those xchg() + fixup things are hard to read. On Wed, Feb 26, 2014 at 10:14:23AM -0500, Waiman Long wrote: > +static inline int queue_spin_trylock_quick(struct qspinlock *lock, int qsval) > +{ > + union arch_qspinlock *qlock = (union arch_qspinlock *)lock; > + u16 old; > + > + /* > + * Fall into the quick spinning code path only if no one is waiting > + * or the lock is available. > + */ > + if (unlikely((qsval != _QSPINLOCK_LOCKED) && > + (qsval != _QSPINLOCK_WAITING))) > + return 0; > + > + old = xchg(&qlock->lock_wait, _QSPINLOCK_WAITING|_QSPINLOCK_LOCKED); > + > + if (old == 0) { > + /* > + * Got the lock, can clear the waiting bit now > + */ > + smp_u8_store_release(&qlock->wait, 0); So we just did an atomic op, and now you're trying to optimize this write. Why do you need a whole byte for that? Surely a cmpxchg loop with the right atomic op can't be _that_ much slower? Its far more readable and likely avoids that steal fail below as well. > + return 1; > + } else if (old == _QSPINLOCK_LOCKED) { > +try_again: > + /* > + * Wait until the lock byte is cleared to get the lock > + */ > + do { > + cpu_relax(); > + } while (ACCESS_ONCE(qlock->lock)); > + /* > + * Set the lock bit & clear the waiting bit > + */ > + if (cmpxchg(&qlock->lock_wait, _QSPINLOCK_WAITING, > + _QSPINLOCK_LOCKED) == _QSPINLOCK_WAITING) > + return 1; > + /* > + * Someone has steal the lock, so wait again > + */ > + goto try_again; That's just a fail.. steals should not ever be allowed. It's a fair lock after all. > + } else if (old == _QSPINLOCK_WAITING) { > + /* > + * Another task is already waiting while it steals the lock. > + * A bit of unfairness here won't change the big picture. > + * So just take the lock and return. > + */ > + return 1; > + } > + /* > + * Nothing need to be done if the old value is > + * (_QSPINLOCK_WAITING | _QSPINLOCK_LOCKED). > + */ > + return 0; > +} > @@ -296,6 +478,9 @@ void queue_spin_lock_slowpath(struct qspinlock *lock, int qsval) > return; > } > > +#ifdef queue_code_xchg > + prev_qcode = queue_code_xchg(lock, my_qcode); > +#else > /* > * Exchange current copy of the queue node code > */ > @@ -329,6 +514,7 @@ void queue_spin_lock_slowpath(struct qspinlock *lock, int qsval) > } else > prev_qcode &= ~_QSPINLOCK_LOCKED; /* Clear the lock bit */ > my_qcode &= ~_QSPINLOCK_LOCKED; > +#endif /* queue_code_xchg */ > > if (prev_qcode) { > /* That's just horrible.. please just make the entire #else branch another version of that same queue_code_xchg() function. _______________________________________________ Virtualization mailing list Virtualization@xxxxxxxxxxxxxxxxxxxxxxxxxx https://lists.linuxfoundation.org/mailman/listinfo/virtualization