On 06/13/2018 11:50 AM, Peter Zijlstra wrote:
+ + lockdep_assert_held(&lock->wait_lock); + + if (owner && hold_ctx && __ww_ctx_stamp_after(hold_ctx, ww_ctx) && + ww_ctx->acquired > 0) { + WRITE_ONCE(hold_ctx->wounded, true); + if (owner != current) { + /* + * wake_up_process() inserts a write memory barrier toIt does no such thing. But yes, it does ensure the wakee sees all prior stores IFF the wakeup happened.+ * make sure owner sees it is wounded before + * TASK_RUNNING in case it's sleeping on another + * ww_mutex. Note that owner points to a valid + * task_struct as long as we hold the wait_lock. + */What exactly are you trying to say here ? I'm thinking this is the pairing barrier to the smp_mb() below, with your list_empty() thing? Might make sense to write a single coherent comment and refer to the other location.
So what I'm trying to say here is that wake_up_process() ensures that the owner, if in !TASK_RUNNING, sees the write to hold_ctx->wounded before the transition to TASK_RUNNING. This was how I interpreted "woken up" in the wake up process documentation.
+ wake_up_process(owner); + } + return true; + } + + return false; +} + /* * Wake up any waiters that may have to back off when the lock is held by the * given context. * * Due to the invariants on the wait list, this can only affect the first - * waiter with a context. + * waiter with a context, unless the Wound-Wait algorithm is used where + * also subsequent waiters with a context main wound the lock holder. * * The current task must not be on the wait list. */ @@ -303,6 +338,7 @@ static void __sched __ww_mutex_wakeup_for_backoff(struct mutex *lock, struct ww_acquire_ctx *ww_ctx) { struct mutex_waiter *cur; + bool is_wait_die = ww_ctx->ww_class->is_wait_die;lockdep_assert_held(&lock->wait_lock); @@ -310,13 +346,14 @@ __ww_mutex_wakeup_for_backoff(struct mutex *lock, struct ww_acquire_ctx *ww_ctx)if (!cur->ww_ctx) continue;- if (cur->ww_ctx->acquired > 0 &&+ if (is_wait_die && cur->ww_ctx->acquired > 0 && __ww_ctx_stamp_after(cur->ww_ctx, ww_ctx)) { debug_mutex_wake_waiter(lock, cur); wake_up_process(cur->task); }- break;+ if (is_wait_die || __ww_mutex_wound(lock, cur->ww_ctx, ww_ctx)) + break; } }@@ -338,12 +375,17 @@ ww_mutex_set_context_fastpath(struct ww_mutex *lock, struct ww_acquire_ctx *ctx)* and keep spinning, or it will acquire wait_lock, add itself * to waiter list and sleep. */ - smp_mb(); /* ^^^ */ + smp_mb(); /* See comments above and below. *//*- * Check if lock is contended, if not there is nobody to wake up + * Check if lock is contended, if not there is nobody to wake up. + * Checking MUTEX_FLAG_WAITERS is not enough here,That seems like a superfluous thing to say. It makes sense in the context of this patch because we change the FLAG check into a list check, but the resulting comment/code looks odd.since we need to + * order against the lock->ctx check in __ww_mutex_wound called from + * __ww_mutex_add_waiter. We can use list_empty without taking the + * wait_lock, given the memory barrier above and the list_empty + * documentation.I don't trust documentation. Please reason about implementation.
Will do.
*/ - if (likely(!(atomic_long_read(&lock->base.owner) & MUTEX_FLAG_WAITERS))) + if (likely(list_empty(&lock->base.wait_list))) return;/*@@ -653,6 +695,17 @@ __ww_mutex_lock_check_stamp(struct mutex *lock, struct mutex_waiter *waiter, struct ww_acquire_ctx *hold_ctx = READ_ONCE(ww->ctx); struct mutex_waiter *cur;+ /*+ * If we miss a wounded == true here, we will have a pendingExplain how we can miss that.
This is actually the pairing location of the wake_up_process() comment / code discussed above. Here we should have !TASK_RUNNING, and let's say ctx->wounded is set by another process immediately after we've read it (we "miss" it). At that point there must be a pending wake-up-process() for us and we'll pick up the set value of wounded on the next iteration after returning from schedule().
+ * TASK_RUNNING and pick it up on the next schedule fall-through. + */ + if (!ctx->ww_class->is_wait_die) { + if (READ_ONCE(ctx->wounded)) + goto deadlock; + else + return 0; + } + if (hold_ctx && __ww_ctx_stamp_after(ctx, hold_ctx)) goto deadlock;@@ -683,12 +736,15 @@ __ww_mutex_add_waiter(struct mutex_waiter *waiter,{ struct mutex_waiter *cur; struct list_head *pos; + bool is_wait_die;if (!ww_ctx) {list_add_tail(&waiter->list, &lock->wait_list); return 0; }+ is_wait_die = ww_ctx->ww_class->is_wait_die;+ /* * Add the waiter before the first waiter with a higher stamp. * Waiters without a context are skipped to avoid starving @@ -701,7 +757,7 @@ __ww_mutex_add_waiter(struct mutex_waiter *waiter,if (__ww_ctx_stamp_after(ww_ctx, cur->ww_ctx)) {/* Back off immediately if necessary. */ - if (ww_ctx->acquired > 0) { + if (is_wait_die && ww_ctx->acquired > 0) { #ifdef CONFIG_DEBUG_MUTEXES struct ww_mutex *ww;@@ -721,13 +777,26 @@ __ww_mutex_add_waiter(struct mutex_waiter *waiter,* Wake up the waiter so that it gets a chance to back * off. */ - if (cur->ww_ctx->acquired > 0) { + if (is_wait_die && cur->ww_ctx->acquired > 0) { debug_mutex_wake_waiter(lock, cur); wake_up_process(cur->task); } }list_add_tail(&waiter->list, pos);+ if (!is_wait_die) { + struct ww_mutex *ww = container_of(lock, struct ww_mutex, base); + + /* + * Make sure a racing lock taker sees a non-empty waiting list + * before we read ww->ctx, so that if we miss ww->ctx, the + * racing lock taker will call __ww_mutex_wake_up_for_backoff() + * and wound itself. + */ + smp_mb(); + __ww_mutex_wound(lock, ww_ctx, ww->ctx); + } + return 0; }@@ -750,6 +819,14 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,if (use_ww_ctx && ww_ctx) { if (unlikely(ww_ctx == READ_ONCE(ww->ctx))) return -EALREADY; + + /* + * Reset the wounded flag after a backoff. + * No other process can race and wound us here since they + * can't have a valid owner pointer at this time + */ + if (ww_ctx->acquired == 0) + ww_ctx->wounded = false; }preempt_disable();@@ -858,6 +935,11 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, acquired: __set_current_state(TASK_RUNNING);+ /* We stole the lock. Need to check wounded status. */+ if (use_ww_ctx && ww_ctx && !ww_ctx->ww_class->is_wait_die && + !__mutex_waiter_is_first(lock, &waiter)) + __ww_mutex_wakeup_for_backoff(lock, ww_ctx); + mutex_remove_waiter(lock, &waiter, current); if (likely(list_empty(&lock->wait_list))) __mutex_clear_flag(lock, MUTEX_FLAGS);I can't say I'm a fan. I'm already cursing the ww_mutex stuff every time I have to look at it, and you just made it worse spagethi.
Thanks for the review.Well, I can't speak for the current ww implementation except I didn't think it was too hard to understand for a first time reader.
Admittedly the Wound-Wait path makes it worse since it's a preemptive algorithm and we need to touch other processes a acquire contexts and worry about ordering.
So, assuming your review comments are fixed up, is that a solid NAK or do you have any suggestion that would make you more comfortable with the code? like splitting out ww-stuff to a separate file?
/Thomas