Gabriel, when looking through this patch, I've noticed we can loose wakeups after your latest simplifications. See below for details: On Sat 05-11-22 19:10:55, Gabriel Krisman Bertazi wrote: > @@ -587,7 +571,7 @@ static struct sbq_wait_state *sbq_wake_ptr(struct sbitmap_queue *sbq) > for (i = 0; i < SBQ_WAIT_QUEUES; i++) { > struct sbq_wait_state *ws = &sbq->ws[wake_index]; > > - if (waitqueue_active(&ws->wait) && atomic_read(&ws->wait_cnt)) { > + if (waitqueue_active(&ws->wait)) { > if (wake_index != atomic_read(&sbq->wake_index)) > atomic_set(&sbq->wake_index, wake_index); > return ws; Neither sbq_wake_ptr() nor sbitmap_queue_wake_up() now increment the wake_index after performing the wakeup. Thus we would effectively keep waking tasks from a single waitqueue until it becomes empty and only then go for the next waitqueue. This creates unnecessary unfairness in task wakeups and even possible starvation issues. So I think we need to advance wake_index somewhere. Perhaps here before returning waitqueue. > @@ -599,83 +583,31 @@ static struct sbq_wait_state *sbq_wake_ptr(struct sbitmap_queue *sbq) > return NULL; > } > > -static bool __sbq_wake_up(struct sbitmap_queue *sbq, int *nr) > +void sbitmap_queue_wake_up(struct sbitmap_queue *sbq, int nr) > { > + unsigned int wake_batch = READ_ONCE(sbq->wake_batch); > + struct sbq_wait_state *ws = NULL; > + unsigned int wakeups; > > + if (!atomic_read(&sbq->ws_active)) > + return; > > + atomic_add(nr, &sbq->completion_cnt); > + wakeups = atomic_read(&sbq->wakeup_cnt); > > do { > + if (atomic_read(&sbq->completion_cnt) - wakeups < wake_batch) > + return; > > + if (!ws) { > + ws = sbq_wake_ptr(sbq); > + if (!ws) > + return; > + } > + } while (!atomic_try_cmpxchg(&sbq->wakeup_cnt, > + &wakeups, wakeups + wake_batch)); > > wake_up_nr(&ws->wait, wake_batch); Now this may be also problematic - when we were checking the number of woken waiters in the older version of the patch (for others: internal version of the patch) this was fine but now it may happen that the 'ws' we have selected has no waiters anymore. And in that case we need to find another waitqueue because otherwise we'd be loosing too many wakeups and we could deadlock. So I think this rather needs to be something like: do { if (atomic_read(&sbq->completion_cnt) - wakeups < wake_batch) return; } while (!atomic_try_cmpxchg(&sbq->wakeup_cnt, &wakeups, wakeups + wake_batch)); do { ws = sbq_wake_ptr(sbq); if (!ws) return; } while (!wake_up_nr(&ws->wait, wake_batch)); with our original version of wake_up_nr() returning number of woken waiters. What do you think? Honza -- Jan Kara <jack@xxxxxxxx> SUSE Labs, CR