On 10/31/21 4:49 AM, Hao Xu wrote: > @@ -380,10 +382,14 @@ static void io_wqe_dec_running(struct io_worker *worker) > if (!(worker->flags & IO_WORKER_F_UP)) > return; > > + raw_spin_lock(&acct->lock); > if (atomic_dec_and_test(&acct->nr_running) && io_acct_run_queue(acct)) { > + raw_spin_unlock(&acct->lock); > atomic_inc(&acct->nr_running); > atomic_inc(&wqe->wq->worker_refs); > io_queue_worker_create(worker, acct, create_worker_cb); > + } else { > + raw_spin_unlock(&acct->lock); > } > } I think this may be more readable as: static void io_wqe_dec_running(struct io_worker *worker) __must_hold(wqe->lock) { struct io_wqe_acct *acct = io_wqe_get_acct(worker); struct io_wqe *wqe = worker->wqe; if (!(worker->flags & IO_WORKER_F_UP)) return; if (!atomic_dec_and_test(&acct->nr_running)) return; raw_spin_lock(&acct->lock); if (!io_acct_run_queue(acct)) { raw_spin_unlock(&acct->lock); return; } raw_spin_unlock(&acct->lock); atomic_inc(&acct->nr_running); atomic_inc(&wqe->wq->worker_refs); io_queue_worker_create(worker, acct, create_worker_cb); } ? Patch looks pretty sane to me, but there's a lot of lock shuffling going on for it. Like in io_worker_handle_work() in particular. I think it'd be worthwhile to spend some time to see if that could be improved. These days, lock contention is more about frequency of lock grabbing than hold time. Maybe clean nesting of wqe->lock -> acct->lock (which would be natural) can help that? -- Jens Axboe