Hello, Tejun On Tue, Jan 30, 2024 at 5:16 PM Tejun Heo <tj@xxxxxxxxxx> wrote: > @@ -1184,6 +1211,14 @@ static bool kick_pool(struct worker_pool *pool) > if (!need_more_worker(pool) || !worker) > return false; > > + if (pool->flags & POOL_BH) { > + if (pool->attrs->nice == HIGHPRI_NICE_LEVEL) > + tasklet_hi_schedule(&worker->bh_tasklet); > + else > + tasklet_schedule(&worker->bh_tasklet); > + return true; > + } I think it is more straightforward to call bh_worker_taskletfn[_hi]() in tasklet_action() and tasklet_hi_action() rather than add a worker->bh_tasklet. raise_softirq_irqoff() can be used here (kick_pool()) instead. As the changelog said, once the conversion is complete, tasklet can be removed and BH workqueues can directly take over the tasklet softirqs, in which case, then, bh_worker_taskletfn[_hi]() can directly take over the tasklet_action() and tasklet_hi_action(). > + > p = worker->task; > > #ifdef CONFIG_SMP > @@ -1663,8 +1698,16 @@ static bool pwq_tryinc_nr_active(struct pool_workqueue *pwq, bool fill) > lockdep_assert_held(&pool->lock); > > if (!nna) { > - /* per-cpu workqueue, pwq->nr_active is sufficient */ > - obtained = pwq->nr_active < READ_ONCE(wq->max_active); > + /* > + * BH workqueues always share a single execution context per CPU > + * and don't impose any max_active limit, so tryinc always > + * succeeds. For a per-cpu workqueue, checking pwq->nr_active is > + * sufficient. > + */ > + if (wq->flags & WQ_BH) > + obtained = true; > + else > + obtained = pwq->nr_active < READ_ONCE(wq->max_active); I think wq->max_active can be forced to be UINT_MAX or ULONG_MAX in the max_active management code to avoid a branch here. > goto out; > } > > @@ -2599,27 +2642,31 @@ static struct worker *create_worker(struct worker_pool *pool) > > worker->id = id; > > - if (pool->cpu >= 0) > - snprintf(id_buf, sizeof(id_buf), "%d:%d%s", pool->cpu, id, > - pool->attrs->nice < 0 ? "H" : ""); > - else > - snprintf(id_buf, sizeof(id_buf), "u%d:%d", pool->id, id); > - > - worker->task = kthread_create_on_node(worker_thread, worker, pool->node, > - "kworker/%s", id_buf); > - if (IS_ERR(worker->task)) { > - if (PTR_ERR(worker->task) == -EINTR) { > - pr_err("workqueue: Interrupted when creating a worker thread \"kworker/%s\"\n", > - id_buf); > - } else { > - pr_err_once("workqueue: Failed to create a worker thread: %pe", > - worker->task); > + if (pool->flags & POOL_BH) { > + tasklet_setup(&worker->bh_tasklet, bh_worker_taskletfn); > + } else { > + if (pool->cpu >= 0) > + snprintf(id_buf, sizeof(id_buf), "%d:%d%s", pool->cpu, id, > + pool->attrs->nice < 0 ? "H" : ""); > + else > + snprintf(id_buf, sizeof(id_buf), "u%d:%d", pool->id, id); > + > + worker->task = kthread_create_on_node(worker_thread, worker, > + pool->node, "kworker/%s", id_buf); > + if (IS_ERR(worker->task)) { > + if (PTR_ERR(worker->task) == -EINTR) { > + pr_err("workqueue: Interrupted when creating a worker thread \"kworker/%s\"\n", > + id_buf); > + } else { > + pr_err_once("workqueue: Failed to create a worker thread: %pe", > + worker->task); > + } > + goto fail; > } > - goto fail; > - } > > - set_user_nice(worker->task, pool->attrs->nice); > - kthread_bind_mask(worker->task, pool_allowed_cpus(pool)); > + set_user_nice(worker->task, pool->attrs->nice); > + kthread_bind_mask(worker->task, pool_allowed_cpus(pool)); > + } > > /* successful, attach the worker to the pool */ > worker_attach_to_pool(worker, pool); worker_attach_to_pool() and worker_detach_from_pool also access to worker->task with kthread_set_per_cpu() and luckily to_kthread() checks the NULL pointer for it. IMO, it is better to avoid calling kthread_set_per_cpu() for bh workers. > @@ -5605,7 +5731,12 @@ static void pr_cont_pool_info(struct worker_pool *pool) > pr_cont(" cpus=%*pbl", nr_cpumask_bits, pool->attrs->cpumask); > if (pool->node != NUMA_NO_NODE) > pr_cont(" node=%d", pool->node); > - pr_cont(" flags=0x%x nice=%d", pool->flags, pool->attrs->nice); > + pr_cont(" flags=0x%x", pool->flags); > + if (pool->flags & POOL_BH) > + pr_cont(" bh%s", > + pool->attrs->nice == HIGHPRI_NICE_LEVEL ? "-hi" : ""); > + else > + pr_cont(" nice=%d", pool->attrs->nice); There are also some "worker->task" in show_pwq(), show_one_worker_pool(), and show_cpu_pool_hog() needing taking care of. Thanks Lai