Re: [PATCH 3/8] workqueue: Implement BH workqueues to eventually replace tasklets

Lai Jiangshan <jiangshanlai@xxxxxxxxx> · Thu, 1 Feb 2024 19:02:27 +0800

Hello, Tejun

On Tue, Jan 30, 2024 at 5:16 PM Tejun Heo <tj@xxxxxxxxxx> wrote:

> @@ -1184,6 +1211,14 @@ static bool kick_pool(struct worker_pool *pool)
>         if (!need_more_worker(pool) || !worker)
>                 return false;
>
> +       if (pool->flags & POOL_BH) {
> +               if (pool->attrs->nice == HIGHPRI_NICE_LEVEL)
> +                       tasklet_hi_schedule(&worker->bh_tasklet);
> +               else
> +                       tasklet_schedule(&worker->bh_tasklet);
> +               return true;
> +       }

I think it is more straightforward to call bh_worker_taskletfn[_hi]()
in tasklet_action() and tasklet_hi_action() rather than add a
worker->bh_tasklet.

raise_softirq_irqoff() can be used here (kick_pool()) instead.

As the changelog said, once the conversion is complete, tasklet can be
removed and BH workqueues can directly take over the tasklet softirqs,
in which case, then, bh_worker_taskletfn[_hi]() can directly take over
the tasklet_action() and tasklet_hi_action().

> +
>         p = worker->task;
>
>  #ifdef CONFIG_SMP
> @@ -1663,8 +1698,16 @@ static bool pwq_tryinc_nr_active(struct pool_workqueue *pwq, bool fill)
>         lockdep_assert_held(&pool->lock);
>
>         if (!nna) {
> -               /* per-cpu workqueue, pwq->nr_active is sufficient */
> -               obtained = pwq->nr_active < READ_ONCE(wq->max_active);
> +               /*
> +                * BH workqueues always share a single execution context per CPU
> +                * and don't impose any max_active limit, so tryinc always
> +                * succeeds. For a per-cpu workqueue, checking pwq->nr_active is
> +                * sufficient.
> +                */
> +               if (wq->flags & WQ_BH)
> +                       obtained = true;
> +               else
> +                       obtained = pwq->nr_active < READ_ONCE(wq->max_active);

I think wq->max_active can be forced to be UINT_MAX or ULONG_MAX
in the max_active management code to avoid a branch here.

>                 goto out;
>         }
>
> @@ -2599,27 +2642,31 @@ static struct worker *create_worker(struct worker_pool *pool)
>
>         worker->id = id;
>
> -       if (pool->cpu >= 0)
> -               snprintf(id_buf, sizeof(id_buf), "%d:%d%s", pool->cpu, id,
> -                        pool->attrs->nice < 0  ? "H" : "");
> -       else
> -               snprintf(id_buf, sizeof(id_buf), "u%d:%d", pool->id, id);
> -
> -       worker->task = kthread_create_on_node(worker_thread, worker, pool->node,
> -                                             "kworker/%s", id_buf);
> -       if (IS_ERR(worker->task)) {
> -               if (PTR_ERR(worker->task) == -EINTR) {
> -                       pr_err("workqueue: Interrupted when creating a worker thread \"kworker/%s\"\n",
> -                              id_buf);
> -               } else {
> -                       pr_err_once("workqueue: Failed to create a worker thread: %pe",
> -                                   worker->task);
> +       if (pool->flags & POOL_BH) {
> +               tasklet_setup(&worker->bh_tasklet, bh_worker_taskletfn);
> +       } else {
> +               if (pool->cpu >= 0)
> +                       snprintf(id_buf, sizeof(id_buf), "%d:%d%s", pool->cpu, id,
> +                                pool->attrs->nice < 0  ? "H" : "");
> +               else
> +                       snprintf(id_buf, sizeof(id_buf), "u%d:%d", pool->id, id);
> +
> +               worker->task = kthread_create_on_node(worker_thread, worker,
> +                                       pool->node, "kworker/%s", id_buf);
> +               if (IS_ERR(worker->task)) {
> +                       if (PTR_ERR(worker->task) == -EINTR) {
> +                               pr_err("workqueue: Interrupted when creating a worker thread \"kworker/%s\"\n",
> +                                      id_buf);
> +                       } else {
> +                               pr_err_once("workqueue: Failed to create a worker thread: %pe",
> +                                           worker->task);
> +                       }
> +                       goto fail;
>                 }
> -               goto fail;
> -       }
>
> -       set_user_nice(worker->task, pool->attrs->nice);
> -       kthread_bind_mask(worker->task, pool_allowed_cpus(pool));
> +               set_user_nice(worker->task, pool->attrs->nice);
> +               kthread_bind_mask(worker->task, pool_allowed_cpus(pool));
> +       }
>
>         /* successful, attach the worker to the pool */
>         worker_attach_to_pool(worker, pool);

worker_attach_to_pool() and worker_detach_from_pool also access to
worker->task with kthread_set_per_cpu() and luckily to_kthread()
checks the NULL pointer for it.

IMO, it is better to avoid calling kthread_set_per_cpu() for bh workers.

> @@ -5605,7 +5731,12 @@ static void pr_cont_pool_info(struct worker_pool *pool)
>         pr_cont(" cpus=%*pbl", nr_cpumask_bits, pool->attrs->cpumask);
>         if (pool->node != NUMA_NO_NODE)
>                 pr_cont(" node=%d", pool->node);
> -       pr_cont(" flags=0x%x nice=%d", pool->flags, pool->attrs->nice);
> +       pr_cont(" flags=0x%x", pool->flags);
> +       if (pool->flags & POOL_BH)
> +               pr_cont(" bh%s",
> +                       pool->attrs->nice == HIGHPRI_NICE_LEVEL ? "-hi" : "");
> +       else
> +               pr_cont(" nice=%d", pool->attrs->nice);

There are also some "worker->task" in show_pwq(), show_one_worker_pool(),
and show_cpu_pool_hog() needing taking care of.

Thanks
Lai