On 10/22/20 3:02 AM, Zhang,Qiang wrote: > > Hi Jens Axboe > > There are some problem in 'io_wqe_worker' thread, when the > 'io_wqe_worker' be create and Setting the affinity of CPUs in NUMA > nodes, due to CPU hotplug, When the last CPU going down, the > 'io_wqe_worker' thread will run anywhere. when the CPU in the node goes > online again, we should restore their cpu bindings? Something like the below should help in ensuring affinities are always correct - trigger an affinity set for an online CPU event. We should not need to do it for offlining. Can you test it? diff --git a/fs/io-wq.c b/fs/io-wq.c index 4012ff541b7b..3bf029d1170e 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -19,6 +19,7 @@ #include <linux/task_work.h> #include <linux/blk-cgroup.h> #include <linux/audit.h> +#include <linux/cpu.h> #include "io-wq.h" @@ -123,9 +124,13 @@ struct io_wq { refcount_t refs; struct completion done; + struct hlist_node cpuhp_node; + refcount_t use_refs; }; +static enum cpuhp_state io_wq_online; + static bool io_worker_get(struct io_worker *worker) { return refcount_inc_not_zero(&worker->ref); @@ -1096,6 +1101,13 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data) return ERR_PTR(-ENOMEM); } + ret = cpuhp_state_add_instance_nocalls(io_wq_online, &wq->cpuhp_node); + if (ret) { + kfree(wq->wqes); + kfree(wq); + return ERR_PTR(ret); + } + wq->free_work = data->free_work; wq->do_work = data->do_work; @@ -1145,6 +1157,7 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data) ret = PTR_ERR(wq->manager); complete(&wq->done); err: + cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node); for_each_node(node) kfree(wq->wqes[node]); kfree(wq->wqes); @@ -1164,6 +1177,8 @@ static void __io_wq_destroy(struct io_wq *wq) { int node; + cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node); + set_bit(IO_WQ_BIT_EXIT, &wq->state); if (wq->manager) kthread_stop(wq->manager); @@ -1191,3 +1206,40 @@ struct task_struct *io_wq_get_task(struct io_wq *wq) { return wq->manager; } + +static bool io_wq_worker_affinity(struct io_worker *worker, void *data) +{ + struct task_struct *task = worker->task; + unsigned long flags; + + raw_spin_lock_irqsave(&task->pi_lock, flags); + do_set_cpus_allowed(task, cpumask_of_node(worker->wqe->node)); + task->flags |= PF_NO_SETAFFINITY; + raw_spin_unlock_irqrestore(&task->pi_lock, flags); + return false; +} + +static int io_wq_cpu_online(unsigned int cpu, struct hlist_node *node) +{ + struct io_wq *wq = hlist_entry_safe(node, struct io_wq, cpuhp_node); + int i; + + rcu_read_lock(); + for_each_node(i) + io_wq_for_each_worker(wq->wqes[i], io_wq_worker_affinity, NULL); + rcu_read_unlock(); + return 0; +} + +static __init int io_wq_init(void) +{ + int ret; + + ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "io-wq/online", + io_wq_cpu_online, NULL); + if (ret < 0) + return ret; + io_wq_online = ret; + return 0; +} +subsys_initcall(io_wq_init); -- Jens Axboe