On Fri, Oct 14, 2022 at 10:36:19AM +0200, Peter Zijlstra wrote:
> 
> + Frederic; who actually does most of this code
> 
> On Thu, Oct 13, 2022 at 03:40:28PM -0300, Leonardo Bras wrote:
> > Housekeeping code keeps multiple cpumasks in order to keep track of which
> > cpus can perform given housekeeping category.
> > 
> > Every time the HK_TYPE_WQ cpumask is checked before queueing work at a cpu
> > WQ it also happens to check for HK_TYPE_DOMAIN. So It can be assumed that
> > the Domain isolation also ends up isolating work queues.
> > 
> > Delegating current HK_TYPE_DOMAIN's work queue isolation to HK_TYPE_WQ
> > makes it simpler to check if a cpu can run a task into an work queue, since
> > code just need to go through a single HK_TYPE_* cpumask.
> > 
> > Make isolcpus=domain aggregate both HK_TYPE_DOMAIN and HK_TYPE_WQ, and
> > remove a lot of cpumask_and calls.
> > 
> > Also, remove a unnecessary '|=' at housekeeping_isolcpus_setup() since we
> > are sure that 'flags == 0' here.
> > 
> > Signed-off-by: Leonardo Bras <leobras@xxxxxxxxxx>
> 
> I've long maintained that having all these separate masks is daft;
> Frederic do we really need that?

Indeed. In my queue for the cpuset interface to nohz_full, I have the
following patch (but note DOMAIN and WQ have to stay separate flags
because workqueue affinity can be modified separately from isolcpus)

---
From: Frederic Weisbecker <frederic@xxxxxxxxxx>
Date: Tue, 26 Jul 2022 17:03:30 +0200
Subject: [PATCH] sched/isolation: Gather nohz_full related isolation features into common flag

Signed-off-by: Frederic Weisbecker <frederic@xxxxxxxxxx>
---
 arch/x86/kvm/x86.c              |  2 +-
 drivers/pci/pci-driver.c        |  2 +-
 include/linux/sched/isolation.h |  7 +------
 kernel/cpu.c                    |  4 ++--
 kernel/kthread.c                |  4 ++--
 kernel/rcu/tasks.h              |  2 +-
 kernel/rcu/tree_plugin.h        |  6 +++---
 kernel/sched/core.c             | 10 +++++-----
 kernel/sched/fair.c             |  6 +++---
 kernel/sched/isolation.c        | 25 +++++++------------------
 kernel/watchdog.c               |  2 +-
 kernel/workqueue.c              |  2 +-
 net/core/net-sysfs.c            |  2 +-
 13 files changed, 29 insertions(+), 45 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 1910e1e78b15..d0b73fcf4a1c 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -9009,7 +9009,7 @@ int kvm_arch_init(void *opaque)
 	}
 
 	if (pi_inject_timer == -1)
-		pi_inject_timer = housekeeping_enabled(HK_TYPE_TIMER);
+		pi_inject_timer = housekeeping_enabled(HK_TYPE_NOHZ_FULL);
 #ifdef CONFIG_X86_64
 	pvclock_gtod_register_notifier(&pvclock_gtod_notifier);
 
diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index 49238ddd39ee..af3494a39921 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -378,7 +378,7 @@ static int pci_call_probe(struct pci_driver *drv, struct pci_dev *dev,
 			goto out;
 		}
 		cpumask_and(wq_domain_mask,
-			    housekeeping_cpumask(HK_TYPE_WQ),
+			    housekeeping_cpumask(HK_TYPE_NOHZ_FULL),
 			    housekeeping_cpumask(HK_TYPE_DOMAIN));
 
 		cpu = cpumask_any_and(cpumask_of_node(node),
diff --git a/include/linux/sched/isolation.h b/include/linux/sched/isolation.h
index 8c15abd67aed..7ca34e04abe7 100644
--- a/include/linux/sched/isolation.h
+++ b/include/linux/sched/isolation.h
@@ -6,15 +6,10 @@
 #include <linux/tick.h>
 
 enum hk_type {
-	HK_TYPE_TIMER,
-	HK_TYPE_RCU,
-	HK_TYPE_MISC,
+	HK_TYPE_NOHZ_FULL,
 	HK_TYPE_SCHED,
-	HK_TYPE_TICK,
 	HK_TYPE_DOMAIN,
-	HK_TYPE_WQ,
 	HK_TYPE_MANAGED_IRQ,
-	HK_TYPE_KTHREAD,
 	HK_TYPE_MAX
 };
 
diff --git a/kernel/cpu.c b/kernel/cpu.c
index bbad5e375d3b..573f14d75a2e 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -1500,8 +1500,8 @@ int freeze_secondary_cpus(int primary)
 	cpu_maps_update_begin();
 	if (primary == -1) {
 		primary = cpumask_first(cpu_online_mask);
-		if (!housekeeping_cpu(primary, HK_TYPE_TIMER))
-			primary = housekeeping_any_cpu(HK_TYPE_TIMER);
+		if (!housekeeping_cpu(primary, HK_TYPE_NOHZ_FULL))
+			primary = housekeeping_any_cpu(HK_TYPE_NOHZ_FULL);
 	} else {
 		if (!cpu_online(primary))
 			primary = cpumask_first(cpu_online_mask);
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 544fd4097406..0719035feba0 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -355,7 +355,7 @@ static int kthread(void *_create)
 	 * back to default in case they have been changed.
 	 */
 	sched_setscheduler_nocheck(current, SCHED_NORMAL, &param);
-	set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_TYPE_KTHREAD));
+	set_cpus_allowed_ptr(current, housekeeping_cpumask(HK_TYPE_NOHZ_FULL));
 
 	/* OK, tell user we're spawned, wait for stop or wakeup */
 	__set_current_state(TASK_UNINTERRUPTIBLE);
@@ -721,7 +721,7 @@ int kthreadd(void *unused)
 	/* Setup a clean context for our children to inherit. */
 	set_task_comm(tsk, "kthreadd");
 	ignore_signals(tsk);
-	set_cpus_allowed_ptr(tsk, housekeeping_cpumask(HK_TYPE_KTHREAD));
+	set_cpus_allowed_ptr(tsk, housekeeping_cpumask(HK_TYPE_NOHZ_FULL));
 	set_mems_allowed(node_states[N_MEMORY]);
 
 	current->flags |= PF_NOFREEZE;
diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
index f5bf6fb430da..b99f79625b26 100644
--- a/kernel/rcu/tasks.h
+++ b/kernel/rcu/tasks.h
@@ -537,7 +537,7 @@ static int __noreturn rcu_tasks_kthread(void *arg)
 	struct rcu_tasks *rtp = arg;
 
 	/* Run on housekeeping CPUs by default. Sysadm can move if desired. */
-	housekeeping_affine(current, HK_TYPE_RCU);
+	housekeeping_affine(current, HK_TYPE_NOHZ_FULL);
 	WRITE_ONCE(rtp->kthread_ptr, current); // Let GPs start!
 
 	/*
diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index b2219577fbe2..4935b06c3caf 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -1237,9 +1237,9 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
 		if ((mask & leaf_node_cpu_bit(rnp, cpu)) &&
 		    cpu != outgoingcpu)
 			cpumask_set_cpu(cpu, cm);
-	cpumask_and(cm, cm, housekeeping_cpumask(HK_TYPE_RCU));
+	cpumask_and(cm, cm, housekeeping_cpumask(HK_TYPE_NOHZ_FULL));
 	if (cpumask_empty(cm))
-		cpumask_copy(cm, housekeeping_cpumask(HK_TYPE_RCU));
+		cpumask_copy(cm, housekeeping_cpumask(HK_TYPE_NOHZ_FULL));
 	set_cpus_allowed_ptr(t, cm);
 	mutex_unlock(&rnp->boost_kthread_mutex);
 	free_cpumask_var(cm);
@@ -1294,5 +1294,5 @@ static void rcu_bind_gp_kthread(void)
 {
 	if (!tick_nohz_full_enabled())
 		return;
-	housekeeping_affine(current, HK_TYPE_RCU);
+	housekeeping_affine(current, HK_TYPE_NOHZ_FULL);
 }
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index f53c0096860b..5ff205f39197 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1079,13 +1079,13 @@ int get_nohz_timer_target(void)
 	struct sched_domain *sd;
 	const struct cpumask *hk_mask;
 
-	if (housekeeping_cpu(cpu, HK_TYPE_TIMER)) {
+	if (housekeeping_cpu(cpu, HK_TYPE_NOHZ_FULL)) {
 		if (!idle_cpu(cpu))
 			return cpu;
 		default_cpu = cpu;
 	}
 
-	hk_mask = housekeeping_cpumask(HK_TYPE_TIMER);
+	hk_mask = housekeeping_cpumask(HK_TYPE_NOHZ_FULL);
 
 	rcu_read_lock();
 	for_each_domain(cpu, sd) {
@@ -1101,7 +1101,7 @@ int get_nohz_timer_target(void)
 	}
 
 	if (default_cpu == -1)
-		default_cpu = housekeeping_any_cpu(HK_TYPE_TIMER);
+		default_cpu = housekeeping_any_cpu(HK_TYPE_NOHZ_FULL);
 	cpu = default_cpu;
 unlock:
 	rcu_read_unlock();
@@ -5562,7 +5562,7 @@ static void sched_tick_start(int cpu)
 	int os;
 	struct tick_work *twork;
 
-	if (housekeeping_cpu(cpu, HK_TYPE_TICK))
+	if (housekeeping_cpu(cpu, HK_TYPE_NOHZ_FULL))
 		return;
 
 	WARN_ON_ONCE(!tick_work_cpu);
@@ -5583,7 +5583,7 @@ static void sched_tick_stop(int cpu)
 	struct tick_work *twork;
 	int os;
 
-	if (housekeeping_cpu(cpu, HK_TYPE_TICK))
+	if (housekeeping_cpu(cpu, HK_TYPE_NOHZ_FULL))
 		return;
 
 	WARN_ON_ONCE(!tick_work_cpu);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 77b2048a9326..ac3b33e00451 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -10375,7 +10375,7 @@ static inline int on_null_domain(struct rq *rq)
  * - When one of the busy CPUs notice that there may be an idle rebalancing
  *   needed, they will kick the idle load balancer, which then does idle
  *   load balancing for all the idle CPUs.
- * - HK_TYPE_MISC CPUs are used for this task, because HK_TYPE_SCHED not set
+ * - HK_TYPE_NOHZ_FULL CPUs are used for this task, because HK_TYPE_SCHED not set
  *   anywhere yet.
  */
 
@@ -10384,7 +10384,7 @@ static inline int find_new_ilb(void)
 	int ilb;
 	const struct cpumask *hk_mask;
 
-	hk_mask = housekeeping_cpumask(HK_TYPE_MISC);
+	hk_mask = housekeeping_cpumask(HK_TYPE_NOHZ_FULL);
 
 	for_each_cpu_and(ilb, nohz.idle_cpus_mask, hk_mask) {
 
@@ -10400,7 +10400,7 @@ static inline int find_new_ilb(void)
 
 /*
  * Kick a CPU to do the nohz balancing, if it is time for it. We pick any
- * idle CPU in the HK_TYPE_MISC housekeeping set (if there is one).
+ * idle CPU in the HK_TYPE_NOHZ_FULL housekeeping set (if there is one).
  */
 static void kick_ilb(unsigned int flags)
 {
diff --git a/kernel/sched/isolation.c b/kernel/sched/isolation.c
index 4087718ee5b4..443f1ce83e32 100644
--- a/kernel/sched/isolation.c
+++ b/kernel/sched/isolation.c
@@ -4,20 +4,15 @@
  *  any CPU: unbound workqueues, timers, kthreads and any offloadable work.
  *
  * Copyright (C) 2017 Red Hat, Inc., Frederic Weisbecker
- * Copyright (C) 2017-2018 SUSE, Frederic Weisbecker
+ * Copyright (C) 2017-2022 SUSE, Frederic Weisbecker
  *
  */
 
 enum hk_flags {
-	HK_FLAG_TIMER = BIT(HK_TYPE_TIMER),
-	HK_FLAG_RCU = BIT(HK_TYPE_RCU),
-	HK_FLAG_MISC = BIT(HK_TYPE_MISC),
+	HK_FLAG_NOHZ_FULL = BIT(HK_TYPE_NOHZ_FULL),
 	HK_FLAG_SCHED = BIT(HK_TYPE_SCHED),
-	HK_FLAG_TICK = BIT(HK_TYPE_TICK),
 	HK_FLAG_DOMAIN = BIT(HK_TYPE_DOMAIN),
-	HK_FLAG_WQ = BIT(HK_TYPE_WQ),
 	HK_FLAG_MANAGED_IRQ = BIT(HK_TYPE_MANAGED_IRQ),
-	HK_FLAG_KTHREAD = BIT(HK_TYPE_KTHREAD),
 };
 
 DEFINE_STATIC_KEY_FALSE(housekeeping_overridden);
@@ -88,7 +83,7 @@ void __init housekeeping_init(void)
 
 	static_branch_enable(&housekeeping_overridden);
 
-	if (housekeeping.flags & HK_FLAG_TICK)
+	if (housekeeping.flags & HK_FLAG_NOHZ_FULL)
 		sched_tick_offload_init();
 
 	for_each_set_bit(type, &housekeeping.flags, HK_TYPE_MAX) {
@@ -111,7 +106,7 @@ static int __init housekeeping_setup(char *str, unsigned long flags)
 	cpumask_var_t non_housekeeping_mask, housekeeping_staging;
 	int err = 0;
 
-	if ((flags & HK_FLAG_TICK) && !(housekeeping.flags & HK_FLAG_TICK)) {
+	if ((flags & HK_FLAG_NOHZ_FULL) && !(housekeeping.flags & HK_FLAG_NOHZ_FULL)) {
 		if (!IS_ENABLED(CONFIG_NO_HZ_FULL)) {
 			pr_warn("Housekeeping: nohz unsupported."
 				" Build with CONFIG_NO_HZ_FULL\n");
@@ -163,7 +158,7 @@ static int __init housekeeping_setup(char *str, unsigned long flags)
 			housekeeping_setup_type(type, housekeeping_staging);
 	}
 
-	if ((flags & HK_FLAG_TICK) && !(housekeeping.flags & HK_FLAG_TICK))
+	if ((flags & HK_FLAG_NOHZ_FULL) && !(housekeeping.flags & HK_FLAG_NOHZ_FULL))
 		tick_nohz_full_setup(non_housekeeping_mask);
 
 	housekeeping.flags |= flags;
@@ -179,12 +174,7 @@ static int __init housekeeping_setup(char *str, unsigned long flags)
 
 static int __init housekeeping_nohz_full_setup(char *str)
 {
-	unsigned long flags;
-
-	flags = HK_FLAG_TICK | HK_FLAG_WQ | HK_FLAG_TIMER | HK_FLAG_RCU |
-		HK_FLAG_MISC | HK_FLAG_KTHREAD;
-
-	return housekeeping_setup(str, flags);
+	return housekeeping_setup(str, HK_FLAG_NOHZ_FULL);
 }
 __setup("nohz_full=", housekeeping_nohz_full_setup);
 
@@ -198,8 +188,7 @@ static int __init housekeeping_isolcpus_setup(char *str)
 	while (isalpha(*str)) {
 		if (!strncmp(str, "nohz,", 5)) {
 			str += 5;
-			flags |= HK_FLAG_TICK | HK_FLAG_WQ | HK_FLAG_TIMER |
-				 HK_FLAG_RCU | HK_FLAG_MISC | HK_FLAG_KTHREAD;
+			flags |= HK_FLAG_NOHZ_FULL;
 			continue;
 		}
 
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 20a7a55e62b6..3e9636f4bac6 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -852,7 +852,7 @@ void __init lockup_detector_init(void)
 		pr_info("Disabling watchdog on nohz_full cores by default\n");
 
 	cpumask_copy(&watchdog_cpumask,
-		     housekeeping_cpumask(HK_TYPE_TIMER));
+		     housekeeping_cpumask(HK_TYPE_NOHZ_FULL));
 
 	if (!watchdog_nmi_probe())
 		nmi_watchdog_available = true;
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 1ea50f6be843..3eb283d76d81 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -5993,7 +5993,7 @@ void __init workqueue_init_early(void)
 	BUILD_BUG_ON(__alignof__(struct pool_workqueue) < __alignof__(long long));
 
 	BUG_ON(!alloc_cpumask_var(&wq_unbound_cpumask, GFP_KERNEL));
-	cpumask_copy(wq_unbound_cpumask, housekeeping_cpumask(HK_TYPE_WQ));
+	cpumask_copy(wq_unbound_cpumask, housekeeping_cpumask(HK_TYPE_NOHZ_FULL));
 	cpumask_and(wq_unbound_cpumask, wq_unbound_cpumask, housekeeping_cpumask(HK_TYPE_DOMAIN));
 
 	pwq_cache = KMEM_CACHE(pool_workqueue, SLAB_PANIC);
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index e319e242dddf..6dddf359b754 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -852,7 +852,7 @@ static ssize_t store_rps_map(struct netdev_rx_queue *queue,
 
 	if (!cpumask_empty(mask)) {
 		cpumask_and(mask, mask, housekeeping_cpumask(HK_TYPE_DOMAIN));
-		cpumask_and(mask, mask, housekeeping_cpumask(HK_TYPE_WQ));
+		cpumask_and(mask, mask, housekeeping_cpumask(HK_TYPE_NOHZ_FULL));
 		if (cpumask_empty(mask)) {
 			free_cpumask_var(mask);
 			return -EINVAL;
-- 
2.25.1
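
For context, a minimal caller-side sketch of the pattern being discussed,
illustration only: unbound_work_allowed_on() is a hypothetical helper, not
an existing kernel API, and the comment about HK_TYPE_NOHZ_FULL assumes the
patch above is applied.

#include <linux/cpumask.h>
#include <linux/sched/isolation.h>

/*
 * Hypothetical helper (illustration only): may unbound housekeeping work
 * be placed on this CPU?
 *
 * Today every HK_TYPE_WQ user also checks HK_TYPE_DOMAIN, hence the two
 * tests below.  With the patch above the first test would use
 * HK_TYPE_NOHZ_FULL instead, while HK_TYPE_DOMAIN stays a separate check
 * because workqueue affinity can be modified independently of isolcpus.
 */
static bool unbound_work_allowed_on(int cpu)
{
	return housekeeping_cpu(cpu, HK_TYPE_WQ) &&
	       housekeeping_cpu(cpu, HK_TYPE_DOMAIN);
}

The net-sysfs.c and pci-driver.c hunks in the patch show this exact
two-mask pattern, with only the first type swapped by the consolidation.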