Dear RT folks!

I'm pleased to announce the v5.15-rc2-rt3 patch set.

Changes since v5.15-rc2-rt2:

  - Remove kernel_fpu_resched(). A few ciphers were restructured and this
    function no longer has any users, so it can be removed.

  - The cpuset code is using spinlock_t again. Since the mm/slub rework
    there is no need to use raw_spinlock_t.

  - Allow CONFIG_RT_GROUP_SCHED to be enabled on RT again. The original
    issue cannot be reproduced. Please test and report any issues.

  - Valentin Schneider's fix for the RCU warning has been replaced with a
    patch by Thomas Gleixner. There is another issue open in that area and
    Frederic Weisbecker is looking into it.

  - RCU lock accounting and checking has been reworked by Thomas Gleixner.
    A direct effect is that might_sleep() produces a warning if invoked in
    an RCU read section. Previously it would only trigger a warning in
    schedule() in such a situation.

  - The preempt_*_nort() macros have been removed.

  - The preempt_enable_no_resched() macro should behave like
    preempt_enable() on PREEMPT_RT but was misplaced in v3.14-rt1 and has
    been corrected now.

Known issues
  - netconsole triggers WARN.

  - The "Memory controller" (CONFIG_MEMCG) has been disabled.

  - Valentin Schneider reported a few splats on ARM64, see
    https://lkml.kernel.org/r/20210810134127.1394269-1-valentin.schneider@xxxxxxx/

The delta patch against v5.15-rc2-rt2 is appended below and can be found here:

    https://cdn.kernel.org/pub/linux/kernel/projects/rt/5.15/incr/patch-5.15-rc2-rt2-rt3.patch.xz

You can get this release via the git tree at:

    git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-rt-devel.git v5.15-rc2-rt3

The RT patch against v5.15-rc2 can be found here:

    https://cdn.kernel.org/pub/linux/kernel/projects/rt/5.15/older/patch-5.15-rc2-rt3.patch.xz

The split quilt queue is available at:

    https://cdn.kernel.org/pub/linux/kernel/projects/rt/5.15/older/patches-5.15-rc2-rt3.tar.xz

Sebastian

diff --git a/arch/x86/include/asm/fpu/api.h b/arch/x86/include/asm/fpu/api.h index 62cf3e4c06fb1..23bef08a83880 100644 --- a/arch/x86/include/asm/fpu/api.h +++ b/arch/x86/include/asm/fpu/api.h @@ -28,7 +28,6 @@ extern void kernel_fpu_begin_mask(unsigned int kfpu_mask); extern void kernel_fpu_end(void); extern bool irq_fpu_usable(void); extern void fpregs_mark_activate(void); -extern void kernel_fpu_resched(void); /* Code that is unaware of kernel_fpu_begin_mask() can use this */ static inline void kernel_fpu_begin(void) diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index e17fe40ee51e1..7ada7bd03a327 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -185,18 +185,6 @@ void kernel_fpu_end(void) } EXPORT_SYMBOL_GPL(kernel_fpu_end); -void kernel_fpu_resched(void) -{ - WARN_ON_FPU(!this_cpu_read(in_kernel_fpu)); - - if (should_resched(PREEMPT_OFFSET)) { - kernel_fpu_end(); - cond_resched(); - kernel_fpu_begin(); - } -} -EXPORT_SYMBOL_GPL(kernel_fpu_resched); - /* * Sync the FPU register state to current's memory register state when the * current task owns the FPU. The hardware register state is preserved.
diff --git a/include/linux/preempt.h b/include/linux/preempt.h index af39859f02ee1..cf665d25838cf 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -122,9 +122,17 @@ * The preempt_count offset after spin_lock() */ #if !defined(CONFIG_PREEMPT_RT) -#define PREEMPT_LOCK_OFFSET PREEMPT_DISABLE_OFFSET +#define PREEMPT_LOCK_OFFSET PREEMPT_DISABLE_OFFSET +#define PREEMPT_LOCK_RESCHED_OFFSET PREEMPT_LOCK_OFFSET #else -#define PREEMPT_LOCK_OFFSET 0 +/* Locks on RT do not disable preemption */ +#define PREEMPT_LOCK_OFFSET 0 +/* + * spin/rw_lock() on RT implies rcu_read_lock(). The might_sleep() check in + * cond_resched*lock() has to take that into account because it checks for + * preempt_count() + rcu_preempt_depth(). + */ +#define PREEMPT_LOCK_RESCHED_OFFSET 1 #endif /* @@ -208,12 +216,12 @@ do { \ preempt_count_dec(); \ } while (0) -#ifdef CONFIG_PREEMPT_RT +#ifndef CONFIG_PREEMPT_RT # define preempt_enable_no_resched() sched_preempt_enable_no_resched() -# define preempt_check_resched_rt() preempt_check_resched() +# define preempt_check_resched_rt() barrier(); #else # define preempt_enable_no_resched() preempt_enable() -# define preempt_check_resched_rt() barrier(); +# define preempt_check_resched_rt() preempt_check_resched() #endif #define preemptible() (preempt_count() == 0 && !irqs_disabled()) @@ -333,13 +341,9 @@ do { \ #ifdef CONFIG_PREEMPT_RT # define preempt_disable_rt() preempt_disable() # define preempt_enable_rt() preempt_enable() -# define preempt_disable_nort() barrier() -# define preempt_enable_nort() barrier() #else # define preempt_disable_rt() barrier() # define preempt_enable_rt() barrier() -# define preempt_disable_nort() preempt_disable() -# define preempt_enable_nort() preempt_enable() #endif #ifdef CONFIG_PREEMPT_NOTIFIERS diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 48d00a4cf7de7..de6d1a21f113b 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -54,11 +54,6 @@ void __rcu_read_unlock(void); * types of kernel builds, the rcu_read_lock() nesting depth is unknowable. 
*/ #define rcu_preempt_depth() READ_ONCE(current->rcu_read_lock_nesting) -#ifndef CONFIG_PREEMPT_RT -#define sched_rcu_preempt_depth() rcu_preempt_depth() -#else -static inline int sched_rcu_preempt_depth(void) { return 0; } -#endif #else /* #ifdef CONFIG_PREEMPT_RCU */ @@ -84,8 +79,6 @@ static inline int rcu_preempt_depth(void) return 0; } -#define sched_rcu_preempt_depth() rcu_preempt_depth() - #endif /* #else #ifdef CONFIG_PREEMPT_RCU */ /* Internal to kernel */ diff --git a/include/linux/sched.h b/include/linux/sched.h index a47a4969b7676..992a1e07a27e8 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2169,19 +2169,19 @@ extern int __cond_resched_lock(spinlock_t *lock); extern int __cond_resched_rwlock_read(rwlock_t *lock); extern int __cond_resched_rwlock_write(rwlock_t *lock); -#define cond_resched_lock(lock) ({ \ - ___might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET);\ - __cond_resched_lock(lock); \ +#define cond_resched_lock(lock) ({ \ + __might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_RESCHED_OFFSET); \ + __cond_resched_lock(lock); \ }) -#define cond_resched_rwlock_read(lock) ({ \ - __might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET); \ - __cond_resched_rwlock_read(lock); \ +#define cond_resched_rwlock_read(lock) ({ \ + __might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_RESCHED_OFFSET); \ + __cond_resched_rwlock_read(lock); \ }) -#define cond_resched_rwlock_write(lock) ({ \ - __might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET); \ - __cond_resched_rwlock_write(lock); \ +#define cond_resched_rwlock_write(lock) ({ \ + __might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_RESCHED_OFFSET); \ + __cond_resched_rwlock_write(lock); \ }) static inline void cond_resched_rcu(void) diff --git a/init/Kconfig b/init/Kconfig index a42f126ea89e2..28fd7b8e8c7d6 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1009,7 +1009,6 @@ config CFS_BANDWIDTH config RT_GROUP_SCHED bool "Group scheduling for SCHED_RR/FIFO" depends on CGROUP_SCHED - depends on !PREEMPT_RT default n help This feature lets you explicitly allocate real CPU bandwidth diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 38530791e91e6..df1ccf4558f82 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -358,7 +358,7 @@ void cpuset_read_unlock(void) percpu_up_read(&cpuset_rwsem); } -static DEFINE_RAW_SPINLOCK(callback_lock); +static DEFINE_SPINLOCK(callback_lock); static struct workqueue_struct *cpuset_migrate_mm_wq; @@ -1308,7 +1308,7 @@ static int update_parent_subparts_cpumask(struct cpuset *cpuset, int cmd, * Newly added CPUs will be removed from effective_cpus and * newly deleted ones will be added back to effective_cpus.
*/ - raw_spin_lock_irq(&callback_lock); + spin_lock_irq(&callback_lock); if (adding) { cpumask_or(parent->subparts_cpus, parent->subparts_cpus, tmp->addmask); @@ -1331,7 +1331,7 @@ static int update_parent_subparts_cpumask(struct cpuset *cpuset, int cmd, if (old_prs != new_prs) cpuset->partition_root_state = new_prs; - raw_spin_unlock_irq(&callback_lock); + spin_unlock_irq(&callback_lock); notify_partition_change(cpuset, old_prs, new_prs); return cmd == partcmd_update; @@ -1435,7 +1435,7 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp) continue; rcu_read_unlock(); - raw_spin_lock_irq(&callback_lock); + spin_lock_irq(&callback_lock); cpumask_copy(cp->effective_cpus, tmp->new_cpus); if (cp->nr_subparts_cpus && (new_prs != PRS_ENABLED)) { @@ -1469,7 +1469,7 @@ static void update_cpumasks_hier(struct cpuset *cs, struct tmpmasks *tmp) if (new_prs != old_prs) cp->partition_root_state = new_prs; - raw_spin_unlock_irq(&callback_lock); + spin_unlock_irq(&callback_lock); notify_partition_change(cp, old_prs, new_prs); WARN_ON(!is_in_v2_mode() && @@ -1588,7 +1588,7 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, return -EINVAL; } - raw_spin_lock_irq(&callback_lock); + spin_lock_irq(&callback_lock); cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed); /* @@ -1599,7 +1599,7 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, cs->cpus_allowed); cs->nr_subparts_cpus = cpumask_weight(cs->subparts_cpus); } - raw_spin_unlock_irq(&callback_lock); + spin_unlock_irq(&callback_lock); update_cpumasks_hier(cs, &tmp); @@ -1798,9 +1798,9 @@ static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems) continue; rcu_read_unlock(); - raw_spin_lock_irq(&callback_lock); + spin_lock_irq(&callback_lock); cp->effective_mems = *new_mems; - raw_spin_unlock_irq(&callback_lock); + spin_unlock_irq(&callback_lock); WARN_ON(!is_in_v2_mode() && !nodes_equal(cp->mems_allowed, cp->effective_mems)); @@ -1868,9 +1868,9 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs, if (retval < 0) goto done; - raw_spin_lock_irq(&callback_lock); + spin_lock_irq(&callback_lock); cs->mems_allowed = trialcs->mems_allowed; - raw_spin_unlock_irq(&callback_lock); + spin_unlock_irq(&callback_lock); /* use trialcs->mems_allowed as a temp variable */ update_nodemasks_hier(cs, &trialcs->mems_allowed); @@ -1961,9 +1961,9 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, spread_flag_changed = ((is_spread_slab(cs) != is_spread_slab(trialcs)) || (is_spread_page(cs) != is_spread_page(trialcs))); - raw_spin_lock_irq(&callback_lock); + spin_lock_irq(&callback_lock); cs->flags = trialcs->flags; - raw_spin_unlock_irq(&callback_lock); + spin_unlock_irq(&callback_lock); if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed) rebuild_sched_domains_locked(); @@ -2054,9 +2054,9 @@ static int update_prstate(struct cpuset *cs, int new_prs) rebuild_sched_domains_locked(); out: if (!err) { - raw_spin_lock_irq(&callback_lock); + spin_lock_irq(&callback_lock); cs->partition_root_state = new_prs; - raw_spin_unlock_irq(&callback_lock); + spin_unlock_irq(&callback_lock); notify_partition_change(cs, old_prs, new_prs); } @@ -2471,7 +2471,7 @@ static int cpuset_common_seq_show(struct seq_file *sf, void *v) cpuset_filetype_t type = seq_cft(sf)->private; int ret = 0; - raw_spin_lock_irq(&callback_lock); + spin_lock_irq(&callback_lock); switch (type) { case FILE_CPULIST: @@ -2493,7 +2493,7 @@ static int cpuset_common_seq_show(struct seq_file *sf, void 
*v) ret = -EINVAL; } - raw_spin_unlock_irq(&callback_lock); + spin_unlock_irq(&callback_lock); return ret; } @@ -2811,14 +2811,14 @@ static int cpuset_css_online(struct cgroup_subsys_state *css) cpuset_inc(); - raw_spin_lock_irq(&callback_lock); + spin_lock_irq(&callback_lock); if (is_in_v2_mode()) { cpumask_copy(cs->effective_cpus, parent->effective_cpus); cs->effective_mems = parent->effective_mems; cs->use_parent_ecpus = true; parent->child_ecpus_count++; } - raw_spin_unlock_irq(&callback_lock); + spin_unlock_irq(&callback_lock); if (!test_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags)) goto out_unlock; @@ -2845,12 +2845,12 @@ static int cpuset_css_online(struct cgroup_subsys_state *css) } rcu_read_unlock(); - raw_spin_lock_irq(&callback_lock); + spin_lock_irq(&callback_lock); cs->mems_allowed = parent->mems_allowed; cs->effective_mems = parent->mems_allowed; cpumask_copy(cs->cpus_allowed, parent->cpus_allowed); cpumask_copy(cs->effective_cpus, parent->cpus_allowed); - raw_spin_unlock_irq(&callback_lock); + spin_unlock_irq(&callback_lock); out_unlock: percpu_up_write(&cpuset_rwsem); cpus_read_unlock(); @@ -2906,7 +2906,7 @@ static void cpuset_css_free(struct cgroup_subsys_state *css) static void cpuset_bind(struct cgroup_subsys_state *root_css) { percpu_down_write(&cpuset_rwsem); - raw_spin_lock_irq(&callback_lock); + spin_lock_irq(&callback_lock); if (is_in_v2_mode()) { cpumask_copy(top_cpuset.cpus_allowed, cpu_possible_mask); @@ -2917,7 +2917,7 @@ static void cpuset_bind(struct cgroup_subsys_state *root_css) top_cpuset.mems_allowed = top_cpuset.effective_mems; } - raw_spin_unlock_irq(&callback_lock); + spin_unlock_irq(&callback_lock); percpu_up_write(&cpuset_rwsem); } @@ -3014,12 +3014,12 @@ hotplug_update_tasks_legacy(struct cpuset *cs, { bool is_empty; - raw_spin_lock_irq(&callback_lock); + spin_lock_irq(&callback_lock); cpumask_copy(cs->cpus_allowed, new_cpus); cpumask_copy(cs->effective_cpus, new_cpus); cs->mems_allowed = *new_mems; cs->effective_mems = *new_mems; - raw_spin_unlock_irq(&callback_lock); + spin_unlock_irq(&callback_lock); /* * Don't call update_tasks_cpumask() if the cpuset becomes empty, @@ -3056,10 +3056,10 @@ hotplug_update_tasks(struct cpuset *cs, if (nodes_empty(*new_mems)) *new_mems = parent_cs(cs)->effective_mems; - raw_spin_lock_irq(&callback_lock); + spin_lock_irq(&callback_lock); cpumask_copy(cs->effective_cpus, new_cpus); cs->effective_mems = *new_mems; - raw_spin_unlock_irq(&callback_lock); + spin_unlock_irq(&callback_lock); if (cpus_updated) update_tasks_cpumask(cs); @@ -3126,10 +3126,10 @@ static void cpuset_hotplug_update_tasks(struct cpuset *cs, struct tmpmasks *tmp) if (is_partition_root(cs) && (cpumask_empty(&new_cpus) || (parent->partition_root_state == PRS_ERROR))) { if (cs->nr_subparts_cpus) { - raw_spin_lock_irq(&callback_lock); + spin_lock_irq(&callback_lock); cs->nr_subparts_cpus = 0; cpumask_clear(cs->subparts_cpus); - raw_spin_unlock_irq(&callback_lock); + spin_unlock_irq(&callback_lock); compute_effective_cpumask(&new_cpus, cs, parent); } @@ -3147,9 +3147,9 @@ static void cpuset_hotplug_update_tasks(struct cpuset *cs, struct tmpmasks *tmp) NULL, tmp); old_prs = cs->partition_root_state; if (old_prs != PRS_ERROR) { - raw_spin_lock_irq(&callback_lock); + spin_lock_irq(&callback_lock); cs->partition_root_state = PRS_ERROR; - raw_spin_unlock_irq(&callback_lock); + spin_unlock_irq(&callback_lock); notify_partition_change(cs, old_prs, PRS_ERROR); } } @@ -3231,7 +3231,7 @@ static void cpuset_hotplug_workfn(struct work_struct *work) /* 
synchronize cpus_allowed to cpu_active_mask */ if (cpus_updated) { - raw_spin_lock_irq(&callback_lock); + spin_lock_irq(&callback_lock); if (!on_dfl) cpumask_copy(top_cpuset.cpus_allowed, &new_cpus); /* @@ -3251,17 +3251,17 @@ static void cpuset_hotplug_workfn(struct work_struct *work) } } cpumask_copy(top_cpuset.effective_cpus, &new_cpus); - raw_spin_unlock_irq(&callback_lock); + spin_unlock_irq(&callback_lock); /* we don't mess with cpumasks of tasks in top_cpuset */ } /* synchronize mems_allowed to N_MEMORY */ if (mems_updated) { - raw_spin_lock_irq(&callback_lock); + spin_lock_irq(&callback_lock); if (!on_dfl) top_cpuset.mems_allowed = new_mems; top_cpuset.effective_mems = new_mems; - raw_spin_unlock_irq(&callback_lock); + spin_unlock_irq(&callback_lock); update_tasks_nodemask(&top_cpuset); } @@ -3362,9 +3362,9 @@ void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask) { unsigned long flags; - raw_spin_lock_irqsave(&callback_lock, flags); + spin_lock_irqsave(&callback_lock, flags); guarantee_online_cpus(tsk, pmask); - raw_spin_unlock_irqrestore(&callback_lock, flags); + spin_unlock_irqrestore(&callback_lock, flags); } /** @@ -3435,11 +3435,11 @@ nodemask_t cpuset_mems_allowed(struct task_struct *tsk) nodemask_t mask; unsigned long flags; - raw_spin_lock_irqsave(&callback_lock, flags); + spin_lock_irqsave(&callback_lock, flags); rcu_read_lock(); guarantee_online_mems(task_cs(tsk), &mask); rcu_read_unlock(); - raw_spin_unlock_irqrestore(&callback_lock, flags); + spin_unlock_irqrestore(&callback_lock, flags); return mask; } @@ -3531,14 +3531,14 @@ bool __cpuset_node_allowed(int node, gfp_t gfp_mask) return true; /* Not hardwall and node outside mems_allowed: scan up cpusets */ - raw_spin_lock_irqsave(&callback_lock, flags); + spin_lock_irqsave(&callback_lock, flags); rcu_read_lock(); cs = nearest_hardwall_ancestor(task_cs(current)); allowed = node_isset(node, cs->mems_allowed); rcu_read_unlock(); - raw_spin_unlock_irqrestore(&callback_lock, flags); + spin_unlock_irqrestore(&callback_lock, flags); return allowed; } diff --git a/kernel/irq/spurious.c b/kernel/irq/spurious.c index ca4bdc53d6c74..02b2daf074414 100644 --- a/kernel/irq/spurious.c +++ b/kernel/irq/spurious.c @@ -447,10 +447,10 @@ MODULE_PARM_DESC(noirqdebug, "Disable irq lockup detection when true"); static int __init irqfixup_setup(char *str) { -#ifdef CONFIG_PREEMPT_RT - pr_warn("irqfixup boot option not supported w/ CONFIG_PREEMPT_RT\n"); - return 1; -#endif + if (IS_ENABLED(CONFIG_PREEMPT_RT)) { + pr_warn("irqfixup boot option not supported with PREEMPT_RT\n"); + return 1; + } irqfixup = 1; printk(KERN_WARNING "Misrouted IRQ fixup support enabled.\n"); printk(KERN_WARNING "This may impact system performance.\n"); @@ -463,10 +463,10 @@ module_param(irqfixup, int, 0644); static int __init irqpoll_setup(char *str) { -#ifdef CONFIG_PREEMPT_RT - pr_warn("irqpoll boot option not supported w/ CONFIG_PREEMPT_RT\n"); - return 1; -#endif + if (IS_ENABLED(CONFIG_PREEMPT_RT)) { + pr_warn("irqpoll boot option not supported with PREEMPT_RT\n"); + return 1; + } irqfixup = 2; printk(KERN_WARNING "Misrouted IRQ fixup and polling support " "enabled\n"); diff --git a/kernel/locking/spinlock_rt.c b/kernel/locking/spinlock_rt.c index 839041f8460f9..1d1e85e317385 100644 --- a/kernel/locking/spinlock_rt.c +++ b/kernel/locking/spinlock_rt.c @@ -24,6 +24,14 @@ #define RT_MUTEX_BUILD_SPINLOCKS #include "rtmutex.c" +/* + * Use ___might_sleep() which skips the state check and take RCU nesting + * into account as spin/read/write_lock() 
can legitimately nest into an RCU + * read side critical section: + */ +#define rtlock_might_sleep() \ + ___might_sleep(__FILE__, __LINE__, rcu_preempt_depth()) + static __always_inline void rtlock_lock(struct rt_mutex_base *rtm) { if (unlikely(!rt_mutex_cmpxchg_acquire(rtm, NULL, current))) @@ -32,7 +40,7 @@ static __always_inline void rtlock_lock(struct rt_mutex_base *rtm) static __always_inline void __rt_spin_lock(spinlock_t *lock) { - ___might_sleep(__FILE__, __LINE__, 0); + rtlock_might_sleep(); rtlock_lock(&lock->lock); rcu_read_lock(); migrate_disable(); @@ -210,7 +218,7 @@ EXPORT_SYMBOL(rt_write_trylock); void __sched rt_read_lock(rwlock_t *rwlock) { - ___might_sleep(__FILE__, __LINE__, 0); + rtlock_might_sleep(); rwlock_acquire_read(&rwlock->dep_map, 0, 0, _RET_IP_); rwbase_read_lock(&rwlock->rwbase, TASK_RTLOCK_WAIT); rcu_read_lock(); @@ -220,7 +228,7 @@ EXPORT_SYMBOL(rt_read_lock); void __sched rt_write_lock(rwlock_t *rwlock) { - ___might_sleep(__FILE__, __LINE__, 0); + rtlock_might_sleep(); rwlock_acquire(&rwlock->dep_map, 0, 0, _RET_IP_); rwbase_write_lock(&rwlock->rwbase, TASK_RTLOCK_WAIT); rcu_read_lock(); diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index c13f63aa12e83..937b96ce1510a 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -80,7 +80,6 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_data, rcu_data) = { .dynticks = ATOMIC_INIT(1), #ifdef CONFIG_RCU_NOCB_CPU .cblist.flags = SEGCBLIST_SOFTIRQ_ONLY, - .nocb_local_lock = INIT_LOCAL_LOCK(nocb_local_lock), #endif }; static struct rcu_state rcu_state = { @@ -2279,13 +2278,13 @@ rcu_report_qs_rdp(struct rcu_data *rdp) { unsigned long flags; unsigned long mask; - bool needwake = false; - const bool offloaded = rcu_rdp_is_offloaded(rdp); + bool offloaded, needwake = false; struct rcu_node *rnp; WARN_ON_ONCE(rdp->cpu != smp_processor_id()); rnp = rdp->mynode; raw_spin_lock_irqsave_rcu_node(rnp, flags); + offloaded = rcu_rdp_is_offloaded(rdp); if (rdp->cpu_no_qs.b.norm || rdp->gp_seq != rnp->gp_seq || rdp->gpwrap) { @@ -2447,7 +2446,7 @@ static void rcu_do_batch(struct rcu_data *rdp) int div; bool __maybe_unused empty; unsigned long flags; - const bool offloaded = rcu_rdp_is_offloaded(rdp); + bool offloaded; struct rcu_head *rhp; struct rcu_cblist rcl = RCU_CBLIST_INITIALIZER(rcl); long bl, count = 0; @@ -2473,6 +2472,7 @@ static void rcu_do_batch(struct rcu_data *rdp) rcu_nocb_lock(rdp); WARN_ON_ONCE(cpu_is_offline(smp_processor_id())); pending = rcu_segcblist_n_cbs(&rdp->cblist); + offloaded = rcu_rdp_is_offloaded(rdp); div = READ_ONCE(rcu_divisor); div = div < 0 ? 7 : div > sizeof(long) * 8 - 2 ? 
sizeof(long) * 8 - 2 : div; bl = max(rdp->blimit, pending >> div); @@ -2812,12 +2812,10 @@ static void rcu_cpu_kthread(unsigned int cpu) { unsigned int *statusp = this_cpu_ptr(&rcu_data.rcu_cpu_kthread_status); char work, *workp = this_cpu_ptr(&rcu_data.rcu_cpu_has_work); - struct rcu_data *rdp = this_cpu_ptr(&rcu_data); int spincnt; trace_rcu_utilization(TPS("Start CPU kthread@rcu_run")); for (spincnt = 0; spincnt < 10; spincnt++) { - rcu_nocb_local_lock(rdp); local_bh_disable(); *statusp = RCU_KTHREAD_RUNNING; local_irq_disable(); @@ -2827,7 +2825,6 @@ static void rcu_cpu_kthread(unsigned int cpu) if (work) rcu_core(); local_bh_enable(); - rcu_nocb_local_unlock(rdp); if (*workp == 0) { trace_rcu_utilization(TPS("End CPU kthread@rcu_wait")); *statusp = RCU_KTHREAD_WAITING; diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index aa6831255fec6..305cf6aeb4086 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -210,8 +210,6 @@ struct rcu_data { struct timer_list nocb_timer; /* Enforce finite deferral. */ unsigned long nocb_gp_adv_time; /* Last call_rcu() CB adv (jiffies). */ - local_lock_t nocb_local_lock; - /* The following fields are used by call_rcu, hence own cacheline. */ raw_spinlock_t nocb_bypass_lock ____cacheline_internodealigned_in_smp; struct rcu_cblist nocb_bypass; /* Lock-contention-bypass CB list. */ @@ -447,8 +445,6 @@ static void rcu_nocb_unlock(struct rcu_data *rdp); static void rcu_nocb_unlock_irqrestore(struct rcu_data *rdp, unsigned long flags); static void rcu_lockdep_assert_cblist_protected(struct rcu_data *rdp); -static void rcu_nocb_local_lock(struct rcu_data *rdp); -static void rcu_nocb_local_unlock(struct rcu_data *rdp); #ifdef CONFIG_RCU_NOCB_CPU static void __init rcu_organize_nocb_kthreads(void); #define rcu_nocb_lock_irqsave(rdp, flags) \ diff --git a/kernel/rcu/tree_nocb.h b/kernel/rcu/tree_nocb.h index 42481a3ce4913..8fdf44f8523f2 100644 --- a/kernel/rcu/tree_nocb.h +++ b/kernel/rcu/tree_nocb.h @@ -21,11 +21,6 @@ static inline int rcu_lockdep_is_held_nocb(struct rcu_data *rdp) return lockdep_is_held(&rdp->nocb_lock); } -static inline int rcu_lockdep_is_held_nocb_local(struct rcu_data *rdp) -{ - return lockdep_is_held(&rdp->nocb_local_lock); -} - static inline bool rcu_current_is_nocb_kthread(struct rcu_data *rdp) { /* Race on early boot between thread creation and assignment */ @@ -186,22 +181,6 @@ static void rcu_nocb_unlock_irqrestore(struct rcu_data *rdp, } } -/* - * The invocation of rcu_core() within the RCU core kthreads remains preemptible - * under PREEMPT_RT, thus the offload state of a CPU could change while - * said kthreads are preempted. Prevent this from happening by protecting the - * offload state with a local_lock(). - */ -static void rcu_nocb_local_lock(struct rcu_data *rdp) -{ - local_lock(&rcu_data.nocb_local_lock); -} - -static void rcu_nocb_local_unlock(struct rcu_data *rdp) -{ - local_unlock(&rcu_data.nocb_local_lock); -} - /* Lockdep check that ->cblist may be safely accessed. */ static void rcu_lockdep_assert_cblist_protected(struct rcu_data *rdp) { @@ -969,7 +948,6 @@ static int rdp_offload_toggle(struct rcu_data *rdp, if (rdp->nocb_cb_sleep) rdp->nocb_cb_sleep = false; rcu_nocb_unlock_irqrestore(rdp, flags); - rcu_nocb_local_unlock(rdp); /* * Ignore former value of nocb_cb_sleep and force wake up as it could @@ -1001,7 +979,6 @@ static long rcu_nocb_rdp_deoffload(void *arg) pr_info("De-offloading %d\n", rdp->cpu); - rcu_nocb_local_lock(rdp); rcu_nocb_lock_irqsave(rdp, flags); /* * Flush once and for all now. 
This suffices because we are @@ -1084,7 +1061,6 @@ static long rcu_nocb_rdp_offload(void *arg) * Can't use rcu_nocb_lock_irqsave() while we are in * SEGCBLIST_SOFTIRQ_ONLY mode. */ - rcu_nocb_local_lock(rdp); raw_spin_lock_irqsave(&rdp->nocb_lock, flags); /* @@ -1432,11 +1408,6 @@ static inline int rcu_lockdep_is_held_nocb(struct rcu_data *rdp) return 0; } -static inline int rcu_lockdep_is_held_nocb_local(struct rcu_data *rdp) -{ - return 0; -} - static inline bool rcu_current_is_nocb_kthread(struct rcu_data *rdp) { return false; @@ -1459,16 +1430,6 @@ static void rcu_nocb_unlock_irqrestore(struct rcu_data *rdp, local_irq_restore(flags); } -/* No ->nocb_local_lock to acquire. */ -static void rcu_nocb_local_lock(struct rcu_data *rdp) -{ -} - -/* No ->nocb_local_lock to release. */ -static void rcu_nocb_local_unlock(struct rcu_data *rdp) -{ -} - /* Lockdep check that ->cblist may be safely accessed. */ static void rcu_lockdep_assert_cblist_protected(struct rcu_data *rdp) { diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 265cb799d340c..d070059163d70 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -13,45 +13,23 @@ #include "../locking/rtmutex_common.h" -/* - * Is a local read of the rdp's offloaded state safe and stable? - * See rcu_nocb_local_lock() & family. - */ -static inline bool rcu_local_offload_access_safe(struct rcu_data *rdp) -{ - if (!preemptible()) - return true; - - if (!is_migratable()) { - if (!IS_ENABLED(CONFIG_RCU_NOCB)) - return true; - - return rcu_lockdep_is_held_nocb_local(rdp); - } - - return false; -} - static bool rcu_rdp_is_offloaded(struct rcu_data *rdp) { /* - * In order to read the offloaded state of an rdp is a safe and stable - * way and prevent from its value to be changed under us, we must - * either... + * In order to read the offloaded state of an rdp is a safe + * and stable way and prevent from its value to be changed + * under us, we must either hold the barrier mutex, the cpu + * hotplug lock (read or write) or the nocb lock. Local + * non-preemptible reads are also safe. NOCB kthreads and + * timers have their own means of synchronization against the + * offloaded state updaters. */ RCU_LOCKDEP_WARN( - // ...hold the barrier mutex... !(lockdep_is_held(&rcu_state.barrier_mutex) || - // ... the cpu hotplug lock (read or write)... (IS_ENABLED(CONFIG_HOTPLUG_CPU) && lockdep_is_cpus_held()) || - // ... or the NOCB lock. rcu_lockdep_is_held_nocb(rdp) || - // Local reads still require the local state to remain stable - // (preemption disabled / local lock held) (rdp == this_cpu_ptr(&rcu_data) && - rcu_local_offload_access_safe(rdp)) || - // NOCB kthreads and timers have their own means of - // synchronization against the offloaded state updaters. + !(IS_ENABLED(CONFIG_PREEMPT_COUNT) && preemptible())) || rcu_current_is_nocb_kthread(rdp)), "Unsafe read of RCU_NOCB offloaded state" ); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 74260746865d8..261508bac047d 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -9547,7 +9547,7 @@ void __init sched_init(void) #ifdef CONFIG_DEBUG_ATOMIC_SLEEP static inline int preempt_count_equals(int preempt_offset) { - int nested = preempt_count() + sched_rcu_preempt_depth(); + int nested = preempt_count() + rcu_preempt_depth(); return (nested == preempt_offset); } diff --git a/localversion-rt b/localversion-rt index c3054d08a1129..1445cd65885cd 100644 --- a/localversion-rt +++ b/localversion-rt @@ -1 +1 @@ --rt2 +-rt3
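
As a quick illustration of the new might_sleep() behaviour mentioned above: the sketch below is a made-up test module (not part of the patch set; the file and function names are invented). On a PREEMPT_RT kernel with CONFIG_DEBUG_ATOMIC_SLEEP=y it should now splat right at the might_sleep() call, because the check accounts for rcu_preempt_depth(). Previously the problem would only have been reported once something under the RCU read side actually ended up in schedule().

/* rcu_might_sleep_test.c - invented example module, not part of this series.
 * Build against a v5.15-rc2-rt3 tree with CONFIG_DEBUG_ATOMIC_SLEEP=y.
 */
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/rcupdate.h>

static int __init rcu_might_sleep_test_init(void)
{
	rcu_read_lock();
	/*
	 * With the reworked accounting this produces a
	 * "BUG: sleeping function called from invalid context" splat
	 * immediately, since might_sleep() now sees rcu_preempt_depth() == 1.
	 */
	might_sleep();
	rcu_read_unlock();
	return 0;
}

static void __exit rcu_might_sleep_test_exit(void)
{
}

module_init(rcu_might_sleep_test_init);
module_exit(rcu_might_sleep_test_exit);
MODULE_LICENSE("GPL");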