Dear RT folks!

I'm pleased to announce the v5.0.21-rt13 patch set.

Changes since v5.0.21-rt12:

  - A patch by Kirill Smelkov to avoid a deadlock in the switchtec driver.

  - Rework of the hrtimer, timer and posix-timer cancellation interface
    on -RT. Instead of the swait/schedule interface we now have locks
    which are held while the timer is active. During the cancellation of
    an active timer the lock is acquired. The lock will then either
    PI-boost the timer callback or block and wait until the callback has
    completed. The new code looks simpler and no longer triggers a
    warning from rcu_note_context_switch() as reported by Grygorii
    Strashko and Daniel Wagner. The patches were contributed by
    Anna-Maria Gleixner. (A small illustrative sketch of the cancellation
    pattern is appended after the patch below.)

  - Drop a preempt_disable_rt() statement in get_nohz_timer_target().
    The caller already holds a lock which disables preemption.

  - tasklet_kill() could deadlock since the softirq rework if the task
    invoking tasklet_kill() had preempted the active tasklet.

  - in_softirq() (and related functions) did not work as expected since
    the softirq rework.

  - RCU_FAST_NO_HZ was disabled on RT because a timer was used in a bad
    context. After double checking, this is no longer the case and the
    option can be enabled again (but it depends on RCU_EXPERT, so be
    careful).

  - The option "rcu.rcu_normal_after_boot=1" is set by default on RT and
    can no longer be disabled on the command line. Suggested by
    Paul E. McKenney.

  - Backport a patch from upstream which introduces
    user_access_{save,restore}(). It is needed due to a backport made by
    stable.

Known issues
     - rcutorture is currently broken on -RT. Reported by Juri Lelli.

The delta patch against v5.0.21-rt12 is appended below and can be found here:

     https://cdn.kernel.org/pub/linux/kernel/projects/rt/5.0/incr/patch-5.0.21-rt12-rt13.patch.xz

You can get this release via the git tree at:

    git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-rt-devel.git v5.0.21-rt13

The RT patch against v5.0.21 can be found here:

    https://cdn.kernel.org/pub/linux/kernel/projects/rt/5.0/older/patch-5.0.21-rt13.patch.xz

The split quilt queue is available at:

    https://cdn.kernel.org/pub/linux/kernel/projects/rt/5.0/older/patches-5.0.21-rt13.tar.xz

Sebastian

diff --git a/arch/x86/include/asm/smap.h b/arch/x86/include/asm/smap.h
index db333300bd4be..6cfe431710203 100644
--- a/arch/x86/include/asm/smap.h
+++ b/arch/x86/include/asm/smap.h
@@ -58,6 +58,23 @@ static __always_inline void stac(void)
 	alternative("", __stringify(__ASM_STAC), X86_FEATURE_SMAP);
 }
 
+static __always_inline unsigned long smap_save(void)
+{
+	unsigned long flags;
+
+	asm volatile (ALTERNATIVE("", "pushf; pop %0; " __stringify(__ASM_CLAC),
+				  X86_FEATURE_SMAP)
+		      : "=rm" (flags) : : "memory", "cc");
+
+	return flags;
+}
+
+static __always_inline void smap_restore(unsigned long flags)
+{
+	asm volatile (ALTERNATIVE("", "push %0; popf", X86_FEATURE_SMAP)
+		      : : "g" (flags) : "memory", "cc");
+}
+
 /* These macros can be used in asm() statements */
 #define ASM_CLAC \
 	ALTERNATIVE("", __stringify(__ASM_CLAC), X86_FEATURE_SMAP)
@@ -69,6 +86,9 @@ static __always_inline void stac(void)
 static inline void clac(void) { }
 static inline void stac(void) { }
 
+static inline unsigned long smap_save(void) { return 0; }
+static inline void smap_restore(unsigned long flags) { }
+
 #define ASM_CLAC
 #define ASM_STAC
 
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 2b0dd1b9c2087..743e1a96cd6ea 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -720,6 +720,9 @@ static __must_check inline bool user_access_begin(const void __user *ptr, size_t len)
 #define user_access_begin(a,b)	user_access_begin(a,b)
 #define user_access_end()	__uaccess_end()
 
+#define user_access_save()	smap_save()
+#define user_access_restore(x)	smap_restore(x)
+
 #define unsafe_put_user(x, ptr, label)	\
 	__put_user_size((__typeof__(*(ptr)))(x), (ptr), sizeof(*(ptr)), label)
 
diff --git a/drivers/pci/switch/switchtec.c b/drivers/pci/switch/switchtec.c
index 80823ad221ba5..ba17eaa410f96 100644
--- a/drivers/pci/switch/switchtec.c
+++ b/drivers/pci/switch/switchtec.c
@@ -392,7 +392,7 @@ static int switchtec_dev_open(struct inode *inode, struct file *filp)
 		return PTR_ERR(stuser);
 
 	filp->private_data = stuser;
-	nonseekable_open(inode, filp);
+	stream_open(inode, filp);
 
 	dev_dbg(&stdev->dev, "%s: %p\n", __func__, stuser);
 
diff --git a/fs/timerfd.c b/fs/timerfd.c
index 190cb85044112..86ce98700f323 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -471,10 +471,11 @@ static int do_timerfd_settime(int ufd, int flags,
 			break;
 	}
 	spin_unlock_irq(&ctx->wqh.lock);
+
 	if (isalarm(ctx))
-		hrtimer_wait_for_timer(&ctx->t.alarm.timer);
+		hrtimer_grab_expiry_lock(&ctx->t.alarm.timer);
 	else
-		hrtimer_wait_for_timer(&ctx->t.tmr);
+		hrtimer_grab_expiry_lock(&ctx->t.tmr);
 }
 
 /*
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 34e9c6b74ae0a..c737e15ea6536 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -19,7 +19,6 @@
 #include <linux/percpu.h>
 #include <linux/timer.h>
 #include <linux/timerqueue.h>
-#include <linux/wait.h>
 
 struct hrtimer_clock_base;
 struct hrtimer_cpu_base;
@@ -190,6 +189,8 @@ enum hrtimer_base_type {
  * @nr_retries:		Total number of hrtimer interrupt retries
  * @nr_hangs:		Total number of hrtimer interrupt hangs
  * @max_hang_time:	Maximum time spent in hrtimer_interrupt
+ * @softirq_expiry_lock: Lock which is taken while softirq based hrtimer are
+ *			 expired
  * @expires_next:	absolute time of the next event, is required for remote
  *			hrtimer enqueue; it is the total first expiry time (hard
  *			and soft hrtimer are taken into account)
@@ -217,12 +218,10 @@ struct hrtimer_cpu_base {
 	unsigned short			nr_hangs;
 	unsigned int			max_hang_time;
 #endif
+	spinlock_t			softirq_expiry_lock;
 	ktime_t				expires_next;
 	struct hrtimer			*next_timer;
 	ktime_t				softirq_expires_next;
-#ifdef CONFIG_PREEMPT_RT_BASE
-	wait_queue_head_t		wait;
-#endif
 	struct hrtimer			*softirq_next_timer;
 	struct hrtimer_clock_base	clock_base[HRTIMER_MAX_CLOCK_BASES];
 } ____cacheline_aligned;
@@ -423,6 +422,7 @@ static inline void hrtimer_start(struct hrtimer *timer, ktime_t tim,
 
 extern int hrtimer_cancel(struct hrtimer *timer);
 extern int hrtimer_try_to_cancel(struct hrtimer *timer);
+extern void hrtimer_grab_expiry_lock(const struct hrtimer *timer);
 
 static inline void hrtimer_start_expires(struct hrtimer *timer,
 					 enum hrtimer_mode mode)
@@ -440,13 +440,6 @@ static inline void hrtimer_restart(struct hrtimer *timer)
 	hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
 }
 
-/* Softirq preemption could deadlock timer removal */
-#ifdef CONFIG_PREEMPT_RT_BASE
-  extern void hrtimer_wait_for_timer(const struct hrtimer *timer);
-#else
-# define hrtimer_wait_for_timer(timer)	do { cpu_relax(); } while (0)
-#endif
-
 /* Query timers: */
 extern ktime_t __hrtimer_get_remaining(const struct hrtimer *timer, bool adjust);
 
@@ -472,7 +465,7 @@ static inline int hrtimer_is_queued(struct hrtimer *timer)
  * Helper function to check, whether the timer is running the callback
  * function
  */
-static inline int hrtimer_callback_running(const struct hrtimer *timer)
+static inline int hrtimer_callback_running(struct hrtimer *timer)
 {
 	return timer->base->running == timer;
 }
 
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 9329de0d8bfdd..64762225e1756 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -596,7 +596,10 @@ static inline void tasklet_unlock(struct tasklet_struct *t)
 
 static inline void tasklet_unlock_wait(struct tasklet_struct *t)
 {
-	while (test_bit(TASKLET_STATE_RUN, &(t)->state)) { barrier(); }
+	while (test_bit(TASKLET_STATE_RUN, &(t)->state)) {
+		local_bh_disable();
+		local_bh_enable();
+	}
 }
 #else
 #define tasklet_trylock(t) 1
diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index 05cd7466d10a8..d3552a5bcc8b2 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -15,6 +15,7 @@ struct cpu_timer_list {
 	u64 expires, incr;
 	struct task_struct *task;
 	int firing;
+	int firing_cpu;
 };
 
 /*
diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index 6a4884268f4c9..d559e3a0379c2 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -80,14 +80,6 @@
 #define hardirq_count()	(preempt_count() & HARDIRQ_MASK)
 #define irq_count()	(preempt_count() & (HARDIRQ_MASK | SOFTIRQ_MASK \
 				 | NMI_MASK))
-#ifdef CONFIG_PREEMPT_RT_FULL
-
-long softirq_count(void);
-
-#else
-#define softirq_count()	(preempt_count() & SOFTIRQ_MASK)
-#endif
-
 /*
  * Are we doing bottom half or hardware interrupt processing?
  *
@@ -102,12 +94,23 @@ long softirq_count(void);
  * should not be used in new code.
  */
 #define in_irq()		(hardirq_count())
-#define in_softirq()		(softirq_count())
 #define in_interrupt()		(irq_count())
-#define in_serving_softirq()	(softirq_count() & SOFTIRQ_OFFSET)
 #define in_nmi()		(preempt_count() & NMI_MASK)
 #define in_task()		(!(preempt_count() & \
 				   (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET)))
+#ifdef CONFIG_PREEMPT_RT_FULL
+
+#define softirq_count()		((long)get_current()->softirq_count)
+#define in_softirq()		(softirq_count())
+#define in_serving_softirq()	(get_current()->softirq_count & SOFTIRQ_OFFSET)
+
+#else
+
+#define softirq_count()	(preempt_count() & SOFTIRQ_MASK)
+#define in_softirq()		(softirq_count())
+#define in_serving_softirq()	(softirq_count() & SOFTIRQ_OFFSET)
+
+#endif
 
 /*
  * The preempt_count offset after preempt_disable();
diff --git a/include/linux/sched.h b/include/linux/sched.h
index e1ea2ea52feb0..8c5bc47f934c3 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -961,6 +961,9 @@ struct task_struct {
 	int				softirqs_enabled;
 	int				softirq_context;
 #endif
+#ifdef CONFIG_PREEMPT_RT_FULL
+	int				softirq_count;
+#endif
 
 #ifdef CONFIG_LOCKDEP
 # define MAX_LOCK_DEPTH			48UL
diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index d3afc0f018147..7f7356e151ce3 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -270,6 +270,8 @@ extern long strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count);
 #define user_access_end() do { } while (0)
 #define unsafe_get_user(x, ptr, err) do { if (unlikely(__get_user(x, ptr))) goto err; } while (0)
 #define unsafe_put_user(x, ptr, err) do { if (unlikely(__put_user(x, ptr))) goto err; } while (0)
+static inline unsigned long user_access_save(void) { return 0UL; }
+static inline void user_access_restore(unsigned long flags) { }
 #endif
 
 #ifdef CONFIG_HARDENED_USERCOPY
diff --git a/kernel/rcu/Kconfig b/kernel/rcu/Kconfig
index 7e36ea9b7b720..5f5a54714a7a3 100644
--- a/kernel/rcu/Kconfig
+++ b/kernel/rcu/Kconfig
@@ -172,7 +172,7 @@ config RCU_FANOUT_LEAF
 
 config RCU_FAST_NO_HZ
 	bool "Accelerate last non-dyntick-idle CPU's grace periods"
"Accelerate last non-dyntick-idle CPU's grace periods" - depends on NO_HZ_COMMON && SMP && RCU_EXPERT && !PREEMPT_RT_FULL + depends on NO_HZ_COMMON && SMP && RCU_EXPERT default n help This option permits CPUs to enter dynticks-idle state even if diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index 3700b730ea55d..aae5968ec9ebb 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -69,7 +69,9 @@ module_param(rcu_expedited, int, 0); extern int rcu_normal; /* from sysctl */ module_param(rcu_normal, int, 0); static int rcu_normal_after_boot = IS_ENABLED(CONFIG_PREEMPT_RT_FULL); +#ifndef CONFIG_PREEMPT_RT_FULL module_param(rcu_normal_after_boot, int, 0); +#endif #endif /* #ifndef CONFIG_TINY_RCU */ #ifdef CONFIG_DEBUG_LOCK_ALLOC diff --git a/kernel/sched/core.c b/kernel/sched/core.c index a8493ff60b673..2bd114e788a10 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -570,14 +570,11 @@ void resched_cpu(int cpu) */ int get_nohz_timer_target(void) { - int i, cpu; + int i, cpu = smp_processor_id(); struct sched_domain *sd; - preempt_disable_rt(); - cpu = smp_processor_id(); - if (!idle_cpu(cpu) && housekeeping_cpu(cpu, HK_FLAG_TIMER)) - goto preempt_en_rt; + return cpu; rcu_read_lock(); for_each_domain(cpu, sd) { @@ -596,8 +593,6 @@ int get_nohz_timer_target(void) cpu = housekeeping_any_cpu(HK_FLAG_TIMER); unlock: rcu_read_unlock(); -preempt_en_rt: - preempt_enable_rt(); return cpu; } diff --git a/kernel/softirq.c b/kernel/softirq.c index 473369122ddd0..c4fae96f23c54 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -109,12 +109,6 @@ static bool ksoftirqd_running(unsigned long pending) static DEFINE_LOCAL_IRQ_LOCK(bh_lock); static DEFINE_PER_CPU(long, softirq_counter); -long softirq_count(void) -{ - return raw_cpu_read(softirq_counter); -} -EXPORT_SYMBOL(softirq_count); - void __local_bh_disable_ip(unsigned long ip, unsigned int cnt) { unsigned long __maybe_unused flags; @@ -125,6 +119,7 @@ void __local_bh_disable_ip(unsigned long ip, unsigned int cnt) local_lock(bh_lock); soft_cnt = this_cpu_inc_return(softirq_counter); WARN_ON_ONCE(soft_cnt == 0); + current->softirq_count += SOFTIRQ_DISABLE_OFFSET; #ifdef CONFIG_TRACE_IRQFLAGS local_irq_save(flags); @@ -155,6 +150,7 @@ void _local_bh_enable(void) local_irq_restore(flags); #endif + current->softirq_count -= SOFTIRQ_DISABLE_OFFSET; if (!in_atomic()) local_unlock(bh_lock); } @@ -192,6 +188,7 @@ void __local_bh_enable_ip(unsigned long ip, unsigned int cnt) if (!in_atomic()) local_unlock(bh_lock); + current->softirq_count -= SOFTIRQ_DISABLE_OFFSET; preempt_check_resched(); } EXPORT_SYMBOL(__local_bh_enable_ip); @@ -365,7 +362,9 @@ asmlinkage __visible void __softirq_entry __do_softirq(void) pending = local_softirq_pending(); account_irq_enter_time(current); -#ifndef CONFIG_PREEMPT_RT_FULL +#ifdef CONFIG_PREEMPT_RT_FULL + current->softirq_count |= SOFTIRQ_OFFSET; +#else __local_bh_disable_ip(_RET_IP_, SOFTIRQ_OFFSET); #endif in_hardirq = lockdep_softirq_start(); @@ -418,7 +417,9 @@ asmlinkage __visible void __softirq_entry __do_softirq(void) lockdep_softirq_end(in_hardirq); account_irq_exit_time(current); -#ifndef CONFIG_PREEMPT_RT_FULL +#ifdef CONFIG_PREEMPT_RT_FULL + current->softirq_count &= ~SOFTIRQ_OFFSET; +#else __local_bh_enable(SOFTIRQ_OFFSET); #endif WARN_ON_ONCE(in_interrupt()); @@ -468,7 +469,7 @@ void irq_enter(void) static inline void invoke_softirq(void) { - if (softirq_count() == 0) + if (this_cpu_read(softirq_counter) == 0) wakeup_softirqd(); } @@ -552,7 +553,7 @@ void raise_softirq_irqoff(unsigned int 
 	 * If were are not in BH-disabled section then we have to wake
 	 * ksoftirqd.
 	 */
-	if (softirq_count() == 0)
+	if (this_cpu_read(softirq_counter) == 0)
 		wakeup_softirqd();
 }
 
@@ -704,7 +705,8 @@ void tasklet_kill(struct tasklet_struct *t)
 
 	while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
 		do {
-			yield();
+			local_bh_disable();
+			local_bh_enable();
 		} while (test_bit(TASKLET_STATE_SCHED, &t->state));
 	}
 	tasklet_unlock_wait(t);
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index f6cd4bed61846..9f17e011087eb 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -433,7 +433,7 @@ int alarm_cancel(struct alarm *alarm)
 		int ret = alarm_try_to_cancel(alarm);
 		if (ret >= 0)
 			return ret;
-		hrtimer_wait_for_timer(&alarm->timer);
+		hrtimer_grab_expiry_lock(&alarm->timer);
 	}
 }
 EXPORT_SYMBOL_GPL(alarm_cancel);
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 6e5d62bdebf22..2067f461b12a3 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -930,33 +930,16 @@ u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval)
 }
 EXPORT_SYMBOL_GPL(hrtimer_forward);
 
-#ifdef CONFIG_PREEMPT_RT_BASE
-# define wake_up_timer_waiters(b)	wake_up(&(b)->wait)
-
-/**
- * hrtimer_wait_for_timer - Wait for a running timer
- *
- * @timer:	timer to wait for
- *
- * The function waits in case the timers callback function is
- * currently executed on the waitqueue of the timer base. The
- * waitqueue is woken up after the timer callback function has
- * finished execution.
- */
-void hrtimer_wait_for_timer(const struct hrtimer *timer)
+void hrtimer_grab_expiry_lock(const struct hrtimer *timer)
 {
 	struct hrtimer_clock_base *base = timer->base;
 
-	if (base && base->cpu_base &&
-	    base->index >= HRTIMER_BASE_MONOTONIC_SOFT)
-		wait_event(base->cpu_base->wait,
-			   !(hrtimer_callback_running(timer)));
+	if (base && base->cpu_base) {
+		spin_lock(&base->cpu_base->softirq_expiry_lock);
+		spin_unlock(&base->cpu_base->softirq_expiry_lock);
+	}
 }
 
-#else
-# define wake_up_timer_waiters(b)	do { } while (0)
-#endif
-
 /*
  * enqueue_hrtimer - internal function to (re)start a timer
  *
@@ -1191,7 +1174,7 @@ int hrtimer_cancel(struct hrtimer *timer)
 
 		if (ret >= 0)
 			return ret;
-		hrtimer_wait_for_timer(timer);
+		hrtimer_grab_expiry_lock(timer);
 	}
 }
 EXPORT_SYMBOL_GPL(hrtimer_cancel);
@@ -1495,6 +1478,7 @@ static __latent_entropy void hrtimer_run_softirq(struct softirq_action *h)
 	unsigned long flags;
 	ktime_t now;
 
+	spin_lock(&cpu_base->softirq_expiry_lock);
 	raw_spin_lock_irqsave(&cpu_base->lock, flags);
 
 	now = hrtimer_update_base(cpu_base);
@@ -1504,7 +1488,7 @@ static __latent_entropy void hrtimer_run_softirq(struct softirq_action *h)
 	hrtimer_update_softirq_timer(cpu_base, true);
 
 	raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
-	wake_up_timer_waiters(cpu_base);
+	spin_unlock(&cpu_base->softirq_expiry_lock);
 }
 
 #ifdef CONFIG_HIGH_RES_TIMERS
@@ -1914,9 +1898,7 @@ int hrtimers_prepare_cpu(unsigned int cpu)
 	cpu_base->softirq_next_timer = NULL;
 	cpu_base->expires_next = KTIME_MAX;
 	cpu_base->softirq_expires_next = KTIME_MAX;
-#ifdef CONFIG_PREEMPT_RT_BASE
-	init_waitqueue_head(&cpu_base->wait);
-#endif
+	spin_lock_init(&cpu_base->softirq_expiry_lock);
 	return 0;
 }
 
diff --git a/kernel/time/itimer.c b/kernel/time/itimer.c
index cb2b301d05490..d999294adeefd 100644
--- a/kernel/time/itimer.c
+++ b/kernel/time/itimer.c
@@ -213,7 +213,7 @@ int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue)
 		/* We are sharing ->siglock with it_real_fn() */
 		if (hrtimer_try_to_cancel(timer) < 0) {
 			spin_unlock_irq(&tsk->sighand->siglock);
-			hrtimer_wait_for_timer(&tsk->signal->real_timer);
+			hrtimer_grab_expiry_lock(timer);
 			goto again;
 		}
 		expires = timeval_to_ktime(value->it_value);
diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c
index a436ee592737a..5bb1edffe0d05 100644
--- a/kernel/time/posix-cpu-timers.c
+++ b/kernel/time/posix-cpu-timers.c
@@ -789,6 +789,7 @@ check_timers_list(struct list_head *timers,
 			return t->expires;
 
 		t->firing = 1;
+		t->firing_cpu = smp_processor_id();
 		list_move_tail(&t->entry, firing);
 	}
 
@@ -1131,6 +1132,20 @@ static inline int fastpath_timer_check(struct task_struct *tsk)
 	return 0;
 }
 
+static DEFINE_PER_CPU(spinlock_t, cpu_timer_expiry_lock) = __SPIN_LOCK_UNLOCKED(cpu_timer_expiry_lock);
+
+void cpu_timers_grab_expiry_lock(struct k_itimer *timer)
+{
+	int cpu = timer->it.cpu.firing_cpu;
+
+	if (cpu >= 0) {
+		spinlock_t *expiry_lock = per_cpu_ptr(&cpu_timer_expiry_lock, cpu);
+
+		spin_lock_irq(expiry_lock);
+		spin_unlock_irq(expiry_lock);
+	}
+}
+
 /*
  * This is called from the timer interrupt handler. The irq handler has
  * already updated our counts. We need to check if any timers fire now.
@@ -1141,6 +1156,7 @@ static void __run_posix_cpu_timers(struct task_struct *tsk)
 	LIST_HEAD(firing);
 	struct k_itimer *timer, *next;
 	unsigned long flags;
+	spinlock_t *expiry_lock;
 
 	/*
 	 * The fast path checks that there are no expired thread or thread
@@ -1149,6 +1165,9 @@ static void __run_posix_cpu_timers(struct task_struct *tsk)
 	if (!fastpath_timer_check(tsk))
 		return;
 
+	expiry_lock = this_cpu_ptr(&cpu_timer_expiry_lock);
+	spin_lock(expiry_lock);
+
 	if (!lock_task_sighand(tsk, &flags))
 		return;
 	/*
@@ -1183,6 +1202,7 @@ static void __run_posix_cpu_timers(struct task_struct *tsk)
 		list_del_init(&timer->it.cpu.entry);
 		cpu_firing = timer->it.cpu.firing;
 		timer->it.cpu.firing = 0;
+		timer->it.cpu.firing_cpu = -1;
 		/*
 		 * The firing flag is -1 if we collided with a reset
 		 * of the timer, which already reported this
@@ -1192,6 +1212,7 @@ static void __run_posix_cpu_timers(struct task_struct *tsk)
 			cpu_timer_fire(timer);
 		spin_unlock(&timer->it_lock);
 	}
+	spin_unlock(expiry_lock);
 }
 
 #ifdef CONFIG_PREEMPT_RT_BASE
@@ -1457,6 +1478,8 @@ static int do_cpu_nanosleep(const clockid_t which_clock, int flags,
 	spin_unlock_irq(&timer.it_lock);
 
 	while (error == TIMER_RETRY) {
+
+		cpu_timers_grab_expiry_lock(&timer);
 		/*
 		 * We need to handle case when timer was or is in the
 		 * middle of firing. In other cases we already freed
diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c
index a4f57a1ea0df2..2c1ca3cc391b2 100644
--- a/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@ -800,27 +800,22 @@ static void common_hrtimer_arm(struct k_itimer *timr, ktime_t expires,
 		hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
 }
 
-/*
- * Protected by RCU!
- */
-static void timer_wait_for_callback(const struct k_clock *kc, struct k_itimer *timr)
-{
-#ifdef CONFIG_PREEMPT_RT_FULL
-	if (kc->timer_arm == common_hrtimer_arm)
-		hrtimer_wait_for_timer(&timr->it.real.timer);
-	else if (kc == &alarm_clock)
-		hrtimer_wait_for_timer(&timr->it.alarm.alarmtimer.timer);
-	else
-		/* FIXME: Whacky hack for posix-cpu-timers */
-		schedule_timeout(1);
-#endif
-}
-
 static int common_hrtimer_try_to_cancel(struct k_itimer *timr)
 {
 	return hrtimer_try_to_cancel(&timr->it.real.timer);
 }
 
+static void timer_wait_for_callback(const struct k_clock *kc, struct k_itimer *timer)
+{
+	if (kc->timer_arm == common_hrtimer_arm)
+		hrtimer_grab_expiry_lock(&timer->it.real.timer);
+	else if (kc == &alarm_clock)
+		hrtimer_grab_expiry_lock(&timer->it.alarm.alarmtimer.timer);
+	else
+		/* posix-cpu-timers */
+		cpu_timers_grab_expiry_lock(timer);
+}
+
 /* Set a POSIX.1b interval timer. */
 int common_timer_set(struct k_itimer *timr, int flags,
 		     struct itimerspec64 *new_setting,
@@ -880,21 +875,21 @@ static int do_timer_settime(timer_t timer_id, int flags,
 	if (!timr)
 		return -EINVAL;
 
-	rcu_read_lock();
 	kc = timr->kclock;
 	if (WARN_ON_ONCE(!kc || !kc->timer_set))
 		error = -EINVAL;
 	else
 		error = kc->timer_set(timr, flags, new_spec64, old_spec64);
 
-	unlock_timer(timr, flag);
 	if (error == TIMER_RETRY) {
+		rcu_read_lock();
+		unlock_timer(timr, flag);
 		timer_wait_for_callback(kc, timr);
-		old_spec64 = NULL;	// We already got the old time...
 		rcu_read_unlock();
+		old_spec64 = NULL;	// We already got the old time...
 		goto retry;
 	}
-	rcu_read_unlock();
+	unlock_timer(timr, flag);
 
 	return error;
 }
@@ -956,13 +951,21 @@ int common_timer_del(struct k_itimer *timer)
 	return 0;
 }
 
-static inline int timer_delete_hook(struct k_itimer *timer)
+static int timer_delete_hook(struct k_itimer *timer)
 {
 	const struct k_clock *kc = timer->kclock;
+	int ret;
 
 	if (WARN_ON_ONCE(!kc || !kc->timer_del))
 		return -EINVAL;
-	return kc->timer_del(timer);
+	ret = kc->timer_del(timer);
+	if (ret == TIMER_RETRY) {
+		rcu_read_lock();
+		spin_unlock_irq(&timer->it_lock);
+		timer_wait_for_callback(kc, timer);
+		rcu_read_unlock();
+	}
+	return ret;
 }
 
 /* Delete a POSIX.1b interval timer. */
@@ -976,15 +979,8 @@ SYSCALL_DEFINE1(timer_delete, timer_t, timer_id)
 	if (!timer)
 		return -EINVAL;
 
-	rcu_read_lock();
-	if (timer_delete_hook(timer) == TIMER_RETRY) {
-		unlock_timer(timer, flags);
-		timer_wait_for_callback(clockid_to_kclock(timer->it_clock),
-					timer);
-		rcu_read_unlock();
+	if (timer_delete_hook(timer) == TIMER_RETRY)
 		goto retry_delete;
-	}
-	rcu_read_unlock();
 
 	spin_lock(&current->sighand->siglock);
 	list_del(&timer->list);
@@ -1010,20 +1006,9 @@ static void itimer_delete(struct k_itimer *timer)
 retry_delete:
 	spin_lock_irqsave(&timer->it_lock, flags);
 
-	/* On RT we can race with a deletion */
-	if (!timer->it_signal) {
-		unlock_timer(timer, flags);
-		return;
-	}
-
-	if (timer_delete_hook(timer) == TIMER_RETRY) {
-		rcu_read_lock();
-		unlock_timer(timer, flags);
-		timer_wait_for_callback(clockid_to_kclock(timer->it_clock),
-					timer);
-		rcu_read_unlock();
+	if (timer_delete_hook(timer) == TIMER_RETRY)
 		goto retry_delete;
-	}
+
 	list_del(&timer->list);
 	/*
 	 * This keeps any tasks waiting on the spin lock from thinking
diff --git a/kernel/time/posix-timers.h b/kernel/time/posix-timers.h
index ddb21145211a0..725bd230a8db4 100644
--- a/kernel/time/posix-timers.h
+++ b/kernel/time/posix-timers.h
@@ -32,6 +32,8 @@ extern const struct k_clock clock_process;
 extern const struct k_clock clock_thread;
 extern const struct k_clock alarm_clock;
 
+extern void cpu_timers_grab_expiry_lock(struct k_itimer *timer);
+
 int posix_timer_event(struct k_itimer *timr, int si_private);
 
 void common_timer_get(struct k_itimer *timr, struct itimerspec64 *cur_setting);
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 227dba00dd0ef..0b5f07c2fa834 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -43,7 +43,6 @@
 #include <linux/sched/debug.h>
 #include <linux/slab.h>
 #include <linux/compat.h>
-#include <linux/swait.h>
 #include <linux/uaccess.h>
 #include <asm/unistd.h>
 
@@ -197,9 +196,7 @@ EXPORT_SYMBOL(jiffies_64);
 struct timer_base {
 	raw_spinlock_t		lock;
 	struct timer_list	*running_timer;
-#ifdef CONFIG_PREEMPT_RT_FULL
-	struct swait_queue_head	wait_for_running_timer;
-#endif
+	spinlock_t		expiry_lock;
 	unsigned long		clk;
 	unsigned long		next_expiry;
 	unsigned int		cpu;
@@ -1181,33 +1178,6 @@ void add_timer_on(struct timer_list *timer, int cpu)
 }
 EXPORT_SYMBOL_GPL(add_timer_on);
 
-#ifdef CONFIG_PREEMPT_RT_FULL
-/*
- * Wait for a running timer
- */
-static void wait_for_running_timer(struct timer_list *timer)
-{
-	struct timer_base *base;
-	u32 tf = timer->flags;
-
-	if (tf & TIMER_MIGRATING)
-		return;
-
-	base = get_timer_base(tf);
-	swait_event_exclusive(base->wait_for_running_timer,
-			      base->running_timer != timer);
-}
-
-# define wakeup_timer_waiters(b)	swake_up_all(&(b)->wait_for_running_timer)
-#else
-static inline void wait_for_running_timer(struct timer_list *timer)
-{
-	cpu_relax();
-}
-
-# define wakeup_timer_waiters(b)	do { } while (0)
-#endif
-
 /**
  * del_timer - deactivate a timer.
 * @timer: the timer to be deactivated
@@ -1237,6 +1207,25 @@ int del_timer(struct timer_list *timer)
 }
 EXPORT_SYMBOL(del_timer);
 
+static int __try_to_del_timer_sync(struct timer_list *timer,
+				   struct timer_base **basep)
+{
+	struct timer_base *base;
+	unsigned long flags;
+	int ret = -1;
+
+	debug_assert_init(timer);
+
+	*basep = base = lock_timer_base(timer, &flags);
+
+	if (base->running_timer != timer)
+		ret = detach_if_pending(timer, base, true);
+
+	raw_spin_unlock_irqrestore(&base->lock, flags);
+
+	return ret;
+}
+
 /**
  * try_to_del_timer_sync - Try to deactivate a timer
  * @timer: timer to delete
@@ -1247,23 +1236,31 @@ EXPORT_SYMBOL(del_timer);
 int try_to_del_timer_sync(struct timer_list *timer)
 {
 	struct timer_base *base;
-	unsigned long flags;
-	int ret = -1;
 
-	debug_assert_init(timer);
-
-	base = lock_timer_base(timer, &flags);
-
-	if (base->running_timer != timer)
-		ret = detach_if_pending(timer, base, true);
-
-	raw_spin_unlock_irqrestore(&base->lock, flags);
-
-	return ret;
+	return __try_to_del_timer_sync(timer, &base);
 }
 EXPORT_SYMBOL(try_to_del_timer_sync);
 
 #if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT_RT_FULL)
+static int __del_timer_sync(struct timer_list *timer)
+{
+	struct timer_base *base;
+	int ret;
+
+	for (;;) {
+		ret = __try_to_del_timer_sync(timer, &base);
+		if (ret >= 0)
+			return ret;
+
+		/*
+		 * When accessing the lock, timers of base are no longer expired
+		 * and so timer is no longer running.
+		 */
+		spin_lock(&base->expiry_lock);
+		spin_unlock(&base->expiry_lock);
+	}
+}
+
 /**
  * del_timer_sync - deactivate a timer and wait for the handler to finish.
  * @timer: the timer to be deactivated
@@ -1319,12 +1316,8 @@ int del_timer_sync(struct timer_list *timer)
 	 * could lead to deadlock.
 	 */
 	WARN_ON(in_irq() && !(timer->flags & TIMER_IRQSAFE));
-	for (;;) {
-		int ret = try_to_del_timer_sync(timer);
-		if (ret >= 0)
-			return ret;
-		wait_for_running_timer(timer);
-	}
+
+	return __del_timer_sync(timer);
 }
 EXPORT_SYMBOL(del_timer_sync);
 #endif
@@ -1389,11 +1382,15 @@ static void expire_timers(struct timer_base *base, struct hlist_head *head)
 			raw_spin_unlock(&base->lock);
 			call_timer_fn(timer, fn);
 			base->running_timer = NULL;
+			spin_unlock(&base->expiry_lock);
+			spin_lock(&base->expiry_lock);
 			raw_spin_lock(&base->lock);
 		} else {
 			raw_spin_unlock_irq(&base->lock);
 			call_timer_fn(timer, fn);
 			base->running_timer = NULL;
+			spin_unlock(&base->expiry_lock);
+			spin_lock(&base->expiry_lock);
 			raw_spin_lock_irq(&base->lock);
 		}
 	}
@@ -1688,6 +1685,7 @@ static inline void __run_timers(struct timer_base *base)
 	if (!time_after_eq(jiffies, base->clk))
 		return;
 
+	spin_lock(&base->expiry_lock);
 	raw_spin_lock_irq(&base->lock);
 
 	/*
@@ -1715,7 +1713,7 @@ static inline void __run_timers(struct timer_base *base)
 		expire_timers(base, heads + levels);
 	}
 	raw_spin_unlock_irq(&base->lock);
-	wakeup_timer_waiters(base);
+	spin_unlock(&base->expiry_lock);
 }
 
 /*
@@ -1962,9 +1960,7 @@ static void __init init_timer_cpu(int cpu)
 		base->cpu = cpu;
 		raw_spin_lock_init(&base->lock);
 		base->clk = jiffies;
-#ifdef CONFIG_PREEMPT_RT_FULL
-		init_swait_queue_head(&base->wait_for_running_timer);
-#endif
+		spin_lock_init(&base->expiry_lock);
 	}
 }
 
diff --git a/localversion-rt b/localversion-rt
index 6e44e540b927b..9f7d0bdbffb18 100644
--- a/localversion-rt
+++ b/localversion-rt
@@ -1 +1 @@
--rt12
+-rt13
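
For readers who want the gist of the cancellation rework without digging
through the patch, here is a small, self-contained userspace sketch of the
expiry-lock pattern. It is illustrative only: all names (expiry_lock,
expiry_thread, timer_cancel_sync, ...) are invented, and an ordinary
pthread mutex stands in for the rtmutex-backed spinlock_t that provides the
PI boosting on -RT. It builds with "gcc -pthread".

/*
 * Illustrative userspace sketch only -- not kernel code.
 *
 * Idea: the expiry context holds expiry_lock while the timer callback runs.
 * Cancelling therefore just acquires and releases the same lock: if the
 * callback is currently running, the cancel path blocks until it has
 * finished (and, with a PI-aware lock as on -RT, boosts the expiry thread).
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t expiry_lock = PTHREAD_MUTEX_INITIALIZER;
static bool timer_pending = true;	/* protected by expiry_lock */

static void timer_callback(void)
{
	puts("callback: running");
	usleep(100 * 1000);		/* pretend to do some work */
	puts("callback: done");
}

/* Expiry side: dequeue the timer and run its callback under expiry_lock. */
static void *expiry_thread(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&expiry_lock);
	timer_pending = false;
	timer_callback();
	pthread_mutex_unlock(&expiry_lock);
	return NULL;
}

/* Cancel side: acquire/release expiry_lock to wait out a running callback. */
static void timer_cancel_sync(void)
{
	pthread_mutex_lock(&expiry_lock);
	if (timer_pending)
		puts("cancel: timer still pending, simply dequeue it");
	else
		puts("cancel: callback has completed, safe to free the timer");
	pthread_mutex_unlock(&expiry_lock);
}

int main(void)
{
	pthread_t tid;

	pthread_create(&tid, NULL, expiry_thread, NULL);
	usleep(10 * 1000);		/* give the callback a chance to start */
	timer_cancel_sync();
	pthread_join(tid, NULL);
	return 0;
}

In the patch itself the same handshake appears as hrtimer_grab_expiry_lock()
(lock/unlock of cpu_base->softirq_expiry_lock) on the cancel side, while
hrtimer_run_softirq(), __run_timers() and __run_posix_cpu_timers() hold the
corresponding expiry lock while callbacks are invoked.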