Commit-ID:  be45bf5395e0886a93fc816bbe41a008ec2e42e2
Gitweb:     https://git.kernel.org/tip/be45bf5395e0886a93fc816bbe41a008ec2e42e2
Author:     Peter Zijlstra <peterz@xxxxxxxxxxxxx>
AuthorDate: Fri, 13 Jul 2018 12:42:08 +0200
Committer:  Ingo Molnar <mingo@xxxxxxxxxx>
CommitDate: Sun, 15 Jul 2018 23:51:19 +0200

watchdog/softlockup: Fix cpu_stop_queue_work() double-queue bug

When scheduling is delayed for longer than the softlockup interrupt
period it is possible to double-queue the cpu_stop_work, causing list
corruption.

Cure this by adding a completion to track the cpu_stop_work's
progress.

Reported-by: kernel test robot <lkp@xxxxxxxxx>
Tested-by: Rong Chen <rong.a.chen@xxxxxxxxx>
Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>
Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Fixes: 9cf57731b63e ("watchdog/softlockup: Replace "watchdog/%u" threads with cpu_stop_work")
Link: http://lkml.kernel.org/r/20180713104208.GW2494@xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
Signed-off-by: Ingo Molnar <mingo@xxxxxxxxxx>
---
 kernel/watchdog.c | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index b81f777838d5..5470dce212c0 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -330,6 +330,9 @@ static void watchdog_interrupt_count(void)
 	__this_cpu_inc(hrtimer_interrupts);
 }
 
+static DEFINE_PER_CPU(struct completion, softlockup_completion);
+static DEFINE_PER_CPU(struct cpu_stop_work, softlockup_stop_work);
+
 /*
  * The watchdog thread function - touches the timestamp.
  *
@@ -343,12 +346,11 @@ static int softlockup_fn(void *data)
 	__this_cpu_write(soft_lockup_hrtimer_cnt,
 			 __this_cpu_read(hrtimer_interrupts));
 	__touch_watchdog();
+	complete(this_cpu_ptr(&softlockup_completion));
 
 	return 0;
 }
 
-static DEFINE_PER_CPU(struct cpu_stop_work, softlockup_stop_work);
-
 /* watchdog kicker functions */
 static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 {
@@ -364,9 +366,12 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 	watchdog_interrupt_count();
 
 	/* kick the softlockup detector */
-	stop_one_cpu_nowait(smp_processor_id(),
-			softlockup_fn, NULL,
-			this_cpu_ptr(&softlockup_stop_work));
+	if (completion_done(this_cpu_ptr(&softlockup_completion))) {
+		reinit_completion(this_cpu_ptr(&softlockup_completion));
+		stop_one_cpu_nowait(smp_processor_id(),
+				softlockup_fn, NULL,
+				this_cpu_ptr(&softlockup_stop_work));
+	}
 
 	/* .. and repeat */
 	hrtimer_forward_now(hrtimer, ns_to_ktime(sample_period));
@@ -467,9 +472,13 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 static void watchdog_enable(unsigned int cpu)
 {
 	struct hrtimer *hrtimer = this_cpu_ptr(&watchdog_hrtimer);
+	struct completion *done = this_cpu_ptr(&softlockup_completion);
 
 	WARN_ON_ONCE(cpu != smp_processor_id());
 
+	init_completion(done);
+	complete(done);
+
 	/*
 	 * Start the timer first to prevent the NMI watchdog triggering
 	 * before the timer has a chance to fire.
@@ -499,6 +508,7 @@ static void watchdog_disable(unsigned int cpu)
 	 */
 	watchdog_nmi_disable(cpu);
 	hrtimer_cancel(hrtimer);
+	wait_for_completion(this_cpu_ptr(&softlockup_completion));
 }
 
 static int softlockup_stop_fn(void *data)
--
To unsubscribe from this list: send the line "unsubscribe linux-tip-commits" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html