FAILED: patch "[PATCH] kernel/watchdog: Prevent false positives with turbo modes" failed to apply to 4.4-stable tree

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



The patch below does not apply to the 4.4-stable tree.
If someone wants it applied there, or to any other stable or longterm
tree, then please email the backport, including the original git commit
id to <stable@xxxxxxxxxxxxxxx>.

thanks,

greg k-h

------------------ original commit in Linus's tree ------------------

>From 7edaeb6841dfb27e362288ab8466ebdc4972e867 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Date: Tue, 15 Aug 2017 09:50:13 +0200
Subject: [PATCH] kernel/watchdog: Prevent false positives with turbo modes

The hardlockup detector on x86 uses a performance counter based on unhalted
CPU cycles and a periodic hrtimer. The hrtimer period is about 2/5 of the
performance counter period, so the hrtimer should fire 2-3 times before the
performance counter NMI fires. The NMI code checks whether the hrtimer
fired since the last invocation. If not, it assumess a hard lockup.

The calculation of those periods is based on the nominal CPU
frequency. Turbo modes increase the CPU clock frequency and therefore
shorten the period of the perf/NMI watchdog. With extreme Turbo-modes (3x
nominal frequency) the perf/NMI period is shorter than the hrtimer period
which leads to false positives.

A simple fix would be to shorten the hrtimer period, but that comes with
the side effect of more frequent hrtimer and softlockup thread wakeups,
which is not desired.

Implement a low pass filter, which checks the perf/NMI period against
kernel time. If the perf/NMI fires before 4/5 of the watchdog period has
elapsed then the event is ignored and postponed to the next perf/NMI.

That solves the problem and avoids the overhead of shorter hrtimer periods
and more frequent softlockup thread wakeups.

Fixes: 58687acba592 ("lockup_detector: Combine nmi_watchdog and softlockup detector")
Reported-and-tested-by: Kan Liang <Kan.liang@xxxxxxxxx>
Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: dzickus@xxxxxxxxxx
Cc: prarit@xxxxxxxxxx
Cc: ak@xxxxxxxxxxxxxxx
Cc: babu.moger@xxxxxxxxxx
Cc: peterz@xxxxxxxxxxxxx
Cc: eranian@xxxxxxxxxx
Cc: acme@xxxxxxxxxx
Cc: stable@xxxxxxxxxxxxxxx
Cc: atomlin@xxxxxxxxxx
Cc: akpm@xxxxxxxxxxxxxxxxxxxx
Cc: torvalds@xxxxxxxxxxxxxxxxxxxx
Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1708150931310.1886@nanos

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 781521b7cf9e..9101bfc85539 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -100,6 +100,7 @@ config X86
 	select GENERIC_STRNCPY_FROM_USER
 	select GENERIC_STRNLEN_USER
 	select GENERIC_TIME_VSYSCALL
+	select HARDLOCKUP_CHECK_TIMESTAMP	if X86_64
 	select HAVE_ACPI_APEI			if ACPI
 	select HAVE_ACPI_APEI_NMI		if ACPI
 	select HAVE_ALIGNED_STRUCT_PAGE		if SLUB
diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index 8aa01fd859fb..a36abe2da13e 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -168,6 +168,14 @@ extern int sysctl_hardlockup_all_cpu_backtrace;
 #define sysctl_softlockup_all_cpu_backtrace 0
 #define sysctl_hardlockup_all_cpu_backtrace 0
 #endif
+
+#if defined(CONFIG_HARDLOCKUP_CHECK_TIMESTAMP) && \
+    defined(CONFIG_HARDLOCKUP_DETECTOR)
+void watchdog_update_hrtimer_threshold(u64 period);
+#else
+static inline void watchdog_update_hrtimer_threshold(u64 period) { }
+#endif
+
 extern bool is_hardlockup(void);
 struct ctl_table;
 extern int proc_watchdog(struct ctl_table *, int ,
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 06d3389bca0d..f5d52024f6b7 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -240,6 +240,7 @@ static void set_sample_period(void)
 	 * hardlockup detector generates a warning
 	 */
 	sample_period = get_softlockup_thresh() * ((u64)NSEC_PER_SEC / 5);
+	watchdog_update_hrtimer_threshold(sample_period);
 }
 
 /* Commands for resetting the watchdog */
diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c
index 295a0d84934c..3a09ea1b1d3d 100644
--- a/kernel/watchdog_hld.c
+++ b/kernel/watchdog_hld.c
@@ -37,6 +37,62 @@ void arch_touch_nmi_watchdog(void)
 }
 EXPORT_SYMBOL(arch_touch_nmi_watchdog);
 
+#ifdef CONFIG_HARDLOCKUP_CHECK_TIMESTAMP
+static DEFINE_PER_CPU(ktime_t, last_timestamp);
+static DEFINE_PER_CPU(unsigned int, nmi_rearmed);
+static ktime_t watchdog_hrtimer_sample_threshold __read_mostly;
+
+void watchdog_update_hrtimer_threshold(u64 period)
+{
+	/*
+	 * The hrtimer runs with a period of (watchdog_threshold * 2) / 5
+	 *
+	 * So it runs effectively with 2.5 times the rate of the NMI
+	 * watchdog. That means the hrtimer should fire 2-3 times before
+	 * the NMI watchdog expires. The NMI watchdog on x86 is based on
+	 * unhalted CPU cycles, so if Turbo-Mode is enabled the CPU cycles
+	 * might run way faster than expected and the NMI fires in a
+	 * smaller period than the one deduced from the nominal CPU
+	 * frequency. Depending on the Turbo-Mode factor this might be fast
+	 * enough to get the NMI period smaller than the hrtimer watchdog
+	 * period and trigger false positives.
+	 *
+	 * The sample threshold is used to check in the NMI handler whether
+	 * the minimum time between two NMI samples has elapsed. That
+	 * prevents false positives.
+	 *
+	 * Set this to 4/5 of the actual watchdog threshold period so the
+	 * hrtimer is guaranteed to fire at least once within the real
+	 * watchdog threshold.
+	 */
+	watchdog_hrtimer_sample_threshold = period * 2;
+}
+
+static bool watchdog_check_timestamp(void)
+{
+	ktime_t delta, now = ktime_get_mono_fast_ns();
+
+	delta = now - __this_cpu_read(last_timestamp);
+	if (delta < watchdog_hrtimer_sample_threshold) {
+		/*
+		 * If ktime is jiffies based, a stalled timer would prevent
+		 * jiffies from being incremented and the filter would look
+		 * at a stale timestamp and never trigger.
+		 */
+		if (__this_cpu_inc_return(nmi_rearmed) < 10)
+			return false;
+	}
+	__this_cpu_write(nmi_rearmed, 0);
+	__this_cpu_write(last_timestamp, now);
+	return true;
+}
+#else
+static inline bool watchdog_check_timestamp(void)
+{
+	return true;
+}
+#endif
+
 static struct perf_event_attr wd_hw_attr = {
 	.type		= PERF_TYPE_HARDWARE,
 	.config		= PERF_COUNT_HW_CPU_CYCLES,
@@ -61,6 +117,9 @@ static void watchdog_overflow_callback(struct perf_event *event,
 		return;
 	}
 
+	if (!watchdog_check_timestamp())
+		return;
+
 	/* check for a hardlockup
 	 * This is done by making sure our timer interrupt
 	 * is incrementing.  The timer interrupt should have
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 98fe715522e8..c617b9d1d6cb 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -797,6 +797,13 @@ config HARDLOCKUP_DETECTOR_PERF
 	bool
 	select SOFTLOCKUP_DETECTOR
 
+#
+# Enables a timestamp based low pass filter to compensate for perf based
+# hard lockup detection which runs too fast due to turbo modes.
+#
+config HARDLOCKUP_CHECK_TIMESTAMP
+	bool
+
 #
 # arch/ can define HAVE_HARDLOCKUP_DETECTOR_ARCH to provide their own hard
 # lockup detector rather than the perf based detector.




[Index of Archives]     [Linux Kernel]     [Kernel Development Newbies]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite Hiking]     [Linux Kernel]     [Linux SCSI]