When lockup_detector_init() calls watchdog_nmi_probe(), the PMU may not be
ready yet. For example, on arm64 the PMU is not ready until
device_initcall(armv8_pmu_driver_init), and its initialization is deeply
integrated with the driver model and cpuhp, so it is hard to move it before
smp_init().

Take the opposite approach instead and let watchdog_hld pick up the PMU
capability asynchronously. The async model is implemented by allowing
watchdog_nmi_probe() to return -EBUSY and by adding a re-initializing
work_struct that waits on a wait_queue_head.

Co-developed-by: Pingfan Liu <kernelfans@xxxxxxxxx>
Signed-off-by: Pingfan Liu <kernelfans@xxxxxxxxx>
Signed-off-by: Lecopzer Chen <lecopzer.chen@xxxxxxxxxxxx>
Suggested-by: Petr Mladek <pmladek@xxxxxxxx>
---
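A rough, untested sketch (not part of this patch) of how an arch might
consume the new hook: the arch override of watchdog_nmi_probe() returns
-EBUSY while its PMU is not usable, and the PMU driver's initcall later
clears lockup_detector_pending_init and wakes hld_detector_wait. The
late_initcall_sync() fallback below covers the case where it never does.
my_arch_pmu_ready() and my_arch_pmu_driver_init() are placeholder names,
not existing functions:

	/* Overrides the __weak watchdog_nmi_probe() in kernel/watchdog.c. */
	int __init watchdog_nmi_probe(void)
	{
		/* PMU not probed yet: ask the core to retry asynchronously. */
		if (!my_arch_pmu_ready())
			return -EBUSY;

		return hardlockup_detector_perf_init();
	}

	static int __init my_arch_pmu_driver_init(void)
	{
		/* ... register the arch PMU driver here ... */

		/* The PMU is usable now, release the delayed-init worker. */
		if (lockup_detector_pending_init) {
			lockup_detector_pending_init = false;
			wake_up(&hld_detector_wait);
		}
		return 0;
	}
	device_initcall(my_arch_pmu_driver_init);
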
 include/linux/nmi.h |  3 +++
 kernel/watchdog.c   | 62 ++++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 63 insertions(+), 2 deletions(-)

diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index b7bcd63c36b4..cc7df31be9db 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -118,6 +118,9 @@ static inline int hardlockup_detector_perf_init(void) { return 0; }
 
 void watchdog_nmi_stop(void);
 void watchdog_nmi_start(void);
+
+extern bool lockup_detector_pending_init;
+extern struct wait_queue_head hld_detector_wait;
 int watchdog_nmi_probe(void);
 void watchdog_nmi_enable(unsigned int cpu);
 void watchdog_nmi_disable(unsigned int cpu);
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index b71d434cf648..49bdcaf5bd8f 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -103,7 +103,11 @@ void __weak watchdog_nmi_disable(unsigned int cpu)
 	hardlockup_detector_perf_disable();
 }
-/* Return 0, if a NMI watchdog is available. Error code otherwise */
+/*
+ * Arch specific API. Return 0 if an NMI watchdog is available, -EBUSY if it
+ * is not ready yet (arch code must wake up hld_detector_wait once it is),
+ * and any other negative value if it is not supported.
+ */
 int __weak __init watchdog_nmi_probe(void)
 {
 	return hardlockup_detector_perf_init();
 }
@@ -839,16 +843,70 @@ static void __init watchdog_sysctl_init(void)
 #define watchdog_sysctl_init() do { } while (0)
 #endif /* CONFIG_SYSCTL */
 
+static void lockup_detector_delay_init(struct work_struct *work);
+bool lockup_detector_pending_init __initdata;
+
+struct wait_queue_head hld_detector_wait __initdata =
+		__WAIT_QUEUE_HEAD_INITIALIZER(hld_detector_wait);
+
+static struct work_struct detector_work __initdata =
+		__WORK_INITIALIZER(detector_work, lockup_detector_delay_init);
+
+static void __init lockup_detector_delay_init(struct work_struct *work)
+{
+	int ret;
+
+	wait_event(hld_detector_wait,
+		   lockup_detector_pending_init == false);
+
+	/*
+	 * The PMU should be ready by now, so set pending to true to inform
+	 * watchdog_nmi_probe() that it should not return -EBUSY again.
+	 */
+	lockup_detector_pending_init = true;
+	ret = watchdog_nmi_probe();
+	if (ret) {
+		pr_info("Delayed init of the lockup detector failed: %d\n", ret);
+		pr_info("Perf NMI watchdog permanently disabled\n");
+		return;
+	}
+
+	nmi_watchdog_available = true;
+	lockup_detector_setup();
+	lockup_detector_pending_init = false;
+}
+
+/* Ensure the check runs after the initialization of the PMU driver */
+static int __init lockup_detector_check(void)
+{
+	if (!lockup_detector_pending_init)
+		return 0;

+	pr_info("Delayed init check failed, retrying once.\n");
+	lockup_detector_pending_init = false;
+	wake_up(&hld_detector_wait);
+	return 0;
+}
+late_initcall_sync(lockup_detector_check);
+
 void __init lockup_detector_init(void)
 {
+	int ret;
+
 	if (tick_nohz_full_enabled())
 		pr_info("Disabling watchdog on nohz_full cores by default\n");
 
 	cpumask_copy(&watchdog_cpumask,
 		     housekeeping_cpumask(HK_FLAG_TIMER));
 
-	if (!watchdog_nmi_probe())
+	ret = watchdog_nmi_probe();
+	if (!ret)
 		nmi_watchdog_available = true;
+	else if (ret == -EBUSY) {
+		lockup_detector_pending_init = true;
+		queue_work_on(smp_processor_id(), system_wq, &detector_work);
+	}
+
 	lockup_detector_setup();
 	watchdog_sysctl_init();
 }
-- 
2.25.1