On 7/7/20 1:46 AM, Woody Lin wrote: > On Tue, Jul 7, 2020 at 12:04 PM Guenter Roeck <linux@xxxxxxxxxxxx> wrote: >> >> On Wed, Jul 01, 2020 at 07:03:40PM +0800, Woody Lin wrote: >>> Add module parameters 'soft_reboot_cmd' and 'soft_active_on_boot' for >>> customizing softdog configuration; config reboot command by assigning >>> soft_reboot_cmd, and set soft_active_on_boot to start up softdog >>> timer at module initialization stage. >>> >>> Signed-off-by: Woody Lin <woodylin@xxxxxxxxxx> >>> --- >>> drivers/watchdog/softdog.c | 56 ++++++++++++++++++++++++++++++++++++++ >>> 1 file changed, 56 insertions(+) >>> >>> diff --git a/drivers/watchdog/softdog.c b/drivers/watchdog/softdog.c >>> index 3e4885c1545e..8c8d214b6aa7 100644 >>> --- a/drivers/watchdog/softdog.c >>> +++ b/drivers/watchdog/softdog.c >>> @@ -20,11 +20,13 @@ >>> #include <linux/hrtimer.h> >>> #include <linux/init.h> >>> #include <linux/kernel.h> >>> +#include <linux/kthread.h> >>> #include <linux/module.h> >>> #include <linux/moduleparam.h> >>> #include <linux/reboot.h> >>> #include <linux/types.h> >>> #include <linux/watchdog.h> >>> +#include <linux/workqueue.h> >>> >>> #define TIMER_MARGIN 60 /* Default is 60 seconds */ >>> static unsigned int soft_margin = TIMER_MARGIN; /* in seconds */ >>> @@ -49,11 +51,33 @@ module_param(soft_panic, int, 0); >>> MODULE_PARM_DESC(soft_panic, >>> "Softdog action, set to 1 to panic, 0 to reboot (default=0)"); >>> >>> +static char *soft_reboot_cmd; >>> +module_param(soft_reboot_cmd, charp, 0000); >>> +MODULE_PARM_DESC(soft_reboot_cmd, >>> + "Set reboot command. Emergency reboot takes place if unset"); >>> + >>> +static bool soft_active_on_boot; >>> +module_param(soft_active_on_boot, bool, 0000); >>> +MODULE_PARM_DESC(soft_active_on_boot, >>> + "Set to true to active Softdog on boot (default=false)"); >>> + >>> static struct hrtimer softdog_ticktock; >>> static struct hrtimer softdog_preticktock; >>> >>> +static int reboot_kthread_fn(void *data) >>> +{ >>> + kernel_restart(soft_reboot_cmd); >>> + return -EPERM; /* Should not reach here */ >>> +} >>> + >>> +static void reboot_work_fn(struct work_struct *unused) >>> +{ >>> + kthread_run(reboot_kthread_fn, NULL, "softdog_reboot"); >>> +} >>> + >>> static enum hrtimer_restart softdog_fire(struct hrtimer *timer) >>> { >>> + static bool soft_reboot_fired; >> >> Per coding style there should be an empty line here. > > Ack. > >> >>> module_put(THIS_MODULE); >>> if (soft_noboot) { >>> pr_crit("Triggered - Reboot ignored\n"); >>> @@ -62,6 +86,33 @@ static enum hrtimer_restart softdog_fire(struct hrtimer *timer) >>> panic("Software Watchdog Timer expired"); >>> } else { >>> pr_crit("Initiating system reboot\n"); >>> + if (!soft_reboot_fired && soft_reboot_cmd != NULL) { >>> + static DECLARE_WORK(reboot_work, reboot_work_fn); >>> + /* >>> + * The 'kernel_restart' is a 'might-sleep' operation. >>> + * Also, executing it in system-wide workqueues blocks >>> + * any driver from using the same workqueue in its >>> + * shutdown callback function. Thus, we should execute >>> + * the 'kernel_restart' in a standalone kernel thread. >>> + * But since starting a kernel thread is also a >>> + * 'might-sleep' operation, so the 'reboot_work' is >>> + * required as a launcher of the kernel thread. >>> + * >>> + * After request the reboot, restart the timer to >>> + * schedule an 'emergency_restart' reboot after >>> + * 'TIMER_MARGIN' seconds. It's because if the softdog >>> + * hangs, it might be because of scheduling issues. And >>> + * if that is the case, both 'schedule_work' and >>> + * 'kernel_restart' may possibly be malfunctional at the >>> + * same time. >>> + */ >>> + soft_reboot_fired = true; >>> + schedule_work(&reboot_work); >>> + hrtimer_add_expires_ns(timer, >>> + (u64)TIMER_MARGIN * NSEC_PER_SEC); >>> + >>> + return HRTIMER_RESTART; >>> + } >>> emergency_restart(); >>> pr_crit("Reboot didn't ?????\n"); >>> } >>> @@ -145,12 +196,17 @@ static int __init softdog_init(void) >>> softdog_preticktock.function = softdog_pretimeout; >>> } >>> >>> + if (soft_active_on_boot) >>> + softdog_ping(&softdog_dev); >>> + >>> ret = watchdog_register_device(&softdog_dev); >>> if (ret) >>> return ret; >>> >>> pr_info("initialized. soft_noboot=%d soft_margin=%d sec soft_panic=%d (nowayout=%d)\n", >>> soft_noboot, softdog_dev.timeout, soft_panic, nowayout); >>> + pr_info(" soft_reboot_cmd=%s soft_active_on_boot=%d\n", >>> + soft_reboot_cmd, soft_active_on_boot); >> >> soft_reboot_cmd can be NULL, which makes the output a bit awkward. > > Then how about change it to something like this: > "soft_reboot_cmd=%s", soft_reboot_cmd ?: "<null> (emergency reboot)" > Then we will see "soft_reboot_cmd=<null> (emergency reboot)" when it's NULL. I'd rather see something like "<not set>". "<null>" looks like an error. Also, it isn't correct to assume emergency reboot; that is only correct if neither soft_noboot nor soft_panic is set. Thanks, Guenter