On 4/24/21 4:41 PM, Guenter Roeck wrote: > On 4/24/21 3:25 AM, Peter Enderborg wrote: >> This is not a rebooting watchdog. It's function is to take other >> actions than a hard reboot. On many complex system there is some >> kind of manager that monitor and take action on slow systems. >> Android has it's lowmemorykiller (lmkd), desktops has earlyoom. >> This watchdog can be used to help monitor to preform some basic >> action to keep the monitor running. >> >> It can also be used standalone. This add a policy that is >> killing the process with highest oom_score_adj and using >> oom functions to it quickly. I think it is a good usecase >> for the patch. Memory siuations can be problematic for >> software that monitor system, but other prolicys can >> should also be possible. Like picking tasks from a memcg, or >> specific UID's or what ever is low priority. >> --- > NACK. Besides this not following the new watchdog API, the task > of a watchdog is to reset the system on failure. Its task is most > definitely not to re-implement the oom killer in any way, shape, > or form. > > Guenter Do you have better idea where the re-invented wheel might fit better if it not for watchdog API? > >> drivers/watchdog/Makefile | 2 + >> drivers/watchdog/softwatchdog.c | 231 ++++++++++++++++++++++++++++++++ >> include/uapi/linux/watchdog.h | 6 + >> mm/oom_kill.c | 4 +- >> 4 files changed, 241 insertions(+), 2 deletions(-) >> create mode 100644 drivers/watchdog/softwatchdog.c >> >> diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile >> index f3a6540e725e..bff8f61fe504 100644 >> --- a/drivers/watchdog/Makefile >> +++ b/drivers/watchdog/Makefile >> @@ -221,3 +221,5 @@ obj-$(CONFIG_MENZ069_WATCHDOG) += menz69_wdt.o >> obj-$(CONFIG_RAVE_SP_WATCHDOG) += rave-sp-wdt.o >> obj-$(CONFIG_STPMIC1_WATCHDOG) += stpmic1_wdt.o >> obj-$(CONFIG_SL28CPLD_WATCHDOG) += sl28cpld_wdt.o >> + >> +obj-y += softwatchdog.o >> diff --git a/drivers/watchdog/softwatchdog.c b/drivers/watchdog/softwatchdog.c >> new file mode 100644 >> index 000000000000..dd7153092da8 >> --- /dev/null >> +++ b/drivers/watchdog/softwatchdog.c >> @@ -0,0 +1,231 @@ >> +// SPDX-License-Identifier: GPL-2.0 >> +/* Soft watchdog >> + * This is a RFC of a watchdog does do not reboot the system. >> + * Instead it do a pre defined action >> + * >> + * Author: peter.enderborg@xxxxxxxx >> + */ >> + >> +#include <linux/kernel.h> >> +#include <linux/mm.h> >> +#include <linux/slab.h> >> +#include <linux/oom.h> >> +#include <linux/module.h> >> +#include <linux/moduleparam.h> >> +#include <linux/types.h> >> +#include <linux/timer.h> >> +#include <linux/miscdevice.h> >> +#include <linux/watchdog.h> >> +#include <linux/notifier.h> >> +#include <linux/init.h> >> +#include <linux/fs.h> >> + >> +void wake_oom_reaper(struct task_struct *tsk); /* need to public ... */ >> +void __oom_kill_process(struct task_struct *victim, const char *message); /* hmm */ >> + >> +static struct timer_list swd_timer; >> +static int heartbeat = 500; >> +static unsigned long swd_is_open; >> +static char expect_close; >> +static bool nowayout = WATCHDOG_NOWAYOUT; >> + >> +/* a example policy doing a process kill by calling >> + * functions within oom-killer. >> + */ >> +static int policy_fast_kill_oom_score_adj(void) >> +{ >> + struct task_struct *tsk; >> + struct task_struct *selected = NULL; >> + int highest = 0; >> + >> + rcu_read_lock(); >> + for_each_process(tsk) { >> + struct task_struct *candidate; >> + >> + if (tsk->flags & PF_KTHREAD) >> + continue; >> + >> + /* Ignore task if coredump in progress */ >> + if (tsk->mm && tsk->mm->core_state) >> + continue; >> + candidate = find_lock_task_mm(tsk); >> + if (!candidate) >> + continue; >> + >> + if (highest < candidate->signal->oom_score_adj) { >> + /* for test dont kill level 0 */ >> + highest = candidate->signal->oom_score_adj; >> + selected = candidate; >> + pr_info("new selected %d %d", selected->pid, >> + selected->signal->oom_score_adj); >> + } >> + task_unlock(candidate); >> + } >> + if (selected) >> + get_task_struct(selected); >> + >> + rcu_read_unlock(); >> + if (selected) { >> + int pid = selected->pid; >> + >> + pr_info("swd killing: %d", selected->pid); >> + __oom_kill_process(selected, "swd"); >> + return pid; >> + } >> + return 0; >> +} >> + >> +static void swd_ping(void) >> +{ >> + mod_timer(&swd_timer, jiffies + heartbeat); >> +} >> + >> +static long swd_ioctl(struct file *file, unsigned int cmd, unsigned long arg) >> +{ >> + void __user *argp = (void __user *)arg; >> + int __user *p = argp; >> + int new_heartbeat; >> + int status; >> + >> + struct watchdog_info ident = { >> + .options = WDIOF_SETTIMEOUT| >> + WDIOF_MAGICCLOSE| >> + WDIOF_OOMKILL | >> + WDIOF_AUTOKILL, >> + .identity = "swd", >> + }; >> + >> + switch (cmd) { >> + case WDIOC_GETSUPPORT: >> + return copy_to_user(argp, &ident, sizeof(ident)) ? -EFAULT : 0; >> + case WDIOC_GETSTATUS: >> + status = 0; >> + return put_user(status, p); >> + case WDIOC_GETBOOTSTATUS: >> + return put_user(0, p); >> + case WDIOC_KEEPALIVE: >> + swd_ping(); >> + return 0; >> + case WDIOC_SETTIMEOUT: >> + if (get_user(new_heartbeat, p)) >> + return -EFAULT; >> + heartbeat = new_heartbeat; >> + fallthrough; >> + case WDIOC_GETTIMEOUT: >> + return put_user(heartbeat, p); >> + default: >> + return -ENOTTY; >> + } >> + return -ENOTTY; >> +} >> + >> +static void swd_timer_func(struct timer_list *unused) >> +{ >> + int res; >> + >> + pr_info("swd timer %d\n", heartbeat); >> + res = policy_fast_kill_oom_score_adj(); >> + if (res) >> + pr_info("killed %d\n", res); >> + >> + mod_timer(&swd_timer, jiffies + heartbeat); >> +} >> + >> +static int swd_start(void) >> +{ >> + add_timer(&swd_timer); >> + return 0; >> +} >> + >> +static int swd_stop(void) >> +{ >> + del_timer(&swd_timer); >> + return 0; >> +} >> + >> +static int swd_open(struct inode *inode, struct file *file) >> +{ >> + if (test_and_set_bit(0, &swd_is_open)) >> + return -EBUSY; >> + swd_start(); >> + return stream_open(inode, file); >> +} >> + >> +static int swd_release(struct inode *inode, struct file *file) >> +{ >> + if (expect_close != 42) { >> + swd_stop(); >> + clear_bit(0, &swd_is_open); >> + } else { >> + pr_crit("SWD device closed unexpectedly. SWD will not stop!\n"); >> + swd_ping(); >> + } >> + expect_close = 0; >> + return 0; >> +} >> +static ssize_t swd_write(struct file *file, const char __user *buf, >> + size_t count, loff_t *ppos) >> +{ >> + if (count) { >> + if (!nowayout) { >> + size_t i; >> + >> + expect_close = 0; >> + for (i = 0; i != count; i++) { >> + char c; >> + >> + if (get_user(c, buf + i)) >> + return -EFAULT; >> + if (c == 'V') >> + expect_close = 42; >> + } >> + } >> + swd_ping(); >> + } >> + return count; >> +} >> + >> +static const struct file_operations swd_fops = { >> + .owner = THIS_MODULE, >> + .llseek = no_llseek, >> + .write = swd_write, >> + .unlocked_ioctl = swd_ioctl, >> + .compat_ioctl = compat_ptr_ioctl, >> + .open = swd_open, >> + .release = swd_release, >> +}; >> + >> +static struct miscdevice swd_miscdev = { >> + .minor = WATCHDOG_MINOR, >> + .name = "soft-watchdog", >> + .fops = &swd_fops, >> +}; >> + >> +int __init swd_register(void) >> +{ >> + int ret; >> + >> + pr_info("swd installed with timer"); >> + ret = misc_register(&swd_miscdev); >> + timer_setup(&swd_timer, swd_timer_func, 0); >> + return 0; >> +} >> + >> +static int __init swd_init(void) >> +{ >> + return 0; >> +} >> + >> +static void __exit swd_unload(void) >> +{ >> + return; >> +} >> + >> +subsys_initcall(swd_register); >> + >> +module_init(swd_init); >> +module_exit(swd_unload); >> + >> +MODULE_AUTHOR("Peter Enderborg"); >> +MODULE_DESCRIPTION("Memory software watchdog"); >> +MODULE_LICENSE("GPL"); >> diff --git a/include/uapi/linux/watchdog.h b/include/uapi/linux/watchdog.h >> index b15cde5c9054..780987482e2d 100644 >> --- a/include/uapi/linux/watchdog.h >> +++ b/include/uapi/linux/watchdog.h >> @@ -48,6 +48,12 @@ struct watchdog_info { >> #define WDIOF_PRETIMEOUT 0x0200 /* Pretimeout (in seconds), get/set */ >> #define WDIOF_ALARMONLY 0x0400 /* Watchdog triggers a management or >> other external alarm not a reboot */ >> +#define WDIOF_OOMKILL 0x0800 /* Watchdog trigger process kill as >> + * oom kill >> + */ >> +#define WDIOF_AUTOKILL 0x1000 /* Watchdog listen to oom notifiy >> + * and kills with its policy >> + */ >> #define WDIOF_KEEPALIVEPING 0x8000 /* Keep alive ping reply */ >> >> #define WDIOS_DISABLECARD 0x0001 /* Turn off the watchdog timer */ >> diff --git a/mm/oom_kill.c b/mm/oom_kill.c >> index fa1cf18bac97..a5f7299af9a3 100644 >> --- a/mm/oom_kill.c >> +++ b/mm/oom_kill.c >> @@ -658,7 +658,7 @@ static int oom_reaper(void *unused) >> return 0; >> } >> >> -static void wake_oom_reaper(struct task_struct *tsk) >> +void wake_oom_reaper(struct task_struct *tsk) >> { >> /* mm is already queued? */ >> if (test_and_set_bit(MMF_OOM_REAP_QUEUED, &tsk->signal->oom_mm->flags)) >> @@ -856,7 +856,7 @@ static bool task_will_free_mem(struct task_struct *task) >> return ret; >> } >> >> -static void __oom_kill_process(struct task_struct *victim, const char *message) >> +void __oom_kill_process(struct task_struct *victim, const char *message) >> { >> struct task_struct *p; >> struct mm_struct *mm; >>