On 4/24/21 3:25 AM, Peter Enderborg wrote: > This is not a rebooting watchdog. It's function is to take other > actions than a hard reboot. On many complex system there is some > kind of manager that monitor and take action on slow systems. > Android has it's lowmemorykiller (lmkd), desktops has earlyoom. > This watchdog can be used to help monitor to preform some basic > action to keep the monitor running. > > It can also be used standalone. This add a policy that is > killing the process with highest oom_score_adj and using > oom functions to it quickly. I think it is a good usecase > for the patch. Memory siuations can be problematic for > software that monitor system, but other prolicys can > should also be possible. Like picking tasks from a memcg, or > specific UID's or what ever is low priority. > --- NACK. Besides this not following the new watchdog API, the task of a watchdog is to reset the system on failure. Its task is most definitely not to re-implement the oom killer in any way, shape, or form. Guenter > drivers/watchdog/Makefile | 2 + > drivers/watchdog/softwatchdog.c | 231 ++++++++++++++++++++++++++++++++ > include/uapi/linux/watchdog.h | 6 + > mm/oom_kill.c | 4 +- > 4 files changed, 241 insertions(+), 2 deletions(-) > create mode 100644 drivers/watchdog/softwatchdog.c > > diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile > index f3a6540e725e..bff8f61fe504 100644 > --- a/drivers/watchdog/Makefile > +++ b/drivers/watchdog/Makefile > @@ -221,3 +221,5 @@ obj-$(CONFIG_MENZ069_WATCHDOG) += menz69_wdt.o > obj-$(CONFIG_RAVE_SP_WATCHDOG) += rave-sp-wdt.o > obj-$(CONFIG_STPMIC1_WATCHDOG) += stpmic1_wdt.o > obj-$(CONFIG_SL28CPLD_WATCHDOG) += sl28cpld_wdt.o > + > +obj-y += softwatchdog.o > diff --git a/drivers/watchdog/softwatchdog.c b/drivers/watchdog/softwatchdog.c > new file mode 100644 > index 000000000000..dd7153092da8 > --- /dev/null > +++ b/drivers/watchdog/softwatchdog.c > @@ -0,0 +1,231 @@ > +// SPDX-License-Identifier: GPL-2.0 > +/* Soft watchdog > + * This is a RFC of a watchdog does do not reboot the system. > + * Instead it do a pre defined action > + * > + * Author: peter.enderborg@xxxxxxxx > + */ > + > +#include <linux/kernel.h> > +#include <linux/mm.h> > +#include <linux/slab.h> > +#include <linux/oom.h> > +#include <linux/module.h> > +#include <linux/moduleparam.h> > +#include <linux/types.h> > +#include <linux/timer.h> > +#include <linux/miscdevice.h> > +#include <linux/watchdog.h> > +#include <linux/notifier.h> > +#include <linux/init.h> > +#include <linux/fs.h> > + > +void wake_oom_reaper(struct task_struct *tsk); /* need to public ... */ > +void __oom_kill_process(struct task_struct *victim, const char *message); /* hmm */ > + > +static struct timer_list swd_timer; > +static int heartbeat = 500; > +static unsigned long swd_is_open; > +static char expect_close; > +static bool nowayout = WATCHDOG_NOWAYOUT; > + > +/* a example policy doing a process kill by calling > + * functions within oom-killer. > + */ > +static int policy_fast_kill_oom_score_adj(void) > +{ > + struct task_struct *tsk; > + struct task_struct *selected = NULL; > + int highest = 0; > + > + rcu_read_lock(); > + for_each_process(tsk) { > + struct task_struct *candidate; > + > + if (tsk->flags & PF_KTHREAD) > + continue; > + > + /* Ignore task if coredump in progress */ > + if (tsk->mm && tsk->mm->core_state) > + continue; > + candidate = find_lock_task_mm(tsk); > + if (!candidate) > + continue; > + > + if (highest < candidate->signal->oom_score_adj) { > + /* for test dont kill level 0 */ > + highest = candidate->signal->oom_score_adj; > + selected = candidate; > + pr_info("new selected %d %d", selected->pid, > + selected->signal->oom_score_adj); > + } > + task_unlock(candidate); > + } > + if (selected) > + get_task_struct(selected); > + > + rcu_read_unlock(); > + if (selected) { > + int pid = selected->pid; > + > + pr_info("swd killing: %d", selected->pid); > + __oom_kill_process(selected, "swd"); > + return pid; > + } > + return 0; > +} > + > +static void swd_ping(void) > +{ > + mod_timer(&swd_timer, jiffies + heartbeat); > +} > + > +static long swd_ioctl(struct file *file, unsigned int cmd, unsigned long arg) > +{ > + void __user *argp = (void __user *)arg; > + int __user *p = argp; > + int new_heartbeat; > + int status; > + > + struct watchdog_info ident = { > + .options = WDIOF_SETTIMEOUT| > + WDIOF_MAGICCLOSE| > + WDIOF_OOMKILL | > + WDIOF_AUTOKILL, > + .identity = "swd", > + }; > + > + switch (cmd) { > + case WDIOC_GETSUPPORT: > + return copy_to_user(argp, &ident, sizeof(ident)) ? -EFAULT : 0; > + case WDIOC_GETSTATUS: > + status = 0; > + return put_user(status, p); > + case WDIOC_GETBOOTSTATUS: > + return put_user(0, p); > + case WDIOC_KEEPALIVE: > + swd_ping(); > + return 0; > + case WDIOC_SETTIMEOUT: > + if (get_user(new_heartbeat, p)) > + return -EFAULT; > + heartbeat = new_heartbeat; > + fallthrough; > + case WDIOC_GETTIMEOUT: > + return put_user(heartbeat, p); > + default: > + return -ENOTTY; > + } > + return -ENOTTY; > +} > + > +static void swd_timer_func(struct timer_list *unused) > +{ > + int res; > + > + pr_info("swd timer %d\n", heartbeat); > + res = policy_fast_kill_oom_score_adj(); > + if (res) > + pr_info("killed %d\n", res); > + > + mod_timer(&swd_timer, jiffies + heartbeat); > +} > + > +static int swd_start(void) > +{ > + add_timer(&swd_timer); > + return 0; > +} > + > +static int swd_stop(void) > +{ > + del_timer(&swd_timer); > + return 0; > +} > + > +static int swd_open(struct inode *inode, struct file *file) > +{ > + if (test_and_set_bit(0, &swd_is_open)) > + return -EBUSY; > + swd_start(); > + return stream_open(inode, file); > +} > + > +static int swd_release(struct inode *inode, struct file *file) > +{ > + if (expect_close != 42) { > + swd_stop(); > + clear_bit(0, &swd_is_open); > + } else { > + pr_crit("SWD device closed unexpectedly. SWD will not stop!\n"); > + swd_ping(); > + } > + expect_close = 0; > + return 0; > +} > +static ssize_t swd_write(struct file *file, const char __user *buf, > + size_t count, loff_t *ppos) > +{ > + if (count) { > + if (!nowayout) { > + size_t i; > + > + expect_close = 0; > + for (i = 0; i != count; i++) { > + char c; > + > + if (get_user(c, buf + i)) > + return -EFAULT; > + if (c == 'V') > + expect_close = 42; > + } > + } > + swd_ping(); > + } > + return count; > +} > + > +static const struct file_operations swd_fops = { > + .owner = THIS_MODULE, > + .llseek = no_llseek, > + .write = swd_write, > + .unlocked_ioctl = swd_ioctl, > + .compat_ioctl = compat_ptr_ioctl, > + .open = swd_open, > + .release = swd_release, > +}; > + > +static struct miscdevice swd_miscdev = { > + .minor = WATCHDOG_MINOR, > + .name = "soft-watchdog", > + .fops = &swd_fops, > +}; > + > +int __init swd_register(void) > +{ > + int ret; > + > + pr_info("swd installed with timer"); > + ret = misc_register(&swd_miscdev); > + timer_setup(&swd_timer, swd_timer_func, 0); > + return 0; > +} > + > +static int __init swd_init(void) > +{ > + return 0; > +} > + > +static void __exit swd_unload(void) > +{ > + return; > +} > + > +subsys_initcall(swd_register); > + > +module_init(swd_init); > +module_exit(swd_unload); > + > +MODULE_AUTHOR("Peter Enderborg"); > +MODULE_DESCRIPTION("Memory software watchdog"); > +MODULE_LICENSE("GPL"); > diff --git a/include/uapi/linux/watchdog.h b/include/uapi/linux/watchdog.h > index b15cde5c9054..780987482e2d 100644 > --- a/include/uapi/linux/watchdog.h > +++ b/include/uapi/linux/watchdog.h > @@ -48,6 +48,12 @@ struct watchdog_info { > #define WDIOF_PRETIMEOUT 0x0200 /* Pretimeout (in seconds), get/set */ > #define WDIOF_ALARMONLY 0x0400 /* Watchdog triggers a management or > other external alarm not a reboot */ > +#define WDIOF_OOMKILL 0x0800 /* Watchdog trigger process kill as > + * oom kill > + */ > +#define WDIOF_AUTOKILL 0x1000 /* Watchdog listen to oom notifiy > + * and kills with its policy > + */ > #define WDIOF_KEEPALIVEPING 0x8000 /* Keep alive ping reply */ > > #define WDIOS_DISABLECARD 0x0001 /* Turn off the watchdog timer */ > diff --git a/mm/oom_kill.c b/mm/oom_kill.c > index fa1cf18bac97..a5f7299af9a3 100644 > --- a/mm/oom_kill.c > +++ b/mm/oom_kill.c > @@ -658,7 +658,7 @@ static int oom_reaper(void *unused) > return 0; > } > > -static void wake_oom_reaper(struct task_struct *tsk) > +void wake_oom_reaper(struct task_struct *tsk) > { > /* mm is already queued? */ > if (test_and_set_bit(MMF_OOM_REAP_QUEUED, &tsk->signal->oom_mm->flags)) > @@ -856,7 +856,7 @@ static bool task_will_free_mem(struct task_struct *task) > return ret; > } > > -static void __oom_kill_process(struct task_struct *victim, const char *message) > +void __oom_kill_process(struct task_struct *victim, const char *message) > { > struct task_struct *p; > struct mm_struct *mm; >