On Mon, Apr 25, 2022 at 01:42:07PM +0000, Sebastian Ene wrote:
> This driver creates per-cpu hrtimers which are required to do the
> periodic 'pet' operation. On a conventional watchdog-core driver, the
> userspace is responsible for delivering the 'pet' events by writing to
> the particular /dev/watchdogN node. In this case we require a strong
> thread affinity to be able to account for lost time on a per vCPU.
>
> This part of the driver is the 'frontend' which is reponsible for
> delivering the periodic 'pet' events, configuring the virtual peripheral
> and listening for cpu hotplug events. The other part of the driver
> handles the peripheral emulation and this part accounts for lost time by
> looking at the /proc/{}/task/{}/stat entries and is located here:
> https://chromium-review.googlesource.com/c/chromiumos/platform/crosvm/+/3548817
>
> Signed-off-by: Sebastian Ene <sebastianene@xxxxxxxxxx>
> ---
>  drivers/misc/Kconfig  |  12 +++
>  drivers/misc/Makefile |   1 +
>  drivers/misc/vm-wdt.c | 207 ++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 220 insertions(+)
>  create mode 100644 drivers/misc/vm-wdt.c
>
> diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
> index 2b9572a6d114..71c173e3f064 100644
> --- a/drivers/misc/Kconfig
> +++ b/drivers/misc/Kconfig
> @@ -493,6 +493,18 @@ config OPEN_DICE
>
>  	  If unsure, say N.
>
> +config VM_WATCHDOG
> +	tristate "Virtual Machine Watchdog"
> +	select LOCKUP_DETECTOR
> +	help
> +	  Detect CPU locks on the virtual machine. This driver relies on the
> +	  hrtimers which are CPU-binded to do the 'pet' operation. When a vCPU
> +	  has to do a 'pet', it exists the guest through MMIO write and the
> +	  backend driver takes into account the lost ticks for this particular
> +	  CPU.
> +	  To compile this driver as a module, choose M here: the
> +	  module will be called vm-wdt.
> +
>  source "drivers/misc/c2port/Kconfig"
>  source "drivers/misc/eeprom/Kconfig"
>  source "drivers/misc/cb710/Kconfig"
> diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
> index 2ec634354cf5..fa9d644da5db 100644
> --- a/drivers/misc/Makefile
> +++ b/drivers/misc/Makefile
> @@ -59,3 +59,4 @@ obj-$(CONFIG_XILINX_SDFEC)	+= xilinx_sdfec.o
>  obj-$(CONFIG_HISI_HIKEY_USB)	+= hisi_hikey_usb.o
>  obj-$(CONFIG_UID_SYS_STATS)	+= uid_sys_stats.o
>  obj-$(CONFIG_OPEN_DICE)		+= open-dice.o
> +obj-$(CONFIG_VM_WATCHDOG)	+= vm-wdt.o

We have no limit on names, why not "vm-watchdog"?

> \ No newline at end of file
> diff --git a/drivers/misc/vm-wdt.c b/drivers/misc/vm-wdt.c
> new file mode 100644
> index 000000000000..0c4df2fefbb9
> --- /dev/null
> +++ b/drivers/misc/vm-wdt.c
> @@ -0,0 +1,207 @@
> +// SPDX-License-Identifier: GPL-2.0
> +//
> +// Virtual watchdog driver.
> +// Copyright (C) Google, 2022

I will need a watchdog maintainer to agree that this is the way to do
this as I really really do not understand why you can not use that
subsystem here.
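
To make that concrete: hooking into the watchdog core is normally just a
watchdog_device plus a small set of ops, and the core then provides the
/dev/watchdogN node, the ioctl interface and the keepalive plumbing for
free.  Rough sketch only (the names, register offsets and timeout policy
below are made up for illustration, this is not a conversion of your
patch):

/* Illustrative watchdog-core client, not a drop-in replacement. */
#include <linux/io.h>
#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/watchdog.h>

struct vmwdt_core {
	struct watchdog_device wdd;
	void __iomem *membase;
};

static int vmwdt_core_start(struct watchdog_device *wdd)
{
	struct vmwdt_core *vw = watchdog_get_drvdata(wdd);

	iowrite32(1, vw->membase);	/* status register, for example */
	return 0;
}

static int vmwdt_core_stop(struct watchdog_device *wdd)
{
	struct vmwdt_core *vw = watchdog_get_drvdata(wdd);

	iowrite32(0, vw->membase);
	return 0;
}

static int vmwdt_core_ping(struct watchdog_device *wdd)
{
	struct vmwdt_core *vw = watchdog_get_drvdata(wdd);

	/* reload the counter; offset 0x4 mirrors the LOAD_CNT idea above */
	iowrite32(wdd->timeout, vw->membase + 0x4);
	return 0;
}

static const struct watchdog_ops vmwdt_core_ops = {
	.owner = THIS_MODULE,
	.start = vmwdt_core_start,
	.stop  = vmwdt_core_stop,
	.ping  = vmwdt_core_ping,
};

static const struct watchdog_info vmwdt_core_info = {
	.identity = "vm-watchdog",
	.options  = WDIOF_SETTIMEOUT | WDIOF_KEEPALIVEPING | WDIOF_MAGICCLOSE,
};

static int vmwdt_core_probe(struct platform_device *pdev)
{
	struct vmwdt_core *vw;

	vw = devm_kzalloc(&pdev->dev, sizeof(*vw), GFP_KERNEL);
	if (!vw)
		return -ENOMEM;

	vw->membase = devm_platform_ioremap_resource(pdev, 0);
	if (IS_ERR(vw->membase))
		return PTR_ERR(vw->membase);

	vw->wdd.info = &vmwdt_core_info;
	vw->wdd.ops = &vmwdt_core_ops;
	vw->wdd.parent = &pdev->dev;
	vw->wdd.timeout = 8;				/* default */
	watchdog_set_drvdata(&vw->wdd, vw);
	watchdog_init_timeout(&vw->wdd, 0, &pdev->dev);	/* honours timeout-sec */

	/* the core takes over /dev/watchdogN and the keepalive path */
	return devm_watchdog_register_device(&pdev->dev, &vw->wdd);
}

The per-vCPU lost-time accounting you describe above is exactly the part
I would like the watchdog maintainers to comment on.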
> +
> +#include <linux/cpu.h>
> +#include <linux/init.h>
> +#include <linux/io.h>
> +#include <linux/kernel.h>
> +
> +#include <linux/device.h>
> +#include <linux/interrupt.h>
> +#include <linux/module.h>
> +#include <linux/nmi.h>
> +#include <linux/of.h>
> +#include <linux/of_device.h>
> +#include <linux/param.h>
> +#include <linux/percpu.h>
> +#include <linux/platform_device.h>
> +#include <linux/slab.h>
> +
> +#define DRV_NAME		"vm_wdt"

KBUILD_MODNAME please

> +
> +#define VMWDT_REG_STATUS		(0x00)
> +#define VMWDT_REG_LOAD_CNT		(0x04)
> +#define VMWDT_REG_CURRENT_CNT		(0x08)
> +#define VMWDT_REG_CLOCK_FREQ_HZ		(0x0C)
> +#define VMWDT_REG_LEN			(0x10)
> +
> +#define VMWDT_DEFAULT_CLOCK_HZ		(10)
> +#define VMWDT_DEFAULT_TIMEOT_SEC	(8)
> +
> +struct vm_wdt_s {
> +	void __iomem *membase;
> +	u32 clock_freq;
> +	u32 expiration_sec;
> +	u32 ping_timeout_ms;
> +	struct hrtimer per_cpu_hrtimer;
> +	struct platform_device *dev;
> +};
> +
> +#define vmwdt_reg_write(wdt, reg, value) \
> +	iowrite32((value), (wdt)->membase + (reg))
> +#define vmwdt_reg_read(wdt, reg) \
> +	io32read((wdt)->membase + (reg))
> +
> +static struct platform_device *virt_dev;

Only one device in the system?  Please no, use the correct apis and you
will not have any limits like this.
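
For example, the multi-instance cpu hotplug api already hands a per-device
cookie to the callbacks, so nothing here needs to be a file-scope singleton.
Rough sketch only, the names are made up and the hrtimer wiring is elided:

#include <linux/cpuhotplug.h>
#include <linux/io.h>
#include <linux/platform_device.h>

/* per-device state; the hlist_node is the cookie the hotplug core passes back */
struct vmwdt_device {
	struct hlist_node cpuhp_node;
	void __iomem *membase;
};

static enum cpuhp_state vmwdt_cpuhp_state;

static int vmwdt_cpu_online(unsigned int cpu, struct hlist_node *node)
{
	struct vmwdt_device *vmwdt =
		container_of(node, struct vmwdt_device, cpuhp_node);

	/* enable this device's per-cpu slot; offsets follow the layout above */
	iowrite32(1, vmwdt->membase + cpu * 0x10);
	return 0;
}

static int vmwdt_cpu_offline(unsigned int cpu, struct hlist_node *node)
{
	struct vmwdt_device *vmwdt =
		container_of(node, struct vmwdt_device, cpuhp_node);

	iowrite32(0, vmwdt->membase + cpu * 0x10);
	return 0;
}

/* done once, e.g. at module init */
static int vmwdt_cpuhp_init(void)
{
	int ret;

	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
				      "virt/watchdog:online",
				      vmwdt_cpu_online, vmwdt_cpu_offline);
	if (ret < 0)
		return ret;
	vmwdt_cpuhp_state = ret;
	return 0;
}

/* per device, from probe(); no global platform_device needed */
static int vmwdt_cpuhp_add(struct vmwdt_device *vmwdt)
{
	return cpuhp_state_add_instance(vmwdt_cpuhp_state,
					&vmwdt->cpuhp_node);
}

The state is set up once and every probed device just adds its own
instance, so you can have as many of these as the VM wants to expose.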
> +
> +static enum hrtimer_restart vmwdt_timer_fn(struct hrtimer *hrtimer)
> +{
> +	struct vm_wdt_s *cpu_wdt;
> +	u32 ticks;
> +
> +	cpu_wdt = container_of(hrtimer, struct vm_wdt_s, per_cpu_hrtimer);
> +	ticks = cpu_wdt->clock_freq * cpu_wdt->expiration_sec;
> +	vmwdt_reg_write(cpu_wdt, VMWDT_REG_LOAD_CNT, ticks);
> +	hrtimer_forward_now(hrtimer, ms_to_ktime(cpu_wdt->ping_timeout_ms));
> +
> +	return HRTIMER_RESTART;
> +}
> +
> +static void vmwdt_start(void *arg)
> +{
> +	u32 ticks;
> +	struct vm_wdt_s *cpu_wdt = arg;
> +	struct hrtimer *hrtimer = &cpu_wdt->per_cpu_hrtimer;
> +
> +	vmwdt_reg_write(cpu_wdt, VMWDT_REG_CLOCK_FREQ_HZ,
> +			cpu_wdt->clock_freq);
> +
> +	/* Compute the number of ticks required for the watchdog counter
> +	 * register based on the internal clock frequency and the watchdog
> +	 * timeout given from the device tree.
> +	 */
> +	ticks = cpu_wdt->clock_freq * cpu_wdt->expiration_sec;
> +	vmwdt_reg_write(cpu_wdt, VMWDT_REG_LOAD_CNT, ticks);
> +
> +	/* Enable the internal clock and start the watchdog */
> +	vmwdt_reg_write(cpu_wdt, VMWDT_REG_STATUS, 1);
> +
> +	hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
> +	hrtimer->function = vmwdt_timer_fn;
> +	hrtimer_start(hrtimer, ms_to_ktime(cpu_wdt->ping_timeout_ms),
> +		      HRTIMER_MODE_REL_PINNED);
> +}
> +
> +static void vmwdt_stop(void *arg)
> +{
> +	struct vm_wdt_s *cpu_wdt = arg;
> +	struct hrtimer *hrtimer = &cpu_wdt->per_cpu_hrtimer;
> +
> +	hrtimer_cancel(hrtimer);
> +
> +	/* Disable the watchdog */
> +	vmwdt_reg_write(cpu_wdt, VMWDT_REG_STATUS, 0);
> +}
> +
> +static int start_watchdog_on_cpu(unsigned int cpu)
> +{
> +	struct vm_wdt_s *vm_wdt = platform_get_drvdata(virt_dev);
> +
> +	vmwdt_start(this_cpu_ptr(vm_wdt));
> +	return 0;
> +}
> +
> +static int stop_watchdog_on_cpu(unsigned int cpu)
> +{
> +	struct vm_wdt_s *vm_wdt = platform_get_drvdata(virt_dev);
> +
> +	vmwdt_stop(this_cpu_ptr(vm_wdt));
> +	return 0;
> +}
> +
> +static int vmwdt_probe(struct platform_device *dev)
> +{
> +	int cpu, ret, err;
> +	void __iomem *membase;
> +	struct resource *r;
> +	struct vm_wdt_s *vm_wdt;
> +	u32 wdt_clock, wdt_timeout_sec = 0;
> +
> +	r = platform_get_resource(dev, IORESOURCE_MEM, 0);
> +	if (r == NULL)
> +		return -ENOENT;
> +
> +	vm_wdt = alloc_percpu(typeof(struct vm_wdt_s));
> +	if (!vm_wdt)
> +		return -ENOMEM;
> +
> +	membase = ioremap(r->start, resource_size(r));
> +	if (!membase) {
> +		ret = -ENXIO;
> +		goto err_withmem;
> +	}
> +
> +	virt_dev = dev;
> +	platform_set_drvdata(dev, vm_wdt);
> +	if (of_property_read_u32(dev->dev.of_node, "clock", &wdt_clock))
> +		wdt_clock = VMWDT_DEFAULT_CLOCK_HZ;
> +
> +	if (of_property_read_u32(dev->dev.of_node, "timeout-sec",
> +				 &wdt_timeout_sec))
> +		wdt_timeout_sec = VMWDT_DEFAULT_TIMEOT_SEC;
> +
> +	for_each_cpu_and(cpu, cpu_online_mask, &watchdog_cpumask) {
> +		struct vm_wdt_s *cpu_wdt = per_cpu_ptr(vm_wdt, cpu);
> +
> +		cpu_wdt->membase = membase + cpu * VMWDT_REG_LEN;
> +		cpu_wdt->clock_freq = wdt_clock;
> +		cpu_wdt->expiration_sec = wdt_timeout_sec;
> +		cpu_wdt->ping_timeout_ms = wdt_timeout_sec * MSEC_PER_SEC / 2;
> +		smp_call_function_single(cpu, vmwdt_start, cpu_wdt, true);
> +	}
> +
> +	err = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
> +					"virt/watchdog:online",
> +					start_watchdog_on_cpu,
> +					stop_watchdog_on_cpu);
> +	if (err < 0) {
> +		pr_warn("could not be initialized");

drivers should never use pr_* calls.  dev_warn() please.
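
Something like this, using the struct device you already have in probe()
(the message text is just an example):

	dev_warn(&dev->dev, "cpu hotplug state setup failed: %d\n", err);

thanks,

greg k-h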