If a daemon has /dev/watchdog open and exits (say, by crashing or being killed with SIGKILL), watchdog_release() generates a new watchdog_ping(), extending the life of the machine. However, the daemon may require control over whether or not the watchdog is pinged, and the ping generated by close compromises this control. I'm using the watchdog to reset machines in a cluster when their shared leases expire. Machine A will not ping its watchdog after one of its leases expires, intending that it be reset by its watchdog. Machine B can then assume that 60 seconds after the lease expiration, machine A will be reset, and it can safely acquire A's leases. The problem is that if the daemon on machine A exits sometime during the 60 seconds prior to the watchdog firing, watchdog_release() generates a new ping, extending the life of A by another 60 seconds. As a result, B must wait 120 seconds instead of 60 seconds to account for the additional ping the kernel may have inserted. To avoid this, add a WDIOC_NOCLOSEPING ioctl that sets a WDOG_NO_CLOSE_PING status bit; when the bit is set, watchdog_release() skips that ping. Signed-off-by: David Teigland <teigland@xxxxxxxxxx> --- drivers/watchdog/watchdog_dev.c | 9 ++++++++- include/linux/watchdog.h | 2 ++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/watchdog/watchdog_dev.c b/drivers/watchdog/watchdog_dev.c index ef8edec..b77442a 100644 --- a/drivers/watchdog/watchdog_dev.c +++ b/drivers/watchdog/watchdog_dev.c @@ -339,6 +339,11 @@ static long watchdog_ioctl(struct file *file, unsigned int cmd, unsigned int val; int err; + if (cmd == WDIOC_NOCLOSEPING) { + set_bit(WDOG_NO_CLOSE_PING, &wdd->status); + return 0; + } + err = watchdog_ioctl_op(wdd, cmd, arg); if (err != -ENOIOCTLCMD) return err; @@ -480,7 +485,9 @@ static int watchdog_release(struct inode *inode, struct file *file) if (!test_bit(WDOG_UNREGISTERED, &wdd->status)) dev_crit(wdd->dev, "watchdog did not stop!\n"); mutex_unlock(&wdd->lock); - watchdog_ping(wdd); + + if (!test_bit(WDOG_NO_CLOSE_PING, &wdd->status)) + watchdog_ping(wdd); } /* Allow the owner module to be unloaded again */ diff --git 
a/include/linux/watchdog.h b/include/linux/watchdog.h index da70f0f..4fa10ff 100644 --- a/include/linux/watchdog.h +++ b/include/linux/watchdog.h @@ -31,6 +31,7 @@ struct watchdog_info { #define WDIOC_SETPRETIMEOUT _IOWR(WATCHDOG_IOCTL_BASE, 8, int) #define WDIOC_GETPRETIMEOUT _IOR(WATCHDOG_IOCTL_BASE, 9, int) #define WDIOC_GETTIMELEFT _IOR(WATCHDOG_IOCTL_BASE, 10, int) +#define WDIOC_NOCLOSEPING _IOR(WATCHDOG_IOCTL_BASE, 11, int) #define WDIOF_UNKNOWN -1 /* Unknown flag error */ #define WDIOS_UNKNOWN -1 /* Unknown status error */ @@ -140,6 +141,7 @@ struct watchdog_device { #define WDOG_ALLOW_RELEASE 2 /* Did we receive the magic char ? */ #define WDOG_NO_WAY_OUT 3 /* Is 'nowayout' feature set ? */ #define WDOG_UNREGISTERED 4 /* Has the device been unregistered */ +#define WDOG_NO_CLOSE_PING 5 /* Do not ping in watchdog_release() */ }; #ifdef CONFIG_WATCHDOG_NOWAYOUT -- 1.7.10.1.362.g242cab3 -- To unsubscribe from this list: send the line "unsubscribe linux-watchdog" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html