On Wed, Feb 06, 2019 at 04:50:36PM +0000, michaelsh@xxxxxxxxxxxx wrote: > From: Michael Shych <michaelsh@xxxxxxxxxxxx> > > Introduce watchdog driver for variious rang of Mellanox Ethernet and s/variious/various/ > Infiniband switch systems. > > Watchdog driver for Mellanox watchdog devices, implemented in > programmable logic device. > > Main and auxiliary watchdog devices can exist on the same system. > There are several actions that can be defined in the watchdog: > system reset, start fans on full speed and increase a counter. > The last 2 actions are performed without a system reset. > Actions without reset are provided for auxiliary watchdog devices, > which is optional. > Access to HW registers is performed through generic > regmap interface. > > There are 2 types of HW watchdog implementations. > Type 1: actual HW timeout can be defined as power of 2 msec. > e.g. timeout 20 sec will be rounded up to 32768 msec.; > maximum timeout period is 32 sec (32768 msec.); > get time-left isn't supported > Type 2: actual HW timeout is defined in sec. and it's the same as > user-defined timeout; > maximum timeout is 255 sec; > get time-left is supported; > > Watchdog driver is probed from the common mlx_platform driver. > > Signed-off-by: Michael Shych <michaelsh@xxxxxxxxxxxx> > --- > drivers/watchdog/Kconfig | 16 ++ > drivers/watchdog/Makefile | 1 + > drivers/watchdog/mlx_wdt.c | 406 +++++++++++++++++++++++++++++++++++++++++++++ > 3 files changed, 423 insertions(+) > create mode 100644 drivers/watchdog/mlx_wdt.c > > diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig > index 57f017d74a97..f1766eb081bb 100644 > --- a/drivers/watchdog/Kconfig > +++ b/drivers/watchdog/Kconfig > @@ -241,6 +241,22 @@ config RAVE_SP_WATCHDOG > help > Support for the watchdog on RAVE SP device. > > +config MLX_WDT > + tristate "Mellanox Watchdog" > + depends on MELLANOX_PLATFORM > + select WATCHDOG_CORE > + select REGMAP > + help > + This is the driver for the hardware watchdog on Mellanox systems. > + If you are going to use it, say Y here, otherwise N. > + This driver can be used together with the watchdog daemon. > + It can also watch your kernel to make sure it doesn't freeze, > + and if it does, it reboots your system after a certain amount of > + time. > + > + To compile this driver as a module, choose M here: the > + module will be called mlx-wdt. > + > # ALPHA Architecture > > # ARM Architecture > diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile > index a0917ef28e07..941b74185c9c 100644 > --- a/drivers/watchdog/Makefile > +++ b/drivers/watchdog/Makefile > @@ -142,6 +142,7 @@ obj-$(CONFIG_INTEL_MID_WATCHDOG) += intel-mid_wdt.o > obj-$(CONFIG_INTEL_MEI_WDT) += mei_wdt.o > obj-$(CONFIG_NI903X_WDT) += ni903x_wdt.o > obj-$(CONFIG_NIC7018_WDT) += nic7018_wdt.o > +obj-$(CONFIG_MLX_WDT) += mlx_wdt.o > > # M68K Architecture > obj-$(CONFIG_M54xx_WATCHDOG) += m54xx_wdt.o > diff --git a/drivers/watchdog/mlx_wdt.c b/drivers/watchdog/mlx_wdt.c > new file mode 100644 > index 000000000000..be28aa165cf2 > --- /dev/null > +++ b/drivers/watchdog/mlx_wdt.c > @@ -0,0 +1,406 @@ > +// SPDX-License-Identifier: GPL-2.0+ > +/* > + * Mellanox watchdog driver > + * > + * Copyright (C) 2019 Mellanox Technologies > + * Copyright (C) 2019 Michael Shych <mshych@xxxxxxxxxxxx> > + */ > + > +#include <linux/bitops.h> > +#include <linux/device.h> > +#include <linux/errno.h> > +#include <linux/log2.h> > +#include <linux/module.h> > +#include <linux/platform_data/mlxreg.h> > +#include <linux/platform_device.h> > +#include <linux/regmap.h> > +#include <linux/spinlock.h> > +#include <linux/types.h> > +#include <linux/watchdog.h> > + > +#define MLXREG_WDT_CLOCK_SCALE 1000 > +#define MLXREG_WDT_MAX_TIMEOUT_TYPE1 32 > +#define MLXREG_WDT_MAX_TIMEOUT_TYPE2 255 > +#define MLXREG_WDT_MIN_TIMEOUT 1 > +#define MLXREG_WDT_HW_TIMEOUT_CONVERT(hw_timeout) ((1 << (hw_timeout)) \ > + / MLXREG_WDT_CLOCK_SCALE) > +#define MLXREG_WDT_OPTIONS_BASE (WDIOF_KEEPALIVEPING | WDIOF_MAGICCLOSE | \ > + WDIOF_SETTIMEOUT) > + > +/** > + * enum mlxreg_wdt_type - type of HW watchdog > + * > + * TYPE1 can be differentiated by different register/mask > + * for WD action set and ping. > + */ > +enum mlxreg_wdt_type { > + MLX_WDT_TYPE1, > + MLX_WDT_TYPE2, > +}; > + > +/** > + * struct mlxreg_wdt - wd private data: > + * > + * @wdd: watchdog device; > + * @device: basic device; > + * @pdata: data received from platform driver; > + * @regmap: register map of parent device; > + * @timeout: defined timeout in sec.; > + * @hw_timeout: real timeout set in hw; > + * It will be roundup base of 2 in WD type 1, > + * in WD type 2 it will be same number of sec as timeout; > + * @action_idx: index for direct access to action register; > + * @timeout_idx:index for direct access to TO register; > + * @tleft_idx: index for direct access to time left register; > + * @ping_idx: index for direct access to ping register; > + * @reset_idx: index for direct access to reset cause register; > + * @wd_type: watchdog HW type; > + * @hw_timeout: actual HW timeout; > + * @io_lock: spinlock for io access; watchdog device access is already synchromized in the watchdog core. Why is this additional lock needed ? > + */ > +struct mlxreg_wdt { > + struct watchdog_device wdd; > + struct mlxreg_core_platform_data *pdata; > + void *regmap; > + int action_idx; > + int timeout_idx; > + int tleft_idx; > + int ping_idx; > + int reset_idx; > + enum mlxreg_wdt_type wdt_type; > + u8 hw_timeout; > + spinlock_t io_lock; /* the lock for io operations */ > +}; > + > +static int mlxreg_wdt_roundup_to_base_2(struct mlxreg_wdt *wdt, int timeout) > +{ > + timeout *= MLXREG_WDT_CLOCK_SCALE; > + > + wdt->hw_timeout = order_base_2(timeout); > + dev_info(wdt->wdd.parent, > + "watchdog %s timeout %d was rounded up to %lu (msec)\n", > + wdt->wdd.info->identity, timeout, roundup_pow_of_two(timeout)); Please no noise here. It is quite common for watchdog drivers to set a timeout value not exactly matching the real timeout. That is why the selected value is reported to userspace. > + > + return 0; What exactly is the point of this return value ? > +} > + > +static enum mlxreg_wdt_type > +mlxreg_wdt_check_watchdog_type(struct mlxreg_wdt *wdt, > + struct mlxreg_core_platform_data *pdata) > +{ > + if (pdata->data[wdt->action_idx].reg == > + pdata->data[wdt->ping_idx].reg && > + pdata->data[wdt->action_idx].mask == > + pdata->data[wdt->ping_idx].mask) > + return MLX_WDT_TYPE2; > + > + return MLX_WDT_TYPE1; > +} > + > +static int mlxreg_wdt_check_card_reset(struct mlxreg_wdt *wdt) > +{ > + struct mlxreg_core_data *reg_data; > + u32 regval; > + int rc; > + > + if (wdt->reset_idx == -EINVAL) > + return -EINVAL; > + Return values from this function are not checked and are thus pointless. > + if (!(wdt->wdd.info->options & WDIOF_CARDRESET)) > + return 0; > + > + spin_lock(&wdt->io_lock); > + reg_data = &wdt->pdata->data[wdt->reset_idx]; > + rc = regmap_read(wdt->regmap, reg_data->reg, ®val); > + spin_unlock(&wdt->io_lock); > + if (rc) > + goto read_error; > + > + if (regval & ~reg_data->mask) { > + wdt->wdd.bootstatus = WDIOF_CARDRESET; > + dev_info(wdt->wdd.parent, > + "watchdog previously reset the CPU\n"); > + } > + > +read_error: > + return rc; > +} > + > +static int mlxreg_wdt_start(struct watchdog_device *wdd) > +{ > + struct mlxreg_wdt *wdt = watchdog_get_drvdata(wdd); > + struct mlxreg_core_data *reg_data = &wdt->pdata->data[wdt->action_idx]; > + u32 regval; > + int rc; > + > + spin_lock(&wdt->io_lock); > + rc = regmap_read(wdt->regmap, reg_data->reg, ®val); > + if (rc) { > + spin_unlock(&wdt->io_lock); > + goto read_error; > + } > + > + regval = (regval & reg_data->mask) | BIT(reg_data->bit); > + rc = regmap_write(wdt->regmap, reg_data->reg, regval); I don't immediately see why regmap_update_bits() would not work here. > + spin_unlock(&wdt->io_lock); > + if (!rc) { > + set_bit(WDOG_HW_RUNNING, &wdt->wdd.status); Setting WDOG_HW_RUNNING is only necessary if set during probe, or in the stop function if the watchdog can not be stopped. It should not be necessary here. [ I see you are calling this function from probe. Set the bit there. ] > + dev_info(wdd->parent, "watchdog %s started\n", > + wdd->info->identity); Is all this noise really necessary ? > + } > + > +read_error: > + return rc; > +} > + > +static int mlxreg_wdt_stop(struct watchdog_device *wdd) > +{ > + struct mlxreg_wdt *wdt = watchdog_get_drvdata(wdd); > + struct mlxreg_core_data *reg_data = &wdt->pdata->data[wdt->action_idx]; > + u32 regval; > + int rc; > + > + spin_lock(&wdt->io_lock); > + rc = regmap_read(wdt->regmap, reg_data->reg, ®val); > + if (rc) { > + spin_unlock(&wdt->io_lock); > + goto read_error; > + } > + > + regval = (regval & reg_data->mask) & ~BIT(reg_data->bit); > + rc = regmap_write(wdt->regmap, reg_data->reg, regval); I don't immediately see why regmap_update_bits() would not work here. > + spin_unlock(&wdt->io_lock); > + if (!rc) > + dev_info(wdd->parent, "watchdog %s stopped\n", > + wdd->info->identity); > + > +read_error: > + return rc; > +} > + > +static int mlxreg_wdt_ping(struct watchdog_device *wdd) > +{ > + struct mlxreg_wdt *wdt = watchdog_get_drvdata(wdd); > + struct mlxreg_core_data *reg_data = &wdt->pdata->data[wdt->ping_idx]; > + u32 regval; > + int rc; > + > + spin_lock(&wdt->io_lock); > + rc = regmap_read(wdt->regmap, reg_data->reg, ®val); > + if (rc) > + goto read_error; > + > + regval = (regval & reg_data->mask) | BIT(reg_data->bit); > + rc = regmap_write(wdt->regmap, reg_data->reg, regval); Same as above. Besides, the only difference to the start function is the logging noise there. I would suggest to drop the noise as well as this function. > + > +read_error: > + spin_unlock(&wdt->io_lock); > + > + return rc; > +} > + > +static int mlxreg_wdt_set_timeout(struct watchdog_device *wdd, > + unsigned int timeout) > +{ > + struct mlxreg_wdt *wdt = watchdog_get_drvdata(wdd); > + struct mlxreg_core_data *reg_data = &wdt->pdata->data[wdt->timeout_idx]; > + u32 regval, set_time; > + int rc; > + > + spin_lock(&wdt->io_lock); > + > + if (wdt->wdt_type == MLX_WDT_TYPE1) { > + rc = regmap_read(wdt->regmap, reg_data->reg, ®val); > + if (rc) > + goto read_error; > + mlxreg_wdt_roundup_to_base_2(wdt, timeout); > + regval = (regval & reg_data->mask) | wdt->hw_timeout; > + /* Rowndown to actual closest number of sec. */ > + set_time = MLXREG_WDT_HW_TIMEOUT_CONVERT(wdt->hw_timeout); The code is expected to set the value of "wdd->timeout" to the actual timeout. This is why it is set here, and not in the calling code. > + } else { > + wdt->hw_timeout = timeout; > + set_time = timeout; > + regval = timeout; > + } > + > + watchdog_init_timeout(&wdt->wdd, set_time, wdd->parent); This is wrong here. Just set wdd->timeout. watchdog_init_timeout() should only be called in the probe function. > + rc = regmap_write(wdt->regmap, reg_data->reg, regval); > + > +read_error: > + spin_unlock(&wdt->io_lock); > + if (!rc) { > + if (watchdog_active(wdd)) { This warrants an explanation: Presumably updating the timeout requires it to be stopped and restarted. Overall, however, it would probably be simpler to call regmap_update_bits() twice directly here. > + rc = mlxreg_wdt_stop(wdd); > + if (!rc) > + rc = mlxreg_wdt_start(wdd); > + } > + } > + > + return rc; > +} > + > +static unsigned int mlxreg_wdt_get_timeleft(struct watchdog_device *wdd) > +{ > + struct mlxreg_wdt *wdt = watchdog_get_drvdata(wdd); > + struct mlxreg_core_data *reg_data = &wdt->pdata->data[wdt->tleft_idx]; > + u32 regval; > + int rc; > + > + if (wdt->wdt_type == MLX_WDT_TYPE1) > + return 0; > + Should never get here, thus this check is unnecessary. > + spin_lock(&wdt->io_lock); > + rc = regmap_read(wdt->regmap, reg_data->reg, ®val); > + spin_unlock(&wdt->io_lock); > + > + return rc == 0 ? regval : 0; > +} > + > +static const struct watchdog_ops mlxreg_wdt_ops_type1 = { > + .start = mlxreg_wdt_start, > + .stop = mlxreg_wdt_stop, > + .ping = mlxreg_wdt_ping, > + .set_timeout = mlxreg_wdt_set_timeout, > + .owner = THIS_MODULE, > +}; > + > +static const struct watchdog_ops mlxreg_wdt_ops_type2 = { > + .start = mlxreg_wdt_start, > + .stop = mlxreg_wdt_stop, > + .ping = mlxreg_wdt_ping, > + .set_timeout = mlxreg_wdt_set_timeout, > + .get_timeleft = mlxreg_wdt_get_timeleft, > + .owner = THIS_MODULE, > +}; > + > +static const struct watchdog_info mlxreg_wdt_main_info = { > + .options = MLXREG_WDT_OPTIONS_BASE > + | WDIOF_CARDRESET, > + .identity = "mlx-wdt-main", > +}; > + > +static const struct watchdog_info mlxreg_wdt_aux_info = { > + .options = MLXREG_WDT_OPTIONS_BASE > + | WDIOF_ALARMONLY, > + .identity = "mlx-wdt-aux", > +}; > + > +static int mlxreg_wdt_config(struct mlxreg_wdt *wdt, > + struct mlxreg_core_platform_data *pdata) > +{ > + struct mlxreg_core_data *data = pdata->data; > + int i; > + > + wdt->reset_idx = -EINVAL; > + for (i = 0; i < pdata->counter; i++, data++) { > + if (strnstr(data->label, "action", sizeof(data->label))) > + wdt->action_idx = i; > + else if (strnstr(data->label, "timeout", sizeof(data->label))) > + wdt->timeout_idx = i; > + else if (strnstr(data->label, "timeleft", sizeof(data->label))) > + wdt->tleft_idx = i; > + else if (strnstr(data->label, "ping", sizeof(data->label))) > + wdt->ping_idx = i; > + else if (strnstr(data->label, "reset", sizeof(data->label))) > + wdt->reset_idx = i; > + } > + > + wdt->pdata = pdata; > + if (strnstr(pdata->identity, mlxreg_wdt_main_info.identity, > + sizeof(mlxreg_wdt_main_info.identity))) > + wdt->wdd.info = &mlxreg_wdt_main_info; > + else > + wdt->wdd.info = &mlxreg_wdt_aux_info; > + > + wdt->wdt_type = mlxreg_wdt_check_watchdog_type(wdt, pdata); > + if (wdt->wdt_type == MLX_WDT_TYPE2) { > + wdt->wdd.ops = &mlxreg_wdt_ops_type2; > + wdt->wdd.max_timeout = MLXREG_WDT_MAX_TIMEOUT_TYPE2; > + } else { > + wdt->wdd.ops = &mlxreg_wdt_ops_type1; > + wdt->wdd.max_timeout = MLXREG_WDT_MAX_TIMEOUT_TYPE1; > + } > + wdt->wdd.min_timeout = MLXREG_WDT_MIN_TIMEOUT; > + > + return 0; > +} > + > +static int mlxreg_wdt_init_timeout(struct mlxreg_wdt *wdt, > + struct mlxreg_core_platform_data *pdata) > +{ > + u32 timeout; > + > + timeout = pdata->data[wdt->timeout_idx].health_cntr; > + return mlxreg_wdt_set_timeout(&wdt->wdd, timeout); > +} > + > +static int mlxreg_wdt_probe(struct platform_device *pdev) > +{ > + struct mlxreg_core_platform_data *pdata; > + struct mlxreg_wdt *wdt; > + int rc; > + > + pdata = dev_get_platdata(&pdev->dev); > + if (!pdata) { > + dev_err(&pdev->dev, "Failed to get platform data.\n"); > + return -EINVAL; > + } > + wdt = devm_kzalloc(&pdev->dev, sizeof(*wdt), GFP_KERNEL); > + if (!wdt) > + return -ENOMEM; > + > + spin_lock_init(&wdt->io_lock); > + > + wdt->wdd.parent = &pdev->dev; > + wdt->regmap = pdata->regmap; > + mlxreg_wdt_config(wdt, pdata); > + > + if ((pdata->features & MLXREG_CORE_WD_FEATURE_NOSTOP_AFTER_START)) > + watchdog_set_nowayout(&wdt->wdd, WATCHDOG_NOWAYOUT); That is a bit different than the requested functionality, isn't it ? > + watchdog_stop_on_reboot(&wdt->wdd); > + watchdog_set_drvdata(&wdt->wdd, wdt); > + mlxreg_wdt_check_card_reset(wdt); > + rc = mlxreg_wdt_init_timeout(wdt, pdata); > + if (rc) { > + dev_err(&pdev->dev, > + "Cannot init watchdog timeout (err=%d)\n", rc); > + return rc; > + } > + > + rc = devm_watchdog_register_device(&pdev->dev, &wdt->wdd); > + if (rc) { > + dev_err(&pdev->dev, > + "Cannot register watchdog device (err=%d)\n", rc); > + return rc; > + } > + > + if ((pdata->features & MLXREG_CORE_WD_FEATURE_START_AT_BOOT)) > + mlxreg_wdt_start(&wdt->wdd); This should be called before registering the watchdog, and WDOG_HW_RUNNING should be set here to let the core know that the watchdog is running. > + > + return rc; rc is always 0 here. > +} > + > +static int mlxreg_wdt_remove(struct platform_device *pdev) > +{ > + struct mlxreg_wdt *wdt = dev_get_platdata(&pdev->dev); > + > + mlxreg_wdt_stop(&wdt->wdd); Call watchdog_stop_on_unregister() before registering the watchdog. > + watchdog_unregister_device(&wdt->wdd); This is wrong. The watchdog is registered with a devm_ function and is thus automatically unregistered. > + > + return 0; > +} > + > +static struct platform_driver mlxreg_wdt_driver = { > + .probe = mlxreg_wdt_probe, > + .remove = mlxreg_wdt_remove, > + .driver = { > + .name = "mlx-wdt", > + }, > +}; > + > +module_platform_driver(mlxreg_wdt_driver); > + > +MODULE_AUTHOR("Michael Shych <michaelsh@xxxxxxxxxxxx>"); > +MODULE_DESCRIPTION("Mellanox watchdog driver"); > +MODULE_LICENSE("GPL"); > +MODULE_ALIAS("platform:mlx-wdt"); > -- > 2.11.0 >