RE: [PATCH] TCO Watchdog warning interrupt driver creation

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Thanks Darren and Guenter for your review.
Here is a new patchset, including the feature in the watchdog driver and using a boot parameter to be enabled.
François-Nicolas

Subject: [PATCH] TCO watchdog warning interrupt handler

Signed-off-by: Francois-Nicolas Muller <francois-nicolas.muller@xxxxxxxxx>
---
 drivers/watchdog/iTCO_wdt.c |   66 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 66 insertions(+)

diff --git a/drivers/watchdog/iTCO_wdt.c b/drivers/watchdog/iTCO_wdt.c
index e802a54..1ce9ad8 100644
--- a/drivers/watchdog/iTCO_wdt.c
+++ b/drivers/watchdog/iTCO_wdt.c
@@ -49,6 +49,8 @@
 /* Module and version information */
 #define DRV_NAME	"iTCO_wdt"
 #define DRV_VERSION	"1.11"
+#define DRV_NAME_ACPI	"iTCO_wdt_wirq"
+#define TCO_CLASS	DRV_NAME
 
 /* Includes */
 #include <linux/module.h>		/* For module specific items */
@@ -68,6 +70,9 @@
 #include <linux/pm.h>			/* For suspend/resume */
 #include <linux/mfd/core.h>
 #include <linux/mfd/lpc_ich.h>
+#include <linux/nmi.h>
+#include <linux/acpi.h>
+#include <acpi/actypes.h>
 
 #include "iTCO_vendor.h"
 
@@ -107,6 +112,12 @@ static struct {		/* this is private data for the iTCO_wdt device */
 	bool started;
 } iTCO_wdt_private;
 
+static const struct acpi_device_id iTCO_wdt_ids[] = {
+	{"8086229C", 0},
+	{"", 0},
+};
+MODULE_DEVICE_TABLE(acpi, iTCO_wdt_ids);
+
 /* module parameters */
 #define WATCHDOG_TIMEOUT 30	/* 30 sec default heartbeat */
 static int heartbeat = WATCHDOG_TIMEOUT;  /* in seconds */
@@ -126,6 +137,11 @@ module_param(turn_SMI_watchdog_clear_off, int, 0);
 MODULE_PARM_DESC(turn_SMI_watchdog_clear_off,
 	"Turn off SMI clearing watchdog (depends on TCO-version)(default=1)");
 
+static bool warn_irq;
+module_param(warn_irq, bool, 0);
+MODULE_PARM_DESC(warn_irq,
+	"Watchdog trigs a panic at first expiration (default=0)");
+
 /*
  * Some TCO specific functions
  */
@@ -200,6 +216,37 @@ static int iTCO_wdt_unset_NO_REBOOT_bit(void)
 	return ret; /* returns: 0 = OK, -EIO = Error */
 }
 
+static u32 iTCO_wdt_wirq(acpi_handle gpe_device, u32 gpe, void *context)
+{
+	pr_warn("[SHTDWN] %s, WATCHDOG TIMEOUT HANDLER!\n", __func__);
+
+	trigger_all_cpu_backtrace();
+	panic("Kernel Watchdog");
+
+	/* This code should not be reached */
+	return IRQ_HANDLED;
+}
+
+static int iTCO_wdt_acpi_add(struct acpi_device *device)
+{
+	unsigned long long gpe;
+	acpi_status status;
+
+	status = acpi_evaluate_integer(device->handle, "_GPE", NULL, &gpe);
+	if (ACPI_FAILURE(status))
+		return -EINVAL;
+
+	status = acpi_install_gpe_handler(NULL, gpe, ACPI_GPE_EDGE_TRIGGERED,
+					  iTCO_wdt_wirq, NULL);
+	if (ACPI_FAILURE(status))
+		return -ENODEV;
+
+	acpi_enable_gpe(NULL, gpe);
+
+	pr_info("interrupt=SCI GPE=0x%02llx", gpe);
+	return 0;
+}
+
 static int iTCO_wdt_start(struct watchdog_device *wd_dev)
 {
 	unsigned int val;
@@ -628,6 +675,15 @@ static struct platform_driver iTCO_wdt_driver = {
 	},
 };
 
+static struct acpi_driver iTCO_wdt_acpi_driver = {
+	.name = DRV_NAME_ACPI,
+	.class = TCO_CLASS,
+	.ids = iTCO_wdt_ids,
+	.ops = {
+		.add = iTCO_wdt_acpi_add,
+	},
+};
+
 static int __init iTCO_wdt_init_module(void)
 {
 	int err;
@@ -638,12 +694,22 @@ static int __init iTCO_wdt_init_module(void)
 	if (err)
 		return err;
 
+	if (warn_irq) {
+		err = acpi_bus_register_driver(&iTCO_wdt_acpi_driver);
+		if (err) {
+			platform_driver_unregister(&iTCO_wdt_driver);
+			return err;
+		}
+	}
+
 	return 0;
 }
 
 static void __exit iTCO_wdt_cleanup_module(void)
 {
 	platform_driver_unregister(&iTCO_wdt_driver);
+	if (warn_irq)
+		acpi_bus_unregister_driver(&iTCO_wdt_acpi_driver);
 	pr_info("Watchdog Module Unloaded\n");
 }
 
-- 
1.7.9.5



-----Original Message-----
From: Guenter Roeck [mailto:linux@xxxxxxxxxxxx] 
Sent: Thursday, December 11, 2014 3:30 PM
To: Darren Hart; Muller, Francois-nicolas
Cc: 'platform-driver-x86@xxxxxxxxxxxxxxx'; Rafael Wysocki; Linux ACPI Mailing List; linux-watchdog@xxxxxxxxxxxxxxx; Wim Van Sebroeck
Subject: Re: [PATCH] TCO Watchdog warning interrupt driver creation

On 12/10/2014 08:04 PM, Darren Hart wrote:
> On Fri, Dec 05, 2014 at 10:08:45AM +0000, Muller, Francois-nicolas wrote:
>> This driver provides support for TCO watchdog warning interrupt.
>>
>
> Hi Francois,
>
> Cc: Rafael and linux-acpi per his request on ACPI drivers (even if 
> they are for platform/drivers/x86).
>
>> This feature is useful to root cause watchdog expiration.
>
> Would this not be better suited as a debug option to the iTCO_wdt.c 
> driver under drivers/watchdog?
>

I agree, this should be in the watchdog driver.

Normally a watchdog is expected to reset the system if it fires. This is its "normal" operation. Correct, some watchdogs create an NMI or some other interrupt before this happens, as a last warning to give the system a chance to do something about it.

Catching this interrupt and dumping a message maybe a good idea, but letting the system panic as response seems to be excessive and may even be counter-productive. Maybe there should be a run-time option (eg a boot parameter), in addition or instead of the Kconfig entry, to enable it, if it is considered useful for debugging.

I find the headline in the Kconfig entry a bit misleading. It states "support interrupt", while what it really does is to crash the system if that interrupt actually happens. I don't usually call that "support".

Thanks,
Guenter

> Cc: Wim Van Sebroeck and linux-watchdog
>
>> Upon first expiration of the TCO watchdog, a warning interrupt is 
>> fired to this driver, which calls panic() function and dumps debug 
>> information (registers and call stacks).
>
> Nit: Newlines between paragraphs for legibility here and in comments please.
>
>> This implies TCO watchdog driver has enabled the interrupt (SCI) and 
>> ACPI tables contain GPE mapping information.
>> After the interrupt has been fired, TCO watchdog reloads 
>> automatically and upon second expiration it trigs a reset of the platform.
>
> triggers
>
>>
>> Change-Id: I39b615b59dd4336bf208454f08b3e9eac9eb2880
>
> Please run through checkpatch.pl (Gerrit change-id's should be removed).
>
>> Signed-off-by: Francois-Nicolas Muller 
>> <francois-nicolas.muller@xxxxxxxxx>
>> ---
>> drivers/platform/x86/Kconfig          |   13 +++++
>> drivers/platform/x86/Makefile         |    1 +
>> drivers/platform/x86/intel_warn_int.c |   98 +++++++++++++++++++++++++++++++++
>> 3 files changed, 112 insertions(+)
>> create mode 100644 drivers/platform/x86/intel_warn_int.c
>>
>> diff --git a/drivers/platform/x86/Kconfig 
>> b/drivers/platform/x86/Kconfig index 5ae65c1..79cba16 100644
>> --- a/drivers/platform/x86/Kconfig
>> +++ b/drivers/platform/x86/Kconfig
>> @@ -827,4 +827,17 @@ config INTEL_BAYTRAIL_MBI
>>                   Interface. This is a requirement for systems that need to configure
>>                   the PUNIT for power management features such as RAPL.
>> +config INTEL_WARN_INT
>> +             tristate "TCO Watchdog warning interrupt"
>> +             depends on ITCO_WDT
>> +             ---help---
>> +               This driver provides support for TCO watchdog warning interrupt.
>> +               Upon first expiration of the TCO watchdog, a warning interrupt is
>> +               fired and the driver calls panic() function to dump 
>> +debug information
>
> s/function//
>
>> +               (registers and call stacks).
>
> newline
>
>> +               At the same time, the TCO watchdog reloads with 2.4 seconds timeout
>> +               value and runs till the second expiration. At the 
>> + second expiration of
>
> s/till/until/
>
>> +               the TCO watchdog, the platform resets (the dump is supposed to last less
>> +               than 2.4 seconds).
>> +
>> endif # X86_PLATFORM_DEVICES
>> diff --git a/drivers/platform/x86/Makefile 
>> b/drivers/platform/x86/Makefile index 32caae3..2d47b4d 100644
>> --- a/drivers/platform/x86/Makefile
>> +++ b/drivers/platform/x86/Makefile
>> @@ -58,3 +58,4 @@ obj-$(CONFIG_PVPANIC)                               += pvpanic.o
>> obj-$(CONFIG_INTEL_BAYTRAIL_MBI)                += intel_baytrail.o
>> obj-$(CONFIG_INTEL_SOC_PMIC)        += dc_ti_cc.o
>> obj-$(CONFIG_ACPI)                   += intel_em_config.o
>> +obj-$(CONFIG_INTEL_WARN_INT)      += intel_warn_int.o
>> diff --git a/drivers/platform/x86/intel_warn_int.c 
>> b/drivers/platform/x86/intel_warn_int.c
>> new file mode 100644
>> index 0000000..7ec8b73
>> --- /dev/null
>> +++ b/drivers/platform/x86/intel_warn_int.c
>> @@ -0,0 +1,98 @@
>> +/*
>> + * intel_warn_int.c - This driver provides support for TCO watchdog 
>> +warning
>> + * interrupt.
>
> Newlines between paragraphs in this section too.
>
>> + * This feature is useful to root cause watchdog expiration.
>> + * Upon first expiration of the TCO watchdog, a warning interrupt is 
>> + fired
>> + * to this driver, which calls panic() function and dumps debug 
>> + information
>
> s/function//
>
>> + * (registers and call stacks).
>> + * This implies TCO watchdog driver has enabled the interrupt (SCI) 
>> + and ACPI
>
> the ACPI...
>
>> + * tables contain GPE mapping information.
>
> the GPE...
>
>> + * After the interrupt has been fired, TCO watchdog reloads 
>> + automatically and
>> + * upon second expiration it trigs a reset of the platform.
>
> triggers
>
>> + * Copyright (c) 2014, Intel Corporation.
>
> All rights reserved.
>
>> + *
>> + * This program is free software; you can redistribute it and/or 
>> + modify it
>> + * under the terms and conditions of the GNU General Public License,
>> + * version 2, as published by the Free Software Foundation.
>> + *
>> + * This program is distributed in the hope it will be useful, but 
>> + WITHOUT
>> + * ANY WARRANTY; without even the implied warranty of 
>> + MERCHANTABILITY or
>> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public 
>> + License for
>> + * more details.
>> + *
>
> extra * line, can remove.
>
>> + */
>> +
>> +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
>> +
>> +#include <linux/module.h>
>> +#include <linux/types.h>
>> +#include <linux/errno.h>
>> +#include <linux/kernel.h>
>> +#include <linux/init.h>
>> +#include <linux/interrupt.h>
>> +#include <linux/acpi.h>
>> +#include <linux/nmi.h>
>> +#include <acpi/actypes.h>
>> +
>> +#define DRV_NAME "warn_int"
>> +#define TCO_CLASS DRV_NAME
>> +
>> +static const struct acpi_device_id tco_device_ids[] = {
>> +             {"8086229C", 0},
>
> What is this device ID? Is it specific to a debug ID for the WDT? If 
> not, will this eventually conflict with a non-debug driver for this WDT?
>
>> +             {"", 0},
>> +};
>> +MODULE_DEVICE_TABLE(acpi, tco_device_ids);
>> +
>> +static u32 warn_irq_handler(acpi_handle gpe_device, u32 gpe, void 
>> +*context) {
>> +             pr_warn("[SHTDWN] %s, WATCHDOG TIMEOUT HANDLER!\n", 
>> +__func__);
>
> Indenting with 13 spaces? Please review Documentation/CodingStyle and 
> update the patch. Rather than point out all these sorts of errors, I'm 
> goign to stop here - please read the doc and correct throughout.
>
>> +
>> +             trigger_all_cpu_backtrace();
>> +             panic("Kernel Watchdog");
>> +
>> +             /* This code should not be reached */
>> +             return IRQ_HANDLED;
>> +}
>> +
>> +static int acpi_tco_add(struct acpi_device *device) {
>> +             acpi_status status;
>> +             unsigned long long tco_gpe;
>
> Declare variables in decreasing line length order.
>
>> +
>> +             status = acpi_evaluate_integer(device->handle, "_GPE", NULL, &tco_gpe);
>> +             if (ACPI_FAILURE(status))
>> +                             return -EINVAL;
>> +
>> +             status = acpi_install_gpe_handler(NULL, tco_gpe,
>> +                                                                              ACPI_GPE_EDGE_TRIGGERED,
>> +                                                                              warn_irq_handler, NULL);
>> +             if (ACPI_FAILURE(status))
>> +                             return -ENODEV;
>> +
>> +             acpi_enable_gpe(NULL, tco_gpe);
>> +
>> +             pr_info("initialized. Interrupt=SCI GPE 0x%02llx", tco_gpe);
>> +             return 0;
>> +}
>> +
>> +static struct acpi_driver tco_driver = {
>> +             .name = "warn_int",
>> +             .class = TCO_CLASS,
>> +             .ids = tco_device_ids,
>> +             .ops = {
>> +                             .add = acpi_tco_add,
>> +             },
>> +};
>> +
>> +static int __init warn_int_init(void) {
>> +             return acpi_bus_register_driver(&tco_driver);
>> +}
>> +
>> +module_init(warn_int_init);
>> +/* no module_exit, this driver shouldn't be unloaded */
>> +
>> +MODULE_AUTHOR("Francois-Nicolas Muller 
>> +<francois-nicolas.muller@xxxxxxxxx>");
>> +MODULE_DESCRIPTION("Intel TCO WatchDog Warning Interrupt"); 
>> +MODULE_LICENSE("GPL"); MODULE_ALIAS("platform:" DRV_NAME);
>> --
>> 1.7.9.5
>>
>

--
To unsubscribe from this list: send the line "unsubscribe linux-acpi" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [Linux IBM ACPI]     [Linux Power Management]     [Linux Kernel]     [Linux Laptop]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Samba]     [Video 4 Linux]     [Device Mapper]     [Linux Resources]

  Powered by Linux