Currently hung_task_check_interval_secs and hung_task_timeout_secs only support whole seconds. In some cases a task stays in the TASK_UNINTERRUPTIBLE state for less than 1 second, yet we may still need the hung task detector to trigger a panic (to get a ramdump) or to print the tasks on all CPUs. Change hung_task_check_interval_secs to hung_task_check_interval_millisecs, so that the check interval is given in milliseconds. Add a hung_task_timeout_millisecs file to set the millisecond part of the timeout: task timeout = hung_task_timeout_secs * 1000 + hung_task_timeout_millisecs. (timeout * HZ / 1000) computes the number of jiffies in a timeout given in milliseconds. Signed-off-by: yang che <chey84736@xxxxxxxxx> --- v1->v2: add hung_task_check_interval_millisecs and hung_task_timeout_millisecs. Fix writing to the millisecond file silently overriding the setting in the seconds file. [1]https://lore.kernel.org/lkml/CAN_w4MWMfoDGfpON-bYHrU=KuJG2vpFj01ZbN4r-iwM4AyyuGw@xxxxxxxxxxxxxx include/linux/sched/sysctl.h | 3 ++- kernel/hung_task.c | 25 ++++++++++++++++++------- kernel/sysctl.c | 12 ++++++++++-- 3 files changed, 30 insertions(+), 10 deletions(-) diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index 660ac49..179c331 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -16,8 +16,9 @@ extern unsigned int sysctl_hung_task_all_cpu_backtrace; extern int sysctl_hung_task_check_count; extern unsigned int sysctl_hung_task_panic; +extern unsigned long sysctl_hung_task_timeout_millisecs; extern unsigned long sysctl_hung_task_timeout_secs; -extern unsigned long sysctl_hung_task_check_interval_secs; +extern unsigned long sysctl_hung_task_check_interval_millisecs; extern int sysctl_hung_task_warnings; int proc_dohung_task_timeout_secs(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); diff --git a/kernel/hung_task.c b/kernel/hung_task.c index ce76f49..809c999 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -37,6 +37,7 @@ int __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT; * the RCU grace 
period. So it needs to be upper-bound. */ #define HUNG_TASK_LOCK_BREAK (HZ / 10) +#define SECONDS 1000 /* * Zero means infinite timeout - no checking done: @@ -44,9 +45,14 @@ int __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT; unsigned long __read_mostly sysctl_hung_task_timeout_secs = CONFIG_DEFAULT_HUNG_TASK_TIMEOUT; /* + * Zero means only use sysctl_hung_task_timeout_secs + */ +unsigned long __read_mostly sysctl_hung_task_timeout_millisecs; + +/* * Zero (default value) means use sysctl_hung_task_timeout_secs: */ -unsigned long __read_mostly sysctl_hung_task_check_interval_secs; +unsigned long __read_mostly sysctl_hung_task_check_interval_millisecs; int __read_mostly sysctl_hung_task_warnings = 10; @@ -108,7 +114,8 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout) t->last_switch_time = jiffies; return; } - if (time_is_after_jiffies(t->last_switch_time + timeout * HZ)) + + if (time_is_after_jiffies(t->last_switch_time + (timeout * HZ) / SECONDS)) return; trace_sched_process_hang(t); @@ -126,13 +133,16 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout) if (sysctl_hung_task_warnings) { if (sysctl_hung_task_warnings > 0) sysctl_hung_task_warnings--; - pr_err("INFO: task %s:%d blocked for more than %ld seconds.\n", - t->comm, t->pid, (jiffies - t->last_switch_time) / HZ); + + pr_err("INFO: task %s:%d blocked for more than %ld seconds %ld milliseconds.\n", + t->comm, t->pid, (jiffies - t->last_switch_time) / HZ, + (jiffies - t->last_switch_time) % HZ * (SECONDS / HZ)); pr_err(" %s %s %.*s\n", print_tainted(), init_utsname()->release, (int)strcspn(init_utsname()->version, " "), init_utsname()->version); pr_err("\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\"" + "\"echo 0 > /proc/sys/kernel/hung_task_timeout_millisecs\"" " disables this message.\n"); sched_show_task(t); hung_task_show_lock = true; @@ -217,7 +227,7 @@ static long hung_timeout_jiffies(unsigned long last_checked, unsigned long timeout) { 
/* timeout of 0 will disable the watchdog */ - return timeout ? last_checked - jiffies + timeout * HZ : + return timeout ? last_checked - jiffies + (timeout * HZ) / SECONDS : MAX_SCHEDULE_TIMEOUT; } @@ -281,8 +291,9 @@ static int watchdog(void *dummy) set_user_nice(current, 0); for ( ; ; ) { - unsigned long timeout = sysctl_hung_task_timeout_secs; - unsigned long interval = sysctl_hung_task_check_interval_secs; + unsigned long timeout = sysctl_hung_task_timeout_secs * SECONDS + + sysctl_hung_task_timeout_millisecs; + unsigned long interval = sysctl_hung_task_check_interval_millisecs; long t; if (interval == 0) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index db1ce7a..8f7ac33 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -2476,6 +2476,14 @@ static struct ctl_table kern_table[] = { .extra1 = SYSCTL_ZERO, }, { + .procname = "hung_task_timeout_millisecs", + .data = &sysctl_hung_task_timeout_millisecs, + .maxlen = sizeof(unsigned long), + .mode = 0644, + .proc_handler = proc_dohung_task_timeout_secs, + .extra2 = &hung_task_timeout_max, + }, + { .procname = "hung_task_timeout_secs", .data = &sysctl_hung_task_timeout_secs, .maxlen = sizeof(unsigned long), @@ -2484,8 +2492,8 @@ static struct ctl_table kern_table[] = { .extra2 = &hung_task_timeout_max, }, { - .procname = "hung_task_check_interval_secs", - .data = &sysctl_hung_task_check_interval_secs, + .procname = "hung_task_check_interval_millisecs", + .data = &sysctl_hung_task_check_interval_millisecs, .maxlen = sizeof(unsigned long), .mode = 0644, .proc_handler = proc_dohung_task_timeout_secs, -- 2.7.4