Currently hung_task_check_interval_secs and hung_task_timeout_secs only support whole seconds. In some cases a task that stays in the TASK_UNINTERRUPTIBLE state for less than 1 second already needs to be caught by the hung task detector, either to trigger a panic and get a ramdump or to print the tasks on all CPUs. Change hung_task_check_interval_secs to hung_task_check_interval_msecs so that the check interval is given in milliseconds. Add a hung_task_timeout_msecs file to set the millisecond part of the timeout. The task timeout in milliseconds is hung_task_timeout_secs * 1000 + hung_task_timeout_msecs. Signed-off-by: yang che <chey84736@xxxxxxxxx> --- v2->v3: Fix some format issues. Use msecs_to_jiffies() and jiffies_to_msecs(). Because timeout = secs * 1000 + msecs, setting sysctl_hung_task_timeout_msecs = CONFIG_DEFAULT_HUNG_TASK_TIMEOUT * MSEC_PER_SEC; would make the timeout twice CONFIG_DEFAULT_HUNG_TASK_TIMEOUT. v1->v2: Add hung_task_check_interval_millisecs and hung_task_timeout_millisecs. Fix writing to the millisecond file silently overriding the setting in the seconds file. [1]https://lore.kernel.org/lkml/CAN_w4MWMfoDGfpON-bYHrU=KuJG2vpFj01ZbN4r-iwM4AyyuGw@xxxxxxxxxxxxxx [2]https://lore.kernel.org/lkml/20200705171633.GU25523@xxxxxxxxxxxxxxxxxxxx/ include/linux/sched/sysctl.h | 3 ++- kernel/hung_task.c | 31 ++++++++++++++++++++++--------- kernel/sysctl.c | 12 ++++++++++-- 3 files changed, 34 insertions(+), 12 deletions(-) diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index 660ac49..41b426e 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -16,8 +16,9 @@ extern unsigned int sysctl_hung_task_all_cpu_backtrace; extern int sysctl_hung_task_check_count; extern unsigned int sysctl_hung_task_panic; +extern unsigned long sysctl_hung_task_timeout_msecs; extern unsigned long sysctl_hung_task_timeout_secs; -extern unsigned long sysctl_hung_task_check_interval_secs; +extern unsigned long sysctl_hung_task_check_interval_msecs; extern int sysctl_hung_task_warnings; int proc_dohung_task_timeout_secs(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t 
*ppos); diff --git a/kernel/hung_task.c b/kernel/hung_task.c index ce76f49..bac6f33 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -37,16 +37,23 @@ int __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT; * the RCU grace period. So it needs to be upper-bound. */ #define HUNG_TASK_LOCK_BREAK (HZ / 10) +#define MSEC_PER_SEC 1000L /* - * Zero means infinite timeout - no checking done: + * Zero and sysctl_hung_task_timeout_msecs zero means infinite timeout - no checking done: */ unsigned long __read_mostly sysctl_hung_task_timeout_secs = CONFIG_DEFAULT_HUNG_TASK_TIMEOUT; /* - * Zero (default value) means use sysctl_hung_task_timeout_secs: + * Zero (default value) means only use sysctl_hung_task_timeout_secs */ -unsigned long __read_mostly sysctl_hung_task_check_interval_secs; +unsigned long __read_mostly sysctl_hung_task_timeout_msecs; + +/* + * Zero (default value) means use + * sysctl_hung_task_timeout_secs * MSEC_PER_SEC + sysctl_hung_task_timeout_msecs + */ +unsigned long __read_mostly sysctl_hung_task_check_interval_msecs; int __read_mostly sysctl_hung_task_warnings = 10; @@ -108,7 +115,8 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout) t->last_switch_time = jiffies; return; } - if (time_is_after_jiffies(t->last_switch_time + timeout * HZ)) + + if (time_is_after_jiffies(t->last_switch_time + msecs_to_jiffies(timeout))) return; trace_sched_process_hang(t); @@ -126,13 +134,17 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout) if (sysctl_hung_task_warnings) { if (sysctl_hung_task_warnings > 0) sysctl_hung_task_warnings--; - pr_err("INFO: task %s:%d blocked for more than %ld seconds.\n", - t->comm, t->pid, (jiffies - t->last_switch_time) / HZ); + + pr_err("INFO: task %s:%d blocked for more than %ld.%03ld seconds.\n", + t->comm, t->pid, + jiffies_to_msecs(jiffies - t->last_switch_time) / MSEC_PER_SEC, + jiffies_to_msecs(jiffies - t->last_switch_time) % MSEC_PER_SEC); pr_err(" %s %s %.*s\n", 
print_tainted(), init_utsname()->release, (int)strcspn(init_utsname()->version, " "), init_utsname()->version); pr_err("\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\"" + "\"echo 0 > /proc/sys/kernel/hung_task_timeout_msecs\"" " disables this message.\n"); sched_show_task(t); hung_task_show_lock = true; @@ -217,7 +229,7 @@ static long hung_timeout_jiffies(unsigned long last_checked, unsigned long timeout) { /* timeout of 0 will disable the watchdog */ - return timeout ? last_checked - jiffies + timeout * HZ : + return timeout ? last_checked - jiffies + msecs_to_jiffies(timeout) : MAX_SCHEDULE_TIMEOUT; } @@ -281,8 +293,9 @@ static int watchdog(void *dummy) set_user_nice(current, 0); for ( ; ; ) { - unsigned long timeout = sysctl_hung_task_timeout_secs; - unsigned long interval = sysctl_hung_task_check_interval_secs; + unsigned long timeout = sysctl_hung_task_timeout_secs * MSEC_PER_SEC + + sysctl_hung_task_timeout_msecs; + unsigned long interval = sysctl_hung_task_check_interval_msecs; long t; if (interval == 0) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index db1ce7a..5c52759 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -2476,6 +2476,14 @@ static struct ctl_table kern_table[] = { .extra1 = SYSCTL_ZERO, }, { + .procname = "hung_task_timeout_msecs", + .data = &sysctl_hung_task_timeout_msecs, + .maxlen = sizeof(unsigned long), + .mode = 0644, + .proc_handler = proc_dohung_task_timeout_secs, + .extra2 = &hung_task_timeout_max, + }, + { .procname = "hung_task_timeout_secs", .data = &sysctl_hung_task_timeout_secs, .maxlen = sizeof(unsigned long), @@ -2484,8 +2492,8 @@ static struct ctl_table kern_table[] = { .extra2 = &hung_task_timeout_max, }, { - .procname = "hung_task_check_interval_secs", - .data = &sysctl_hung_task_check_interval_secs, + .procname = "hung_task_check_interval_msecs", + .data = &sysctl_hung_task_check_interval_msecs, .maxlen = sizeof(unsigned long), .mode = 0644, .proc_handler = proc_dohung_task_timeout_secs, -- 2.7.4