On 8/22/2017 7:24 AM, Zhang, Shile (NSB - CN/Hangzhou) wrote: > Hi, Imran, > Hi Shile, Thanks for getting back on this. > I think a "unmonitored list" is better than "monitor list", because we want khungtaskd can find out the "unexpected" hung task, but not few in a list. > Then, for the fg tasks, which can put it in the "unmonitored list", for the bg tasks, I think we can tweak the timeout to control the duration. > Here fg and bg tasks were just an example and may not be applicable for all environments (non-android). But main intention here is to have some mechanism that would allow to monitor only specific tasks. As we have a sysctl option to enable/disable this mechanism we can at any time fall back to default mechanism of monitoring all tasks. Alternatively this can be done on task priority basis also, where we have a minimum priority and only tasks with priorities higher than this min priority would be monitored and if we keep this min priority to the least priority , all tasks would be monitored like the default case. Please let me know your feedback regarding this approach. Thanks and Regards, Imran > Thanks! > > BR, Shile > > -----Original Message----- > From: Imran Khan [mailto:kimran@xxxxxxxxxxxxxx] > Sent: Monday, August 21, 2017 6:26 PM > To: mingo@xxxxxxxxxx > Cc: imrank140517@xxxxxxxxx; Imran Khan <kimran@xxxxxxxxxxxxxx>; Luis R. Rodriguez <mcgrof@xxxxxxxxxx>; Kees Cook <keescook@xxxxxxxxxxxx>; Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx>; Zhang, Shile (NSB - CN/Hangzhou) <shile.zhang@xxxxxxxxxxxxxxx>; Matt Fleming <matt@xxxxxxxxxxxxxxxxxxx>; Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>; Vegard Nossum <vegard.nossum@xxxxxxxxxx>; Tetsuo Handa <penguin-kernel@xxxxxxxxxxxxxxxxxxx>; John Siddle <jsiddle@xxxxxxxxxx>; open list <linux-kernel@xxxxxxxxxxxxxxx>; open list:PROC SYSCTL <linux-fsdevel@xxxxxxxxxxxxxxx> > Subject: [PATCH] RFC: hung task: Check specific tasks for long uninterruptible sleep state > > khungtask by default monitors either all tasks or no tasks at all > for long unterruptible sleeps. > For Android like environments this arrangement is not optimal because > on one hand it may be permissible to have some background(bg) task > in uninterruptible sleep state for long duration while on the other > hand it may not be permissible to have some foreground(fg) task like > surfaceflinger in uninterruptible sleep state for long duration. > So it would be good to have some arrangement so that few specified tasks > can be monitored by khungtaskd, on a need basis. > This change introduces a sysctl option, /proc/sys/kernel/ > hung_task_check_selected, to enable monitoring of selected tasks using > khungtask daemon. If this sysctl option is enabled then only the tasks > specified in /proc/hung_task_monitor_list are monitored otherwise all > tasks are monitored, just like the default case. > > Signed-off-by: Imran Khan <kimran@xxxxxxxxxxxxxx> > --- > include/linux/sched/sysctl.h | 1 + > kernel/hung_task.c | 121 ++++++++++++++++++++++++++++++++++++++++++- > kernel/sysctl.c | 8 +++ > 3 files changed, 128 insertions(+), 2 deletions(-) > > diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h > index 0f5ecd4..05892f1 100644 > --- a/include/linux/sched/sysctl.h > +++ b/include/linux/sched/sysctl.h > @@ -10,6 +10,7 @@ > extern unsigned int sysctl_hung_task_panic; > extern unsigned long sysctl_hung_task_timeout_secs; > extern int sysctl_hung_task_warnings; > +extern int sysctl_hung_task_check_selected; > extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write, > void __user *buffer, > size_t *lenp, loff_t *ppos); > diff --git a/kernel/hung_task.c b/kernel/hung_task.c > index 751593e..49f13fb 100644 > --- a/kernel/hung_task.c > +++ b/kernel/hung_task.c > @@ -16,12 +16,28 @@ > #include <linux/export.h> > #include <linux/sysctl.h> > #include <linux/utsname.h> > +#include <linux/uaccess.h> > +#include <linux/slab.h> > +#include <linux/proc_fs.h> > #include <linux/sched/signal.h> > #include <linux/sched/debug.h> > > #include <trace/events/sched.h> > > /* > + * Hung task that needs monitoring > + */ > +struct hung_task { > + struct list_head list; > + char comm[TASK_COMM_LEN]; > +}; > + > +static struct hung_task *monitor_list; > +int sysctl_hung_task_check_selected; > + > + > + > +/* > * The number of tasks checked: > */ > int __read_mostly sysctl_hung_task_check_count = PID_MAX_LIMIT; > @@ -76,6 +92,92 @@ static int __init hung_task_panic_setup(char *str) > .notifier_call = hung_task_panic, > }; > > +static void hung_task_monitor_setup(void) > +{ > + monitor_list = kmalloc(sizeof(*monitor_list), GFP_KERNEL); > + if (monitor_list) { > + INIT_LIST_HEAD(&monitor_list->list); > + memset(monitor_list->comm, 0, TASK_COMM_LEN); > + } > +} > + > + > +static int hung_task_info_show(struct seq_file *m, void *v) > +{ > + struct hung_task *ht; > + > + ht = list_entry(v, struct hung_task, list); > + seq_puts(m, ht->comm); > + > + return 0; > +} > + > +static void *hung_task_info_start(struct seq_file *m, loff_t *pos) > +{ > + return seq_list_start_head(&monitor_list->list, *pos); > +} > + > +static void *hung_task_info_next(struct seq_file *m, void *v, loff_t *pos) > +{ > + return seq_list_next(v, &monitor_list->list, pos); > +} > + > +static void hung_task_info_stop(struct seq_file *m, void *v) > +{ > +} > + > +const struct seq_operations hung_task_info_op = { > + .start = hung_task_info_start, > + .next = hung_task_info_next, > + .stop = hung_task_info_stop, > + .show = hung_task_info_show > +}; > + > +static int hung_task_info_open(struct inode *inode, struct file *file) > +{ > + return seq_open(file, &hung_task_info_op); > +} > + > +static ssize_t > +hung_task_info_write(struct file *file, const char __user *buf, size_t count, > + loff_t *offs) > +{ > + struct task_struct *g, *t; > + struct hung_task *ht = kmalloc(sizeof(*ht), GFP_KERNEL); > + > + if (!ht) > + return -ENOMEM; > + > + if (copy_from_user(ht->comm, buf, count)) > + return -EFAULT; > + ht->comm[count] = '\0'; > + > + for_each_process_thread(g, t) { > + if (!strncmp(t->comm, ht->comm, strlen(t->comm))) { > + list_add_tail(&ht->list, &monitor_list->list); > + return count; > + } > + } > + > + pr_err("Non-existing task: %s can't be monitored\n", ht->comm); > + return count; > +} > + > +static const struct file_operations hung_task_info_operations = { > + .open = hung_task_info_open, > + .read = seq_read, > + .write = hung_task_info_write, > + .llseek = seq_lseek, > + .release = seq_release, > +}; > + > +static int __init proc_hung_task_info_init(void) > +{ > + proc_create("hung_task_monitor_list", 0644, NULL, > + &hung_task_info_operations); > + return 0; > +} > + > static void check_hung_task(struct task_struct *t, unsigned long timeout) > { > unsigned long switch_count = t->nvcsw + t->nivcsw; > @@ -167,6 +269,7 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout) > int max_count = sysctl_hung_task_check_count; > int batch_count = HUNG_TASK_BATCHING; > struct task_struct *g, *t; > + struct hung_task *ht, *tmp; > > /* > * If the system crashed already then all bets are off, > @@ -186,9 +289,21 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout) > goto unlock; > } > /* use "==" to skip the TASK_KILLABLE tasks waiting on NFS */ > - if (t->state == TASK_UNINTERRUPTIBLE) > - check_hung_task(t, timeout); > + if (t->state == TASK_UNINTERRUPTIBLE) { > + if (sysctl_hung_task_check_selected) { > + list_for_each_entry_safe(ht, tmp, > + &monitor_list->list, > + list) > + if (!strncmp(ht->comm, t->comm, > + strlen(t->comm))) > + /* Task belongs to the selected group */ > + check_hung_task(t, timeout); > + } else { > + check_hung_task(t, timeout); > + } > + } > } > + > unlock: > rcu_read_unlock(); > if (hung_task_show_lock) > @@ -259,6 +374,8 @@ static int watchdog(void *dummy) > static int __init hung_task_init(void) > { > atomic_notifier_chain_register(&panic_notifier_list, &panic_block); > + hung_task_monitor_setup(); > + proc_hung_task_info_init(); > watchdog_task = kthread_run(watchdog, NULL, "khungtaskd"); > > return 0; > diff --git a/kernel/sysctl.c b/kernel/sysctl.c > index 6648fbb..ab45774 100644 > --- a/kernel/sysctl.c > +++ b/kernel/sysctl.c > @@ -1096,6 +1096,14 @@ static int sysrq_sysctl_handler(struct ctl_table *table, int write, > .proc_handler = proc_dointvec_minmax, > .extra1 = &neg_one, > }, > + { > + .procname = "hung_task_check_selected", > + .data = &sysctl_hung_task_check_selected, > + .maxlen = sizeof(int), > + .mode = 0644, > + .proc_handler = proc_dointvec_minmax, > + .extra1 = &neg_one, > + }, > #endif > #ifdef CONFIG_RT_MUTEXES > { > -- QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a\nmember of the Code Aurora Forum, hosted by The Linux Foundation