On 2018/06/16 4:40, Tetsuo Handa wrote: > Hmm, there might be other locations calling percpu_rwsem_release() ? There are other locations calling percpu_rwsem_release(), but only a few: include/linux/fs.h:1494:#define __sb_writers_release(sb, lev) \ include/linux/fs.h-1495- percpu_rwsem_release(&(sb)->s_writers.rw_sem[(lev)-1], 1, _THIS_IP_) fs/btrfs/transaction.c:1821: __sb_writers_release(fs_info->sb, SB_FREEZE_FS); fs/aio.c:1566: __sb_writers_release(file_inode(file)->i_sb, SB_FREEZE_WRITE); fs/xfs/xfs_aops.c:211: __sb_writers_release(ioend->io_inode->i_sb, SB_FREEZE_FS); I'd like to check what atomic_long_read(&sem->rw_sem.count) says when a hung task is reported. Dmitry, if you succeed in reproducing the khungtaskd messages, could you try this patch? --- include/linux/sched.h | 1 + kernel/hung_task.c | 25 +++++++++++++++++++++++++ kernel/locking/percpu-rwsem.c | 2 ++ 3 files changed, 28 insertions(+) diff --git a/include/linux/sched.h b/include/linux/sched.h index 87bf02d..af95251 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1179,6 +1179,7 @@ struct task_struct { /* Used by LSM modules for access restriction: */ void *security; #endif + struct percpu_rw_semaphore *pcpu_rwsem_read_waiting; /* * New fields for task_struct should be added above here, so that diff --git a/kernel/hung_task.c b/kernel/hung_task.c index 32b4794..409cc82 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -18,6 +18,7 @@ #include <linux/utsname.h> #include <linux/sched/signal.h> #include <linux/sched/debug.h> +#include <linux/percpu-rwsem.h> #include <trace/events/sched.h> @@ -77,6 +78,29 @@ static int __init hung_task_panic_setup(char *str) .notifier_call = hung_task_panic, }; +static void dump_percpu_rwsem_state(struct percpu_rw_semaphore *sem) +{ + unsigned int sum = 0; + int cpu; + + if (!sem) + return; + pr_info("percpu_rw_semaphore(%p)\n", sem); + pr_info(" ->rw_sem.count=0x%lx\n", + atomic_long_read(&sem->rw_sem.count)); + pr_info(" ->rss.gp_state=%d\n", 
sem->rss.gp_state); + pr_info(" ->rss.gp_count=%d\n", sem->rss.gp_count); + pr_info(" ->rss.cb_state=%d\n", sem->rss.cb_state); + pr_info(" ->rss.gp_type=%d\n", sem->rss.gp_type); + pr_info(" ->readers_block=%d\n", sem->readers_block); + for_each_possible_cpu(cpu) + sum += per_cpu(*sem->read_count, cpu); + pr_info(" ->read_count=%d\n", sum); + pr_info(" ->list_empty(rw_sem.wait_list)=%d\n", + list_empty(&sem->rw_sem.wait_list)); + pr_info(" ->writer.task=%p\n", sem->writer.task); +} + static void check_hung_task(struct task_struct *t, unsigned long timeout) { unsigned long switch_count = t->nvcsw + t->nivcsw; @@ -122,6 +146,7 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout) pr_err("\"echo 0 > /proc/sys/kernel/hung_task_timeout_secs\"" " disables this message.\n"); sched_show_task(t); + dump_percpu_rwsem_state(t->pcpu_rwsem_read_waiting); hung_task_show_lock = true; } diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c index 883cf1b..b3654ab 100644 --- a/kernel/locking/percpu-rwsem.c +++ b/kernel/locking/percpu-rwsem.c @@ -82,7 +82,9 @@ int __percpu_down_read(struct percpu_rw_semaphore *sem, int try) /* * Avoid lockdep for the down/up_read() we already have them. */ + current->pcpu_rwsem_read_waiting = sem; __down_read(&sem->rw_sem); + current->pcpu_rwsem_read_waiting = NULL; this_cpu_inc(*sem->read_count); __up_read(&sem->rw_sem); -- 1.8.3.1