On 08/20, Dave Chinner wrote: > > On Wed, Aug 19, 2015 at 05:00:26PM +0200, Oleg Nesterov wrote: > > > > Yes, we hold SB_FREEZE_WRITE lock, so recursive SB_FREEZE_FS is safe. > > > > But, this means that the comment in __sb_start_write() is still correct, > > "XFS for example gets freeze protection on internal level twice" is true, > > and we can not remove this force_trylock hack. > > You've hit a very rare corner case of a rare corner case. Yes, I see, thanks. Just fyi, I ran the tests again with the stupid debugging patch below and I do not see anything new in dmesg. So perhaps xfs_create() which does the "recursive" xfs_trans_alloc() is the only source of double-lock in fs/xfs/. Oleg. --- diff --git a/fs/super.c b/fs/super.c index a38ad91..32b1846 100644 --- a/fs/super.c +++ b/fs/super.c @@ -1153,13 +1153,57 @@ out: return ERR_PTR(error); } +void __sb_writers_acquired(struct super_block *sb, int level) +{ + struct percpu_rw_semaphore *sem = sb->s_writers.rw_sem + level-1; + struct ltrace *lt = current->ltrace + level-1; + + WARN_ON(percpu_rwsem_is_held(sem)); + + percpu_rwsem_acquire(sem, 1, _RET_IP_); + + if (!lt->lock) { + lt->lock = sem; + save_stack_trace(<->trace); + } +} +EXPORT_SYMBOL(__sb_writers_acquired); + +void __sb_writers_release(struct super_block *sb, int level) +{ + struct percpu_rw_semaphore *sem = sb->s_writers.rw_sem + level-1; + struct ltrace *lt = current->ltrace + level-1; + + WARN_ON(!lt->lock); + + percpu_rwsem_release(sem, 1, _RET_IP_); + + WARN_ON(percpu_rwsem_is_held(sem)); + if (lt->lock == sem) { + lt->lock = NULL; + lt->trace.nr_entries = 0; + } +} +EXPORT_SYMBOL(__sb_writers_release); + + /* * This is an internal function, please use sb_end_{write,pagefault,intwrite} * instead. */ void __sb_end_write(struct super_block *sb, int level) { + struct percpu_rw_semaphore *sem = sb->s_writers.rw_sem + level-1; + struct ltrace *lt = current->ltrace + level-1; + + WARN_ON(!lt->lock); + percpu_up_read(sb->s_writers.rw_sem + level-1); + + if (lt->lock == sem && !percpu_rwsem_is_held(sem)) { + lt->lock = NULL; + lt->trace.nr_entries = 0; + } } EXPORT_SYMBOL(__sb_end_write); @@ -1169,10 +1213,22 @@ EXPORT_SYMBOL(__sb_end_write); */ int __sb_start_write(struct super_block *sb, int level, bool wait) { + struct percpu_rw_semaphore *sem = sb->s_writers.rw_sem + level-1; + struct ltrace *lt = current->ltrace + level-1; bool force_trylock = false; int ret = 1; + WARN_ON(lt->lock == sem && !percpu_rwsem_is_held(sem)); + #ifdef CONFIG_LOCKDEP + if (wait && lt->lock == sem) { + pr_crit("XXXXX %s:%d lev=%d\n", current->comm, current->pid, level); + dump_stack(); + debug_show_held_locks(current); + pr_crit("Prev Trace:\n"); + print_stack_trace(<->trace, 0); + } + /* * We want lockdep to tell us about possible deadlocks with freezing * but it's it bit tricky to properly instrument it. Getting a freeze @@ -1198,6 +1254,10 @@ int __sb_start_write(struct super_block *sb, int level, bool wait) ret = percpu_down_read_trylock(sb->s_writers.rw_sem + level-1); WARN_ON(force_trylock & !ret); + if (ret && !lt->lock) { + lt->lock = sem; + save_stack_trace(<->trace); + } return ret; } EXPORT_SYMBOL(__sb_start_write); diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c index 08d4fe4..33bf46a 100644 --- a/fs/xfs/xfs_log.c +++ b/fs/xfs/xfs_log.c @@ -614,7 +614,7 @@ xfs_log_mount( int min_logfsbs; if (!(mp->m_flags & XFS_MOUNT_NORECOVERY)) { - xfs_notice(mp, "Mounting V%d Filesystem", + if (0) xfs_notice(mp, "Mounting V%d Filesystem", XFS_SB_VERSION_NUM(&mp->m_sb)); } else { xfs_notice(mp, diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 480ebba..ed241dd 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -4607,7 +4607,7 @@ xlog_recover_finish( : "internal"); log->l_flags &= ~XLOG_RECOVERY_NEEDED; } else { - xfs_info(log->l_mp, "Ending clean mount"); + if (0) xfs_info(log->l_mp, "Ending clean mount"); } return 0; } diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 1fb16562..f680f3c 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1573,7 +1573,7 @@ xfs_fs_put_super( { struct xfs_mount *mp = XFS_M(sb); - xfs_notice(mp, "Unmounting Filesystem"); + if (0) xfs_notice(mp, "Unmounting Filesystem"); xfs_filestream_unmount(mp); xfs_unmountfs(mp); diff --git a/include/linux/fs.h b/include/linux/fs.h index ce356f6..8514e65 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1385,10 +1385,8 @@ extern struct timespec current_fs_time(struct super_block *sb); void -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html