Hello ext2 maintainers, During my local fs stress test, I encountered this. Is it a false positive? Otherwise, I've made a small patch to stop reclaiming recursively into the FS from ext2_xattr_set(). Please consider taking this. I once considered whether it should be done in the VFS layer instead. I mean, every i_op->brabra() call in VFS would be surrounded by memalloc_nofs_{save,restore}(), by a macro or something. But I am afraid it may introduce unnecessary overhead, especially when the FS code doesn't allocate memory. So it is better to do it in the real FS operations. J. R. Okajima ---------------------------------------- WARNING: possible circular locking dependency detected 5.6.0-rc2aufsD+ #165 Tainted: G W ------------------------------------------------------ kswapd0/94 is trying to acquire lock: ffff91f670bd7610 (sb_internal#2){.+.+}, at: ext2_evict_inode+0x7e/0x130 but task is already holding lock: ffffffff8ca901e0 (fs_reclaim){+.+.}, at: __fs_reclaim_acquire+0x5/0x30 which lock already depends on the new lock. 
the existing dependency chain (in reverse order) is: -> #2 (fs_reclaim){+.+.}: fs_reclaim_acquire.part.98+0x29/0x30 __kmalloc+0x44/0x320 ext2_xattr_set+0xe7/0x880 __vfs_setxattr+0x66/0x80 __vfs_setxattr_noperm+0x67/0x1a0 vfs_setxattr+0x81/0xa0 setxattr+0x13b/0x1c0 path_setxattr+0xbe/0xe0 __x64_sys_setxattr+0x27/0x30 do_syscall_64+0x54/0x1f0 entry_SYSCALL_64_after_hwframe+0x49/0xbe -> #1 (&ei->xattr_sem#2){++++}: down_write+0x3d/0x70 ext2_xattr_delete_inode+0x26/0x200 ext2_evict_inode+0xc2/0x130 evict+0xd0/0x1a0 vfs_rmdir+0x15c/0x180 do_rmdir+0x1c6/0x220 do_syscall_64+0x54/0x1f0 entry_SYSCALL_64_after_hwframe+0x49/0xbe -> #0 (sb_internal#2){.+.+}: __lock_acquire+0xd30/0x1540 lock_acquire+0x90/0x170 __sb_start_write+0x135/0x220 ext2_evict_inode+0x7e/0x130 evict+0xd0/0x1a0 __dentry_kill+0xdc/0x180 shrink_dentry_list+0xdd/0x200 prune_dcache_sb+0x52/0x70 super_cache_scan+0xf3/0x1a0 do_shrink_slab+0x143/0x3a0 shrink_slab+0x22c/0x2c0 shrink_node+0x16c/0x670 balance_pgdat+0x2cc/0x530 kswapd+0xad/0x470 kthread+0x11d/0x140 ret_from_fork+0x24/0x50 other info that might help us debug this: Chain exists of: sb_internal#2 --> &ei->xattr_sem#2 --> fs_reclaim Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(fs_reclaim); lock(&ei->xattr_sem#2); lock(fs_reclaim); lock(sb_internal#2); *** DEADLOCK *** 3 locks held by kswapd0/94: #0: ffffffff8ca901e0 (fs_reclaim){+.+.}, at: __fs_reclaim_acquire+0x5/0x30 #1: ffffffff8ca81bc8 (shrinker_rwsem){++++}, at: shrink_slab+0x135/0x2c0 #2: ffff91f670bd70d8 (&type->s_umount_key#45){++++}, at: trylock_super+0x16/0x50 stack backtrace: CPU: 4 PID: 94 Comm: kswapd0 Tainted: G W 5.6.0-rc2aufsD+ #165 Hardware name: System manufacturer System Product Name/ROG STRIX H370-I GAMING, BIOS 2418 06/04/2019 Call Trace: dump_stack+0x71/0xa0 check_noncircular+0x172/0x190 __lock_acquire+0xd30/0x1540 lock_acquire+0x90/0x170 ? ext2_evict_inode+0x7e/0x130 __sb_start_write+0x135/0x220 ? ext2_evict_inode+0x7e/0x130 ? 
shrink_dentry_list+0x24/0x200 ext2_evict_inode+0x7e/0x130 evict+0xd0/0x1a0 __dentry_kill+0xdc/0x180 shrink_dentry_list+0xdd/0x200 prune_dcache_sb+0x52/0x70 super_cache_scan+0xf3/0x1a0 do_shrink_slab+0x143/0x3a0 shrink_slab+0x22c/0x2c0 shrink_node+0x16c/0x670 balance_pgdat+0x2cc/0x530 kswapd+0xad/0x470 ? finish_wait+0x80/0x80 ? balance_pgdat+0x530/0x530 kthread+0x11d/0x140 ? kthread_park+0x80/0x80 ret_from_fork+0x24/0x50 ---------------------------------------- a small patch diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c index 0456bc990b5e..85463fddbc17 100644 --- a/fs/ext2/xattr.c +++ b/fs/ext2/xattr.c @@ -61,6 +61,7 @@ #include <linux/quotaops.h> #include <linux/rwsem.h> #include <linux/security.h> +#include <linux/sched/mm.h> #include "ext2.h" #include "xattr.h" #include "acl.h" @@ -413,6 +414,7 @@ ext2_xattr_set(struct inode *inode, int name_index, const char *name, size_t name_len, free, min_offs = sb->s_blocksize; int not_found = 1, error; char *end; + unsigned int nofs_flag; /* * header -- Points either into bh, or to a temporarily @@ -532,7 +534,9 @@ ext2_xattr_set(struct inode *inode, int name_index, const char *name, unlock_buffer(bh); ea_bdebug(bh, "cloning"); + nofs_flag = memalloc_nofs_save(); header = kmemdup(HDR(bh), bh->b_size, GFP_KERNEL); + memalloc_nofs_restore(nofs_flag); error = -ENOMEM; if (header == NULL) goto cleanup; @@ -545,7 +549,9 @@ ext2_xattr_set(struct inode *inode, int name_index, const char *name, } } else { /* Allocate a buffer where we construct the new block. */ + nofs_flag = memalloc_nofs_save(); header = kzalloc(sb->s_blocksize, GFP_KERNEL); + memalloc_nofs_restore(nofs_flag); error = -ENOMEM; if (header == NULL) goto cleanup;