claim_swapfile() currently keeps the inode locked when it succeeds, or when the file is already a swapfile (returning -EBUSY). In the other error cases, it does not lock the inode. This inconsistency between the lock state and the return value is quite confusing and actually causes a bad unlock balance, shown below, in the "bad_swap" section of __do_sys_swapon(). This commit fixes this issue by unlocking the inode on the error path. It also restores the block size and releases the bdev, so that the caller can safely forget about the inode. ===================================== WARNING: bad unlock balance detected! 5.5.0-rc7+ #176 Not tainted ------------------------------------- swapon/4294 is trying to release lock (&sb->s_type->i_mutex_key) at: [<ffffffff8173a6eb>] __do_sys_swapon+0x94b/0x3550 but there are no more locks to release! other info that might help us debug this: no locks held by swapon/4294. stack backtrace: CPU: 5 PID: 4294 Comm: swapon Not tainted 5.5.0-rc7-BTRFS-ZNS+ #176 Hardware name: ASUS All Series/H87-PRO, BIOS 2102 07/29/2014 Call Trace: dump_stack+0xa1/0xea ? __do_sys_swapon+0x94b/0x3550 print_unlock_imbalance_bug.cold+0x114/0x123 ? __do_sys_swapon+0x94b/0x3550 lock_release+0x562/0xed0 ? kvfree+0x31/0x40 ? lock_downgrade+0x770/0x770 ? kvfree+0x31/0x40 ? rcu_read_lock_sched_held+0xa1/0xd0 ? rcu_read_lock_bh_held+0xb0/0xb0 up_write+0x2d/0x490 ? kfree+0x293/0x2f0 __do_sys_swapon+0x94b/0x3550 ? putname+0xb0/0xf0 ? kmem_cache_free+0x2e7/0x370 ? do_sys_open+0x184/0x3e0 ? generic_max_swapfile_size+0x40/0x40 ? do_syscall_64+0x27/0x4b0 ? entry_SYSCALL_64_after_hwframe+0x49/0xbe ? 
lockdep_hardirqs_on+0x38c/0x590 __x64_sys_swapon+0x54/0x80 do_syscall_64+0xa4/0x4b0 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x7f15da0a0dc7 Fixes: 1638045c3677 ("mm: set S_SWAPFILE on blockdev swap devices") Signed-off-by: Naohiro Aota <naohiro.aota@xxxxxxx> --- mm/swapfile.c | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/mm/swapfile.c b/mm/swapfile.c index bb3261d45b6a..dd5d7fa42282 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -2886,24 +2886,37 @@ static int claim_swapfile(struct swap_info_struct *p, struct inode *inode) p->old_block_size = block_size(p->bdev); error = set_blocksize(p->bdev, PAGE_SIZE); if (error < 0) - return error; + goto err; /* * Zoned block devices contain zones that have a sequential * write only restriction. Hence zoned block devices are not * suitable for swapping. Disallow them here. */ - if (blk_queue_is_zoned(p->bdev->bd_queue)) - return -EINVAL; + if (blk_queue_is_zoned(p->bdev->bd_queue)) { + error = -EINVAL; + goto err; + } p->flags |= SWP_BLKDEV; } else if (S_ISREG(inode->i_mode)) { p->bdev = inode->i_sb->s_bdev; } inode_lock(inode); - if (IS_SWAPFILE(inode)) - return -EBUSY; + if (IS_SWAPFILE(inode)) { + inode_unlock(inode); + error = -EBUSY; + goto err; + } return 0; + +err: + if (S_ISBLK(inode->i_mode)) { + set_blocksize(p->bdev, p->old_block_size); + blkdev_put(p->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); + } + + return error; } @@ -3157,10 +3170,12 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags) mapping = swap_file->f_mapping; inode = mapping->host; - /* If S_ISREG(inode->i_mode) will do inode_lock(inode); */ + /* do inode_lock(inode); */ error = claim_swapfile(p, inode); - if (unlikely(error)) + if (unlikely(error)) { + inode = NULL; goto bad_swap; + } /* * Read the swap header. -- 2.25.0