Previous commits released the write lock across quota operations but missed several places. In particular, the free operations can also call into the file system code and take the write lock, causing deadlocks. This patch introduces some more helpers and uses them for quota call sites. Without this patch applied, reiserfs with quotas enabled runs into deadlocks under anything more than trivial load. With this patch applied, reiserfs survives a 50-thread stress test with quotas enabled and a default ACL set; without the patch, the same load deadlocks fairly quickly. Signed-off-by: Jeff Mahoney <jeffm@xxxxxxxx> --- fs/reiserfs/bitmap.c | 26 +++++++++++++++++++------- fs/reiserfs/inode.c | 28 ++++++++++++++++++++-------- fs/reiserfs/lock.c | 7 +++++++ fs/reiserfs/namei.c | 24 ++++++++++++++---------- fs/reiserfs/reiserfs.h | 44 ++++++++++---------------------------------- fs/reiserfs/stree.c | 28 +++++++++++++++++++++++----- fs/reiserfs/super.c | 42 ++++++++++++++++++++++++++---------------- 7 files changed, 119 insertions(+), 80 deletions(-) --- a/fs/reiserfs/bitmap.c 2013-08-05 17:49:57.394468449 -0400 +++ b/fs/reiserfs/bitmap.c 2013-08-05 17:49:58.426452696 -0400 @@ -423,8 +423,11 @@ static void _reiserfs_free_block(struct set_sb_free_blocks(rs, sb_free_blocks(rs) + 1); journal_mark_dirty(th, s, sbh); - if (for_unformatted) + if (for_unformatted) { + int depth = reiserfs_write_unlock_nested(s); dquot_free_block_nodirty(inode, 1); + reiserfs_write_lock_nested(s, depth); + } } void reiserfs_free_block(struct reiserfs_transaction_handle *th, @@ -1128,6 +1131,7 @@ static inline int blocknrs_and_prealloc_ b_blocknr_t finish = SB_BLOCK_COUNT(s) - 1; int passno = 0; int nr_allocated = 0; + int depth; determine_prealloc_size(hint); if (!hint->formatted_node) { @@ -1137,10 +1141,13 @@ static inline int blocknrs_and_prealloc_ "reiserquota: allocating %d blocks id=%u", amount_needed, hint->inode->i_uid); #endif + depth = reiserfs_write_unlock_nested(s); quota_ret = 
dquot_alloc_block_nodirty(hint->inode, amount_needed); - if (quota_ret) /* Quota exceeded? */ + if (quota_ret) { /* Quota exceeded? */ + reiserfs_write_lock_nested(s, depth); return QUOTA_EXCEEDED; + } if (hint->preallocate && hint->prealloc_size) { #ifdef REISERQUOTA_DEBUG reiserfs_debug(s, REISERFS_DEBUG_CODE, @@ -1153,6 +1160,7 @@ static inline int blocknrs_and_prealloc_ hint->preallocate = hint->prealloc_size = 0; } /* for unformatted nodes, force large allocations */ + reiserfs_write_lock_nested(s, depth); } do { @@ -1181,9 +1189,12 @@ static inline int blocknrs_and_prealloc_ hint->inode->i_uid); #endif /* Free not allocated blocks */ - dquot_free_block_nodirty(hint->inode, + depth = reiserfs_write_unlock_nested(s); + dquot_free_block_nodirty( + hint->inode, amount_needed + hint->prealloc_size - nr_allocated); + reiserfs_write_lock_nested(s, depth); } while (nr_allocated--) reiserfs_free_block(hint->th, hint->inode, @@ -1214,10 +1225,11 @@ static inline int blocknrs_and_prealloc_ REISERFS_I(hint->inode)->i_prealloc_count, hint->inode->i_uid); #endif - dquot_free_block_nodirty(hint->inode, amount_needed + - hint->prealloc_size - nr_allocated - - REISERFS_I(hint->inode)-> - i_prealloc_count); + depth = reiserfs_write_unlock_nested(s); + dquot_free_block_nodirty(hint->inode, + amount_needed + hint->prealloc_size - nr_allocated - + REISERFS_I(hint->inode)->i_prealloc_count); + reiserfs_write_lock_nested(s, depth); } return CARRY_ON; --- a/fs/reiserfs/inode.c 2013-08-05 17:49:57.442467716 -0400 +++ b/fs/reiserfs/inode.c 2013-08-05 17:49:58.446452390 -0400 @@ -25,6 +25,7 @@ int reiserfs_commit_write(struct file *f void reiserfs_evict_inode(struct inode *inode) { + struct super_block *sb = inode->i_sb; /* We need blocks for transaction + (user+group) quota update (possibly delete) */ int jbegin_count = JOURNAL_PER_BALANCE_CNT * 2 + @@ -57,8 +58,11 @@ void reiserfs_evict_inode(struct inode * /* Do quota update inside a transaction for journaled quotas. 
We must do that * after delete_object so that quota updates go into the same transaction as * stat data deletion */ - if (!err) + if (!err) { + int depth = reiserfs_write_unlock_nested(sb); dquot_free_inode(inode); + reiserfs_write_lock_nested(sb, depth); + } if (journal_end(&th, inode->i_sb, jbegin_count)) goto out; @@ -1523,7 +1527,10 @@ struct inode *reiserfs_iget(struct super if (comp_short_keys(INODE_PKEY(inode), key) || is_bad_inode(inode)) { /* either due to i/o error or a stale NFS handle */ + int depth; + depth = reiserfs_write_unlock_nested(s); iput(inode); + reiserfs_write_lock_nested(s, depth); inode = NULL; } return inode; @@ -1540,12 +1547,12 @@ static struct dentry *reiserfs_get_dentr key.on_disk_key.k_dir_id = dir_id; reiserfs_write_lock(sb); inode = reiserfs_iget(sb, &key); + reiserfs_write_unlock(sb); if (inode && !IS_ERR(inode) && generation != 0 && generation != inode->i_generation) { iput(inode); inode = NULL; } - reiserfs_write_unlock(sb); return d_obtain_alias(inode); } @@ -1778,11 +1785,13 @@ int reiserfs_new_inode(struct reiserfs_t int err; int depth; + reiserfs_check_lock_nested(dir->i_sb, __func__); + BUG_ON(!th->t_trans_id); - reiserfs_write_unlock(inode->i_sb); + depth = reiserfs_write_unlock_nested(dir->i_sb); err = dquot_alloc_inode(inode); - reiserfs_write_lock(inode->i_sb); + reiserfs_write_lock_nested(dir->i_sb, depth); if (err) goto out_end_trans; if (!dir->i_nlink) { @@ -1809,10 +1818,10 @@ int reiserfs_new_inode(struct reiserfs_t memcpy(INODE_PKEY(inode), &(ih.ih_key), KEY_SIZE); args.dirid = le32_to_cpu(ih.ih_key.k_dir_id); - depth = reiserfs_write_unlock_nested(inode->i_sb); + reiserfs_write_unlock(inode->i_sb); err = insert_inode_locked4(inode, args.objectid, reiserfs_find_actor, &args); - reiserfs_write_lock_nested(inode->i_sb, depth); + reiserfs_write_lock(inode->i_sb); if (err) { err = -EINVAL; goto out_bad_inode; @@ -1983,14 +1992,16 @@ int reiserfs_new_inode(struct reiserfs_t INODE_PKEY(inode)->k_objectid = 0; /* Quota 
change must be inside a transaction for journaling */ + depth = reiserfs_write_unlock_nested(inode->i_sb); dquot_free_inode(inode); + reiserfs_write_lock_nested(inode->i_sb, depth); out_end_trans: journal_end(th, th->t_super, th->t_blocks_allocated); - reiserfs_write_unlock(inode->i_sb); /* Drop can be outside and it needs more credits so it's better to have it outside */ + depth = reiserfs_write_unlock_nested(inode->i_sb); dquot_drop(inode); - reiserfs_write_lock(inode->i_sb); + reiserfs_write_lock_nested(inode->i_sb, depth); inode->i_flags |= S_NOQUOTA; make_bad_inode(inode); @@ -1998,6 +2009,7 @@ int reiserfs_new_inode(struct reiserfs_t clear_nlink(inode); th->t_trans_id = 0; /* so the caller can't use this handle later */ unlock_new_inode(inode); /* OK to do even if we hadn't locked it */ + reiserfs_write_unlock(inode->i_sb); iput(inode); return err; } --- a/fs/reiserfs/lock.c 2013-08-05 17:49:57.482467105 -0400 +++ b/fs/reiserfs/lock.c 2013-08-05 17:49:58.458452207 -0400 @@ -111,6 +111,13 @@ void reiserfs_check_lock_depth(struct su WARN_ON(sb_i->lock_depth < 0); } +void reiserfs_check_lock_nested(struct super_block *sb, const char *caller) +{ + struct reiserfs_sb_info *sb_i = REISERFS_SB(sb); + + WARN_ON(sb_i->lock_depth > 0); +} + #ifdef CONFIG_REISERFS_CHECK void reiserfs_lock_check_recursive(struct super_block *sb) { --- a/fs/reiserfs/namei.c 2013-08-05 17:49:57.498466861 -0400 +++ b/fs/reiserfs/namei.c 2013-08-05 17:49:58.470452024 -0400 @@ -604,8 +604,9 @@ static int reiserfs_create(struct inode retval = reiserfs_new_inode(&th, dir, mode, NULL, 0 /*i_size */ , dentry, inode, &security); + /* inode is dropped and write lock is released */ if (retval) - goto out_failed; + return retval; inode->i_op = &reiserfs_file_inode_operations; inode->i_fop = &reiserfs_file_operations; @@ -678,9 +679,9 @@ static int reiserfs_mknod(struct inode * retval = reiserfs_new_inode(&th, dir, mode, NULL, 0 /*i_size */ , dentry, inode, &security); - if (retval) { - goto 
out_failed; - } + /* inode is dropped and write lock is released */ + if (retval) + return retval; inode->i_op = &reiserfs_special_inode_operations; init_special_inode(inode, inode->i_mode, rdev); @@ -763,9 +764,10 @@ static int reiserfs_mkdir(struct inode * old_format_only(dir->i_sb) ? EMPTY_DIR_SIZE_V1 : EMPTY_DIR_SIZE, dentry, inode, &security); + /* inode is dropped and write lock is released */ if (retval) { DEC_DIR_INODE_NLINK(dir) - goto out_failed; + return retval; } reiserfs_update_inode_transaction(inode); @@ -787,8 +789,9 @@ static int reiserfs_mkdir(struct inode * if (err) retval = err; unlock_new_inode(inode); + reiserfs_write_unlock(dir->i_sb); iput(inode); - goto out_failed; + return retval; } // the above add_entry did not update dir's stat data reiserfs_update_sd(&th, dir); @@ -1060,9 +1063,9 @@ static int reiserfs_symlink(struct inode reiserfs_new_inode(&th, parent_dir, mode, name, strlen(symname), dentry, inode, &security); kfree(name); - if (retval) { /* reiserfs_new_inode iputs for us */ - goto out_failed; - } + /* inode is dropped and write lock is released */ + if (retval) + return retval; reiserfs_update_inode_transaction(inode); reiserfs_update_inode_transaction(parent_dir); @@ -1084,8 +1087,9 @@ static int reiserfs_symlink(struct inode if (err) retval = err; unlock_new_inode(inode); + reiserfs_write_unlock(parent_dir->i_sb); iput(inode); - goto out_failed; + return retval; } unlock_new_inode(inode); --- a/fs/reiserfs/reiserfs.h 2013-08-05 17:49:57.562465884 -0400 +++ b/fs/reiserfs/reiserfs.h 2013-08-05 17:49:58.506451475 -0400 @@ -13,6 +13,7 @@ #include <linux/bitops.h> #include <linux/proc_fs.h> #include <linux/buffer_head.h> +#include <linux/quotaops.h> /* the 32 bit compat definitions with int argument */ #define REISERFS_IOC32_UNPACK _IOW(0xCD, 1, int) @@ -632,6 +633,7 @@ void reiserfs_write_lock(struct super_bl void reiserfs_write_unlock(struct super_block *s); int __must_check reiserfs_write_unlock_nested(struct super_block *s); void 
reiserfs_write_lock_nested(struct super_block *s, int depth); +void reiserfs_check_lock_nested(struct super_block *s, const char *caller); #ifdef CONFIG_REISERFS_CHECK void reiserfs_lock_check_recursive(struct super_block *s); @@ -667,52 +669,26 @@ static inline void reiserfs_lock_check_r * - The inode mutex */ -#define reiserfs_safe(sb, action) \ -do { \ - struct super_block *__sb = (sb); \ - int __depth; \ - __depth = reiserfs_write_unlock_nested(__sb); \ - (action); \ - reiserfs_write_lock_nested(__sb, __depth); \ -} while(0) - -#define reiserfs_mutex_lock_safe(mtx, s) reiserfs_safe(s, mutex_lock(mtx)) -#define reiserfs_mutex_lock_nested_safe(mtx, subclass, s) \ - reiserfs_safe(s, mutex_lock_nested(mtx, subclass)) -#define reiserfs_down_read_safe(sem, s) reiserfs_safe(s, down_read(sem)) - /* * When we schedule, we usually want to also release the write lock, * according to the previous bkl based locking scheme of reiserfs. */ static inline void reiserfs_cond_resched(struct super_block *s) { - if (need_resched()) - reiserfs_safe(s, schedule()); -} - -static inline struct buffer_head * -reiserfs_safe_sb_bread(struct super_block *s, sector_t block) -{ - int depth; - struct buffer_head *bh; - - depth = reiserfs_write_unlock_nested(s); - bh = sb_bread(s, block); - reiserfs_write_lock_nested(s, depth); - - return bh; + if (need_resched()) { + int depth = reiserfs_write_unlock_nested(s); + schedule(); + reiserfs_write_lock_nested(s, depth); + } } -void reiserfs_safe_lock_buffer(struct buffer_head *bh); - -static inline void -reiserfs_safe_wait_on_buffer(struct buffer_head *bh, struct super_block *s) +static inline void reiserfs_mutex_lock_safe(struct mutex *m, + struct super_block *s) { int depth; depth = reiserfs_write_unlock_nested(s); - __wait_on_buffer(bh); + mutex_lock(m); reiserfs_write_lock_nested(s, depth); } --- a/fs/reiserfs/stree.c 2013-08-05 17:49:57.534466311 -0400 +++ b/fs/reiserfs/stree.c 2013-08-05 17:49:58.482451841 -0400 @@ -1198,6 +1198,7 @@ int 
reiserfs_delete_item(struct reiserfs struct item_head *q_ih; int quota_cut_bytes; int ret_value, del_size, removed; + int depth; #ifdef CONFIG_REISERFS_CHECK char mode; @@ -1307,7 +1308,9 @@ int reiserfs_delete_item(struct reiserfs "reiserquota delete_item(): freeing %u, id=%u type=%c", quota_cut_bytes, inode->i_uid, head2type(&s_ih)); #endif + depth = reiserfs_write_unlock_nested(sb); dquot_free_space_nodirty(inode, quota_cut_bytes); + reiserfs_write_lock_nested(sb, depth); /* Return deleted body length */ return ret_value; @@ -1340,6 +1343,7 @@ void reiserfs_delete_solid_item(struct r struct cpu_key cpu_key; int retval; int quota_cut_bytes = 0; + struct super_block *sb = th->t_super; BUG_ON(!th->t_trans_id); @@ -1385,14 +1389,17 @@ void reiserfs_delete_solid_item(struct r if (retval == CARRY_ON) { do_balance(&tb, NULL, NULL, M_DELETE); if (inode) { /* Should we count quota for item? (we don't count quotas for save-links) */ + int depth; #ifdef REISERQUOTA_DEBUG reiserfs_debug(th->t_super, REISERFS_DEBUG_CODE, "reiserquota delete_solid_item(): freeing %u id=%u type=%c", quota_cut_bytes, inode->i_uid, key2type(key)); #endif + depth = reiserfs_write_unlock_nested(sb); dquot_free_space_nodirty(inode, quota_cut_bytes); + reiserfs_write_lock_nested(sb, depth); } break; } @@ -1569,6 +1576,7 @@ int reiserfs_cut_from_item(struct reiser int retval2 = -1; int quota_cut_bytes; loff_t tail_pos = 0; + int depth; BUG_ON(!th->t_trans_id); @@ -1741,7 +1749,9 @@ int reiserfs_cut_from_item(struct reiser "reiserquota cut_from_item(): freeing %u id=%u type=%c", quota_cut_bytes, inode->i_uid, '?'); #endif + depth = reiserfs_write_unlock_nested(sb); dquot_free_space_nodirty(inode, quota_cut_bytes); + reiserfs_write_lock_nested(sb, depth); return ret_value; } @@ -1964,6 +1974,7 @@ int reiserfs_paste_into_item(struct reis struct tree_balance s_paste_balance; int retval; int fs_gen; + int depth; BUG_ON(!th->t_trans_id); @@ -1976,9 +1987,9 @@ int reiserfs_paste_into_item(struct reis 
key2type(&(key->on_disk_key))); #endif - reiserfs_write_unlock(inode->i_sb); + depth = reiserfs_write_unlock_nested(inode->i_sb); retval = dquot_alloc_space_nodirty(inode, pasted_size); - reiserfs_write_lock(inode->i_sb); + reiserfs_write_lock_nested(inode->i_sb, depth); if (retval) { pathrelse(search_path); return retval; @@ -2035,7 +2046,9 @@ int reiserfs_paste_into_item(struct reis pasted_size, inode->i_uid, key2type(&(key->on_disk_key))); #endif + depth = reiserfs_write_unlock_nested(inode->i_sb); dquot_free_space_nodirty(inode, pasted_size); + reiserfs_write_lock_nested(inode->i_sb, depth); return retval; } @@ -2050,10 +2063,12 @@ int reiserfs_insert_item(struct reiserfs struct item_head *ih, struct inode *inode, const char *body) { + struct super_block *sb = th->t_super; struct tree_balance s_ins_balance; int retval; int fs_gen = 0; int quota_bytes = 0; + int depth; BUG_ON(!th->t_trans_id); @@ -2071,11 +2086,11 @@ int reiserfs_insert_item(struct reiserfs "reiserquota insert_item(): allocating %u id=%u type=%c", quota_bytes, inode->i_uid, head2type(ih)); #endif - reiserfs_write_unlock(inode->i_sb); /* We can't dirty inode here. It would be immediately written but * appropriate stat item isn't inserted yet... 
*/ + depth = reiserfs_write_unlock_nested(sb); retval = dquot_alloc_space_nodirty(inode, quota_bytes); - reiserfs_write_lock(inode->i_sb); + reiserfs_write_lock_nested(sb, depth); if (retval) { pathrelse(path); return retval; @@ -2126,7 +2141,10 @@ int reiserfs_insert_item(struct reiserfs "reiserquota insert_item(): freeing %u id=%u type=%c", quota_bytes, inode->i_uid, head2type(ih)); #endif - if (inode) + if (inode) { + depth = reiserfs_write_unlock_nested(sb); dquot_free_space_nodirty(inode, quota_bytes); + reiserfs_write_lock_nested(sb, depth); + } return retval; } --- a/fs/reiserfs/super.c 2013-08-05 17:49:57.546466129 -0400 +++ b/fs/reiserfs/super.c 2013-08-05 17:49:58.494451658 -0400 @@ -198,6 +198,7 @@ static int finish_unfinished(struct supe int done; struct inode *inode; int truncate; + int depth; #ifdef CONFIG_QUOTA int i; int ms_active_set; @@ -298,9 +299,9 @@ static int finish_unfinished(struct supe retval = remove_save_link_only(s, &save_link_key, 0); continue; } - reiserfs_write_unlock(s); + depth = reiserfs_write_unlock_nested(inode->i_sb); dquot_initialize(inode); - reiserfs_write_lock(s); + reiserfs_write_lock_nested(inode->i_sb, depth); if (truncate && S_ISDIR(inode->i_mode)) { /* We got a truncate request for a dir which is impossible. 
@@ -356,10 +357,12 @@ static int finish_unfinished(struct supe #ifdef CONFIG_QUOTA /* Turn quotas off */ + reiserfs_write_unlock(s); for (i = 0; i < MAXQUOTAS; i++) { if (sb_dqopt(s)->files[i] && quota_enabled[i]) dquot_quota_off(s, i); } + reiserfs_write_lock(s); if (ms_active_set) /* Restore the flag back */ s->s_flags &= ~MS_ACTIVE; @@ -2097,7 +2100,7 @@ static int reiserfs_statfs(struct dentry static int reiserfs_write_dquot(struct dquot *dquot) { struct reiserfs_transaction_handle th; - int ret, err; + int ret, err, depth; reiserfs_write_lock(dquot->dq_sb); ret = @@ -2105,9 +2108,9 @@ static int reiserfs_write_dquot(struct d REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_sb)); if (ret) goto out; - reiserfs_write_unlock(dquot->dq_sb); + depth = reiserfs_write_unlock_nested(dquot->dq_sb); ret = dquot_commit(dquot); - reiserfs_write_lock(dquot->dq_sb); + reiserfs_write_lock_nested(dquot->dq_sb, depth); err = journal_end(&th, dquot->dq_sb, REISERFS_QUOTA_TRANS_BLOCKS(dquot->dq_sb)); @@ -2121,7 +2124,7 @@ out: static int reiserfs_acquire_dquot(struct dquot *dquot) { struct reiserfs_transaction_handle th; - int ret, err; + int ret, err, depth; reiserfs_write_lock(dquot->dq_sb); ret = @@ -2129,9 +2132,9 @@ static int reiserfs_acquire_dquot(struct REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_sb)); if (ret) goto out; - reiserfs_write_unlock(dquot->dq_sb); + depth = reiserfs_write_unlock_nested(dquot->dq_sb); ret = dquot_acquire(dquot); - reiserfs_write_lock(dquot->dq_sb); + reiserfs_write_lock_nested(dquot->dq_sb, depth); err = journal_end(&th, dquot->dq_sb, REISERFS_QUOTA_INIT_BLOCKS(dquot->dq_sb)); @@ -2145,20 +2148,21 @@ out: static int reiserfs_release_dquot(struct dquot *dquot) { struct reiserfs_transaction_handle th; - int ret, err; + int ret, err, depth; reiserfs_write_lock(dquot->dq_sb); ret = journal_begin(&th, dquot->dq_sb, REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_sb)); - reiserfs_write_unlock(dquot->dq_sb); + depth = reiserfs_write_unlock_nested(dquot->dq_sb); if (ret) { /* 
Release dquot anyway to avoid endless cycle in dqput() */ dquot_release(dquot); + reiserfs_write_lock_nested(dquot->dq_sb, depth); goto out; } ret = dquot_release(dquot); - reiserfs_write_lock(dquot->dq_sb); + reiserfs_write_lock_nested(dquot->dq_sb, depth); err = journal_end(&th, dquot->dq_sb, REISERFS_QUOTA_DEL_BLOCKS(dquot->dq_sb)); @@ -2183,16 +2187,16 @@ static int reiserfs_mark_dquot_dirty(str static int reiserfs_write_info(struct super_block *sb, int type) { struct reiserfs_transaction_handle th; - int ret, err; + int ret, err, depth; /* Data block + inode block */ reiserfs_write_lock(sb); ret = journal_begin(&th, sb, 2); if (ret) goto out; - reiserfs_write_unlock(sb); + depth = reiserfs_write_unlock_nested(sb); ret = dquot_commit_info(sb, type); - reiserfs_write_lock(sb); + reiserfs_write_lock_nested(sb, depth); err = journal_end(&th, sb, 2); if (!ret && err) ret = err; @@ -2206,8 +2210,14 @@ out: */ static int reiserfs_quota_on_mount(struct super_block *sb, int type) { - return dquot_quota_on_mount(sb, REISERFS_SB(sb)->s_qf_names[type], - REISERFS_SB(sb)->s_jquota_fmt, type); + int ret, depth; + + depth = reiserfs_write_unlock_nested(sb); + ret = dquot_quota_on_mount(sb, REISERFS_SB(sb)->s_qf_names[type], + REISERFS_SB(sb)->s_jquota_fmt, type); + reiserfs_write_lock_nested(sb, depth); + + return ret; } /* -- To unsubscribe from this list: send the line "unsubscribe reiserfs-devel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html