Currently ->dq_data_lock is responsible for protecting three things: 1) dquot->dq_dqb info consistency 2) synchronization between ->dq_dqb and ->i_bytes 3) mem_dqinfo (per-sb data), 3b) and consistency between mem_dqinfo and dq_dqb for the following data: dqi_bgrace <=> dqb_btime, dqi_igrace <=> dqb_itime. In fact (1) and (2) are conceptually different from (3). By introducing a per-dquot data lock we can later split (1)(2) from (3). This patch simply introduces the new lock, without changing ->dq_data_lock. Signed-off-by: Dmitry Monakhov <dmonakhov@xxxxxxxxx> --- fs/ocfs2/quota_global.c | 4 ++ fs/ocfs2/quota_local.c | 4 ++ fs/quota/dquot.c | 130 ++++++++++++++++++++++++++++++++++------------- fs/quota/quota_tree.c | 4 ++ include/linux/quota.h | 1 + 5 files changed, 107 insertions(+), 36 deletions(-) diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c index 6c4bc77..1c8db96 100644 --- a/fs/ocfs2/quota_global.c +++ b/fs/ocfs2/quota_global.c @@ -503,6 +503,7 @@ int __ocfs2_sync_dquot(struct dquot *dquot, int freeing) * global quota file so that we don't overwrite any changes there. 
* We are */ spin_lock(&sb_dqopt(sb)->dq_data_lock); + spin_lock(&dquot->dq_lock); spacechange = dquot->dq_dqb.dqb_curspace - OCFS2_DQUOT(dquot)->dq_origspace; inodechange = dquot->dq_dqb.dqb_curinodes - @@ -556,6 +557,7 @@ int __ocfs2_sync_dquot(struct dquot *dquot, int freeing) __clear_bit(DQ_LASTSET_B + QIF_ITIME_B, &dquot->dq_flags); OCFS2_DQUOT(dquot)->dq_origspace = dquot->dq_dqb.dqb_curspace; OCFS2_DQUOT(dquot)->dq_originodes = dquot->dq_dqb.dqb_curinodes; + spin_unlock(&dquot->dq_lock); spin_unlock(&sb_dqopt(sb)->dq_data_lock); err = ocfs2_qinfo_lock(info, freeing); if (err < 0) { @@ -836,8 +838,10 @@ static int ocfs2_mark_dquot_dirty(struct dquot *dquot) /* In case user set some limits, sync dquot immediately to global * quota file so that information propagates quicker */ spin_lock(&sb_dqopt(sb)->dq_data_lock); + spin_lock(&dquot->dq_lock); if (dquot->dq_flags & mask) sync = 1; + spin_unlock(&dquot->dq_lock); spin_unlock(&sb_dqopt(sb)->dq_data_lock); /* This is a slight hack but we can't afford getting global quota * lock if we already have a transaction started. */ diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c index c078799..3d99ec5 100644 --- a/fs/ocfs2/quota_local.c +++ b/fs/ocfs2/quota_local.c @@ -525,6 +525,7 @@ static int ocfs2_recover_local_quota_file(struct inode *lqinode, } mutex_lock(&sb_dqopt(sb)->dqio_mutex); spin_lock(&sb_dqopt(sb)->dq_data_lock); + spin_lock(&dquot->dq_lock); /* Add usage from quota entry into quota changes * of our node. Auxiliary variables are important * due to signedness */ @@ -532,6 +533,7 @@ static int ocfs2_recover_local_quota_file(struct inode *lqinode, inodechange = le64_to_cpu(dqblk->dqb_inodemod); dquot->dq_dqb.dqb_curspace += spacechange; dquot->dq_dqb.dqb_curinodes += inodechange; + spin_unlock(&dquot->dq_lock); spin_unlock(&sb_dqopt(sb)->dq_data_lock); /* We want to drop reference held by the crashed * node. 
Since we have our own reference we know @@ -878,10 +880,12 @@ static void olq_set_dquot(struct buffer_head *bh, void *private) dqblk->dqb_id = cpu_to_le64(od->dq_dquot.dq_id); spin_lock(&sb_dqopt(sb)->dq_data_lock); + spin_lock(&od->dq_dquot.dq_lock); dqblk->dqb_spacemod = cpu_to_le64(od->dq_dquot.dq_dqb.dqb_curspace - od->dq_origspace); dqblk->dqb_inodemod = cpu_to_le64(od->dq_dquot.dq_dqb.dqb_curinodes - od->dq_originodes); + spin_unlock(&od->dq_dquot.dq_lock); spin_unlock(&sb_dqopt(sb)->dq_data_lock); mlog(0, "Writing local dquot %u space %lld inodes %lld\n", od->dq_dquot.dq_id, (long long)le64_to_cpu(dqblk->dqb_spacemod), diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 47bc291..2c87709 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -82,16 +82,18 @@ /* * There are three quota SMP locks. dq_list_lock protects all lists with quotas - * dq_data_lock protects data from dq_dqb and also mem_dqinfo structures and - * also guards consistency of dquot->dq_dqb with inode->i_blocks, i_bytes. + * dq_data_lock protects mem_dqinfo structures and mem_dqinfo with + * dq_dqb consistency. + * dq_lock protects dquot->dq_dqb and also guards consistency of + * dquot->dq_dqb with inode->i_blocks, i_bytes. * i_blocks and i_bytes updates itself are guarded by i_lock acquired directly * in inode_add_bytes() and inode_sub_bytes(). dq_state_lock protects * modifications of quota state (on quotaon and quotaoff) and readers who care * about latest values take it as well. * - * The spinlock ordering is hence: dq_data_lock > dq_list_lock > i_lock, + * The spinlock ordering is hence: + * dq_data_lock > dq_lock > dq_list_lock > i_lock, * dq_list_lock > dq_state_lock - * dq_list_lock > dq_list_lock * * Note that some things (eg. 
sb pointer, type, id) doesn't change during * the life of the dquot structure and so needn't to be protected by a lock @@ -378,6 +380,37 @@ static inline void dqput_all(struct dquot **dquot) dqput(dquot[cnt]); } +static inline void inode_dquot_lock(const struct inode *inode, + struct dquot **dquot) +{ + unsigned int cnt; + + for (cnt = 0; cnt < MAXQUOTAS; cnt++) { + dquot[cnt] = inode->i_dquot[cnt]; + if (dquot[cnt]) + spin_lock(&dquot[cnt]->dq_lock); + } +} + +static inline void dquot_lock_all(struct dquot **dquot) +{ + unsigned int cnt; + + for (cnt = 0; cnt < MAXQUOTAS; cnt++) + if (dquot[cnt]) + spin_lock(&dquot[cnt]->dq_lock); + +} + +static inline void dquot_unlock_all(struct dquot **dquot) +{ + unsigned int cnt; + + for (cnt = 0; cnt < MAXQUOTAS; cnt++) + if (dquot[cnt]) + spin_unlock(&dquot[cnt]->dq_lock); +} + /* This function needs dq_list_lock */ static inline int clear_dquot_dirty(struct dquot *dquot) { @@ -804,6 +837,7 @@ static struct dquot *get_empty_dquot(struct super_block *sb, int type) INIT_HLIST_NODE(&dquot->dq_hash); INIT_LIST_HEAD(&dquot->dq_dirty); init_waitqueue_head(&dquot->dq_wait_unused); + spin_lock_init(&dquot->dq_lock); dquot->dq_sb = sb; dquot->dq_type = type; atomic_set(&dquot->dq_count, 1); @@ -1220,7 +1254,7 @@ static int ignore_hardlimit(struct dquot *dquot) !(info->dqi_flags & V1_DQF_RSQUASH)); } -/* needs dq_data_lock */ +/* needs dq_data_lock, ->dq_lock */ static int check_idq(struct dquot *dquot, qsize_t inodes, char *warntype) { qsize_t newinodes = dquot->dq_dqb.dqb_curinodes + inodes; @@ -1257,7 +1291,7 @@ static int check_idq(struct dquot *dquot, qsize_t inodes, char *warntype) return 0; } -/* needs dq_data_lock */ +/* needs dq_data_lock, ->dq_lock */ static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *warntype) { qsize_t tspace; @@ -1547,6 +1581,7 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags) int warn = flags & DQUOT_SPACE_WARN; int reserve = flags & 
DQUOT_SPACE_RESERVE; int nofail = flags & DQUOT_SPACE_NOFAIL; + struct dquot *dquot[MAXQUOTAS]; /* * First test before acquiring mutex - solves deadlocks when we @@ -1562,32 +1597,34 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags) warntype[cnt] = QUOTA_NL_NOWARN; spin_lock(&sb_dqopt(inode->i_sb)->dq_data_lock); + inode_dquot_lock(inode, dquot); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - if (!inode->i_dquot[cnt]) + if (!dquot[cnt]) continue; - ret = check_bdq(inode->i_dquot[cnt], number, !warn, - warntype+cnt); + ret = check_bdq(dquot[cnt], number, !warn, warntype+cnt); if (ret && !nofail) { + dquot_unlock_all(dquot); spin_unlock(&sb_dqopt(inode->i_sb)->dq_data_lock); goto out_flush_warn; } } for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - if (!inode->i_dquot[cnt]) + if (!dquot[cnt]) continue; if (reserve) - dquot_resv_space(inode->i_dquot[cnt], number); + dquot_resv_space(dquot[cnt], number); else - dquot_incr_space(inode->i_dquot[cnt], number); + dquot_incr_space(dquot[cnt], number); } inode_incr_space(inode, number, reserve); + dquot_unlock_all(dquot); spin_unlock(&sb_dqopt(inode->i_sb)->dq_data_lock); if (reserve) goto out_flush_warn; - mark_all_dquot_dirty(inode->i_dquot); + mark_all_dquot_dirty(dquot); out_flush_warn: - flush_warnings(inode->i_dquot, warntype); + flush_warnings(dquot, warntype); up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); out: return ret; @@ -1601,6 +1638,7 @@ int dquot_alloc_inode(const struct inode *inode) { int cnt, ret = 0; char warntype[MAXQUOTAS]; + struct dquot *dquot[MAXQUOTAS]; /* First test before acquiring mutex - solves deadlocks when we * re-enter the quota code and are already holding the mutex */ @@ -1610,25 +1648,27 @@ int dquot_alloc_inode(const struct inode *inode) warntype[cnt] = QUOTA_NL_NOWARN; down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); spin_lock(&sb_dqopt(inode->i_sb)->dq_data_lock); + inode_dquot_lock(inode, dquot); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - if (!inode->i_dquot[cnt]) + if 
(!dquot[cnt]) continue; - ret = check_idq(inode->i_dquot[cnt], 1, warntype + cnt); + ret = check_idq(dquot[cnt], 1, warntype + cnt); if (ret) goto warn_put_all; } for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - if (!inode->i_dquot[cnt]) + if (!dquot[cnt]) continue; - dquot_incr_inodes(inode->i_dquot[cnt], 1); + dquot_incr_inodes(dquot[cnt], 1); } warn_put_all: + dquot_unlock_all(dquot); spin_unlock(&sb_dqopt(inode->i_sb)->dq_data_lock); if (ret == 0) - mark_all_dquot_dirty(inode->i_dquot); - flush_warnings(inode->i_dquot, warntype); + mark_all_dquot_dirty(dquot); + flush_warnings(dquot, warntype); up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); return ret; } @@ -1640,6 +1680,7 @@ EXPORT_SYMBOL(dquot_alloc_inode); int dquot_claim_space_nodirty(struct inode *inode, qsize_t number) { int cnt; + struct dquot *dquot[MAXQUOTAS]; if (!dquot_active(inode)) { inode_claim_rsv_space(inode, number); @@ -1648,16 +1689,17 @@ int dquot_claim_space_nodirty(struct inode *inode, qsize_t number) down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); spin_lock(&sb_dqopt(inode->i_sb)->dq_data_lock); + inode_dquot_lock(inode, dquot); /* Claim reserved quotas to allocated quotas */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - if (inode->i_dquot[cnt]) - dquot_claim_reserved_space(inode->i_dquot[cnt], - number); + if (dquot[cnt]) + dquot_claim_reserved_space(dquot[cnt], number); } /* Update inode bytes */ inode_claim_rsv_space(inode, number); + dquot_unlock_all(dquot); spin_unlock(&sb_dqopt(inode->i_sb)->dq_data_lock); - mark_all_dquot_dirty(inode->i_dquot); + mark_all_dquot_dirty(dquot); up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); return 0; } @@ -1671,6 +1713,7 @@ void __dquot_free_space(struct inode *inode, qsize_t number, int flags) unsigned int cnt; char warntype[MAXQUOTAS]; int reserve = flags & DQUOT_SPACE_RESERVE; + struct dquot *dquot[MAXQUOTAS]; /* First test before acquiring mutex - solves deadlocks when we * re-enter the quota code and are already holding the mutex */ @@ -1681,23 +1724,25 @@ void 
__dquot_free_space(struct inode *inode, qsize_t number, int flags) down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); spin_lock(&sb_dqopt(inode->i_sb)->dq_data_lock); + inode_dquot_lock(inode, dquot); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - if (!inode->i_dquot[cnt]) + if (!dquot[cnt]) continue; - warntype[cnt] = info_bdq_free(inode->i_dquot[cnt], number); + warntype[cnt] = info_bdq_free(dquot[cnt], number); if (reserve) - dquot_free_reserved_space(inode->i_dquot[cnt], number); + dquot_free_reserved_space(dquot[cnt], number); else - dquot_decr_space(inode->i_dquot[cnt], number); + dquot_decr_space(dquot[cnt], number); } inode_decr_space(inode, number, reserve); + dquot_unlock_all(dquot); spin_unlock(&sb_dqopt(inode->i_sb)->dq_data_lock); if (reserve) goto out_unlock; - mark_all_dquot_dirty(inode->i_dquot); + mark_all_dquot_dirty(dquot); out_unlock: - flush_warnings(inode->i_dquot, warntype); + flush_warnings(dquot, warntype); up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); } EXPORT_SYMBOL(__dquot_free_space); @@ -1709,6 +1754,7 @@ void dquot_free_inode(const struct inode *inode) { unsigned int cnt; char warntype[MAXQUOTAS]; + struct dquot *dquot[MAXQUOTAS]; /* First test before acquiring mutex - solves deadlocks when we * re-enter the quota code and are already holding the mutex */ @@ -1717,15 +1763,17 @@ void dquot_free_inode(const struct inode *inode) down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); spin_lock(&sb_dqopt(inode->i_sb)->dq_data_lock); + inode_dquot_lock(inode, dquot); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - if (!inode->i_dquot[cnt]) + if (!dquot[cnt]) continue; - warntype[cnt] = info_idq_free(inode->i_dquot[cnt], 1); - dquot_decr_inodes(inode->i_dquot[cnt], 1); + warntype[cnt] = info_idq_free(dquot[cnt], 1); + dquot_decr_inodes(dquot[cnt], 1); } + dquot_unlock_all(dquot); spin_unlock(&sb_dqopt(inode->i_sb)->dq_data_lock); - mark_all_dquot_dirty(inode->i_dquot); - flush_warnings(inode->i_dquot, warntype); + mark_all_dquot_dirty(dquot); + flush_warnings(dquot, 
warntype); up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); } EXPORT_SYMBOL(dquot_free_inode); @@ -1762,14 +1810,16 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to) return 0; } spin_lock(&sb_dqopt(inode->i_sb)->dq_data_lock); + inode_dquot_lock(inode, transfer_from); cur_space = inode_get_bytes(inode); rsv_space = inode_get_rsv_space(inode); space = cur_space + rsv_space; + dquot_lock_all(transfer_to); /* Build the transfer_from list and check the limits */ + for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (!transfer_to[cnt]) continue; - transfer_from[cnt] = inode->i_dquot[cnt]; ret = check_idq(transfer_to[cnt], 1, warntype_to + cnt); if (ret) goto over_quota; @@ -1806,6 +1856,8 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to) inode->i_dquot[cnt] = transfer_to[cnt]; } + dquot_unlock_all(transfer_to); + dquot_unlock_all(transfer_from); spin_unlock(&sb_dqopt(inode->i_sb)->dq_data_lock); up_write(&sb_dqopt(inode->i_sb)->dqptr_sem); @@ -1820,6 +1872,8 @@ warn: flush_warnings(transfer_from, warntype_from_space); return ret; over_quota: + dquot_unlock_all(transfer_to); + dquot_unlock_all(transfer_from); spin_unlock(&sb_dqopt(inode->i_sb)->dq_data_lock); up_write(&sb_dqopt(inode->i_sb)->dqptr_sem); goto warn; @@ -2363,6 +2417,7 @@ static void do_get_dqblk(struct dquot *dquot, struct fs_disk_quota *di) di->d_id = dquot->dq_id; spin_lock(&dq_opt(dquot)->dq_data_lock); + spin_lock(&dquot->dq_lock); di->d_blk_hardlimit = stoqb(dm->dqb_bhardlimit); di->d_blk_softlimit = stoqb(dm->dqb_bsoftlimit); di->d_ino_hardlimit = dm->dqb_ihardlimit; @@ -2371,6 +2426,7 @@ static void do_get_dqblk(struct dquot *dquot, struct fs_disk_quota *di) di->d_icount = dm->dqb_curinodes; di->d_btimer = dm->dqb_btime; di->d_itimer = dm->dqb_itime; + spin_unlock(&dquot->dq_lock); spin_unlock(&dq_opt(dquot)->dq_data_lock); } @@ -2415,6 +2471,7 @@ static int do_set_dqblk(struct dquot *dquot, struct fs_disk_quota *di) return -ERANGE; 
spin_lock(&dq_opt(dquot)->dq_data_lock); + spin_lock(&dquot->dq_lock); if (di->d_fieldmask & FS_DQ_BCOUNT) { dm->dqb_curspace = di->d_bcount - dm->dqb_rsvspace; check_blim = 1; @@ -2480,6 +2537,7 @@ static int do_set_dqblk(struct dquot *dquot, struct fs_disk_quota *di) clear_bit(DQ_FAKE_B, &dquot->dq_flags); else set_bit(DQ_FAKE_B, &dquot->dq_flags); + spin_unlock(&dquot->dq_lock); spin_unlock(&dq_opt(dquot)->dq_data_lock); mark_dquot_dirty(dquot); diff --git a/fs/quota/quota_tree.c b/fs/quota/quota_tree.c index 8b04f24..1643c30 100644 --- a/fs/quota/quota_tree.c +++ b/fs/quota/quota_tree.c @@ -376,7 +376,9 @@ int qtree_write_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot) } } spin_lock(&sb_dqopt(sb)->dq_data_lock); + spin_lock(&dquot->dq_lock); info->dqi_ops->mem2disk_dqblk(ddquot, dquot); + spin_unlock(&dquot->dq_lock); spin_unlock(&sb_dqopt(sb)->dq_data_lock); ret = sb->s_op->quota_write(sb, type, ddquot, info->dqi_entry_size, dquot->dq_off); @@ -632,12 +634,14 @@ int qtree_read_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot) goto out; } spin_lock(&sb_dqopt(sb)->dq_data_lock); + spin_lock(&dquot->dq_lock); info->dqi_ops->disk2mem_dqblk(dquot, ddquot); if (!dquot->dq_dqb.dqb_bhardlimit && !dquot->dq_dqb.dqb_bsoftlimit && !dquot->dq_dqb.dqb_ihardlimit && !dquot->dq_dqb.dqb_isoftlimit) set_bit(DQ_FAKE_B, &dquot->dq_flags); + spin_unlock(&dquot->dq_lock); spin_unlock(&sb_dqopt(sb)->dq_data_lock); kfree(ddquot); out: diff --git a/include/linux/quota.h b/include/linux/quota.h index 9e7a102..197660f 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -294,6 +294,7 @@ struct dquot { unsigned long dq_flags; /* See DQ_* */ short dq_type; /* Type of quota */ struct mem_dqblk dq_dqb; /* Diskquota usage */ + spinlock_t dq_lock; /* protect in mem_dqblk */ }; /* Operations which must be implemented by each quota format */ -- 1.6.6.1 -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to 
majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html