On Fri, 22 Oct 2010 21:34:55 +0400, Dmitry Monakhov <dmonakhov@xxxxxxxxxx> wrote:
> From: Dmitry Monakhov <dmonakhov@xxxxxxxxx>
>
> Currently dq_data_lock is global, which is bad for scalability.
> In fact, different super_blocks have no shared quota data.
> So we may simply convert the global lock to per-sb locks.
>
> Signed-off-by: Dmitry Monakhov <dmonakhov@xxxxxxxxxx>
> ---
> fs/ocfs2/quota_global.c | 20 ++++++++--------
> fs/ocfs2/quota_local.c | 13 ++++++-----
> fs/quota/dquot.c | 54 ++++++++++++++++++++++++----------------------
> fs/quota/quota_tree.c | 8 +++---
> fs/quota/quota_v2.c | 4 +-
> include/linux/quota.h | 3 +-
> 6 files changed, 52 insertions(+), 50 deletions(-)
>
> diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
> index b464947..3e7fda8 100644
> --- a/fs/ocfs2/quota_global.c
> +++ b/fs/ocfs2/quota_global.c
> @@ -300,12 +300,12 @@ int ocfs2_lock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex)
> status = ocfs2_inode_lock(oinfo->dqi_gqinode, &bh, ex);
> if (status < 0)
> return status;
> - spin_lock(&dq_data_lock);
> + spin_lock(&dqopts(oinfo->dqi_gqinode->i_sb)->dq_data_lock);
> if (!oinfo->dqi_gqi_count++)
> oinfo->dqi_gqi_bh = bh;
> else
> WARN_ON(bh != oinfo->dqi_gqi_bh);
> - spin_unlock(&dq_data_lock);
> + spin_unlock(&dqopts(oinfo->dqi_gqinode->i_sb)->dq_data_lock);
> if (ex) {
> mutex_lock(&oinfo->dqi_gqinode->i_mutex);
> down_write(&OCFS2_I(oinfo->dqi_gqinode)->ip_alloc_sem);
> @@ -325,10 +325,10 @@ void ocfs2_unlock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex)
> }
> ocfs2_inode_unlock(oinfo->dqi_gqinode, ex);
> brelse(oinfo->dqi_gqi_bh);
> - spin_lock(&dq_data_lock);
> + spin_lock(&dqopts(oinfo->dqi_gqinode->i_sb)->dq_data_lock);
> if (!--oinfo->dqi_gqi_count)
> oinfo->dqi_gqi_bh = NULL;
> - spin_unlock(&dq_data_lock);
> + spin_unlock(&dqopts(oinfo->dqi_gqinode->i_sb)->dq_data_lock);
> }
>
> /* Read information header from global quota file */
> @@ -421,11 +421,11 @@ static int __ocfs2_global_write_info(struct super_block *sb, int type)
> struct ocfs2_global_disk_dqinfo dinfo;
> ssize_t size;
>
> - spin_lock(&dq_data_lock);
> + spin_lock(&dqopts(sb)->dq_data_lock);
> info->dqi_flags &= ~DQF_INFO_DIRTY;
> dinfo.dqi_bgrace = cpu_to_le32(info->dqi_bgrace);
> dinfo.dqi_igrace = cpu_to_le32(info->dqi_igrace);
> - spin_unlock(&dq_data_lock);
> + spin_unlock(&dqopts(sb)->dq_data_lock);
> dinfo.dqi_syncms = cpu_to_le32(oinfo->dqi_syncms);
> dinfo.dqi_blocks = cpu_to_le32(oinfo->dqi_gi.dqi_blocks);
> dinfo.dqi_free_blk = cpu_to_le32(oinfo->dqi_gi.dqi_free_blk);
> @@ -502,7 +502,7 @@ int __ocfs2_sync_dquot(struct dquot *dquot, int freeing)
> /* Update space and inode usage. Get also other information from
> * global quota file so that we don't overwrite any changes there.
> * We are */
> - spin_lock(&dq_data_lock);
> + spin_lock(&dqopts(sb)->dq_data_lock);
> spacechange = dquot->dq_dqb.dqb_curspace -
> OCFS2_DQUOT(dquot)->dq_origspace;
> inodechange = dquot->dq_dqb.dqb_curinodes -
> @@ -556,7 +556,7 @@ int __ocfs2_sync_dquot(struct dquot *dquot, int freeing)
> __clear_bit(DQ_LASTSET_B + QIF_ITIME_B, &dquot->dq_flags);
> OCFS2_DQUOT(dquot)->dq_origspace = dquot->dq_dqb.dqb_curspace;
> OCFS2_DQUOT(dquot)->dq_originodes = dquot->dq_dqb.dqb_curinodes;
> - spin_unlock(&dq_data_lock);
> + spin_unlock(&dqopts(sb)->dq_data_lock);
> err = ocfs2_qinfo_lock(info, freeing);
> if (err < 0) {
> mlog(ML_ERROR, "Failed to lock quota info, loosing quota write"
> @@ -835,10 +835,10 @@ static int ocfs2_mark_dquot_dirty(struct dquot *dquot)
>
> /* In case user set some limits, sync dquot immediately to global
> * quota file so that information propagates quicker */
> - spin_lock(&dq_data_lock);
> + spin_lock(&dqopts(sb)->dq_data_lock);
> if (dquot->dq_flags & mask)
> sync = 1;
> - spin_unlock(&dq_data_lock);
> + spin_unlock(&dqopts(sb)->dq_data_lock);
> /* This is a slight hack but we can't afford getting global quota
> * lock if we already have a transaction started. */
> if (!sync || journal_current_handle()) {
> diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c
> index 7c30ba3..2d2e981 100644
> --- a/fs/ocfs2/quota_local.c
> +++ b/fs/ocfs2/quota_local.c
> @@ -288,14 +288,15 @@ static void olq_update_info(struct buffer_head *bh, void *private)
> struct mem_dqinfo *info = private;
> struct ocfs2_mem_dqinfo *oinfo = info->dqi_priv;
> struct ocfs2_local_disk_dqinfo *ldinfo;
> + struct quota_info *dqopt = dqopts(oinfo->dqi_gqinode->i_sb);
>
> ldinfo = (struct ocfs2_local_disk_dqinfo *)(bh->b_data +
> OCFS2_LOCAL_INFO_OFF);
> - spin_lock(&dq_data_lock);
> + spin_lock(&dqopt->dq_data_lock);
> ldinfo->dqi_flags = cpu_to_le32(info->dqi_flags & DQF_MASK);
> ldinfo->dqi_chunks = cpu_to_le32(oinfo->dqi_chunks);
> ldinfo->dqi_blocks = cpu_to_le32(oinfo->dqi_blocks);
> - spin_unlock(&dq_data_lock);
> + spin_unlock(&dqopt->dq_data_lock);
> }
>
> static int ocfs2_add_recovery_chunk(struct super_block *sb,
> @@ -523,7 +524,7 @@ static int ocfs2_recover_local_quota_file(struct inode *lqinode,
> goto out_drop_lock;
> }
> mutex_lock(&dqopts(sb)->dqio_mutex);
> - spin_lock(&dq_data_lock);
> + spin_lock(&dqopts(sb)->dq_data_lock);
> /* Add usage from quota entry into quota changes
> * of our node. Auxiliary variables are important
> * due to signedness */
> @@ -531,7 +532,7 @@ static int ocfs2_recover_local_quota_file(struct inode *lqinode,
> inodechange = le64_to_cpu(dqblk->dqb_inodemod);
> dquot->dq_dqb.dqb_curspace += spacechange;
> dquot->dq_dqb.dqb_curinodes += inodechange;
> - spin_unlock(&dq_data_lock);
> + spin_unlock(&dqopts(sb)->dq_data_lock);
> /* We want to drop reference held by the crashed
> * node. Since we have our own reference we know
> * global structure actually won't be freed. */
> @@ -876,12 +877,12 @@ static void olq_set_dquot(struct buffer_head *bh, void *private)
> + ol_dqblk_block_offset(sb, od->dq_local_off));
>
> dqblk->dqb_id = cpu_to_le64(od->dq_dquot.dq_id);
> - spin_lock(&dq_data_lock);
> + spin_lock(&dqopts(sb)->dq_data_lock);
> dqblk->dqb_spacemod = cpu_to_le64(od->dq_dquot.dq_dqb.dqb_curspace -
> od->dq_origspace);
> dqblk->dqb_inodemod = cpu_to_le64(od->dq_dquot.dq_dqb.dqb_curinodes -
> od->dq_originodes);
> - spin_unlock(&dq_data_lock);
> + spin_unlock(&dqopts(sb)->dq_data_lock);
> mlog(0, "Writing local dquot %u space %lld inodes %lld\n",
> od->dq_dquot.dq_id, (long long)le64_to_cpu(dqblk->dqb_spacemod),
> (long long)le64_to_cpu(dqblk->dqb_inodemod));
> diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
> index fdaa386..6c68172 100644
> --- a/fs/quota/dquot.c
> +++ b/fs/quota/dquot.c
> @@ -128,8 +128,6 @@
> */
>
> static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_fmt_lock);
> -__cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_data_lock);
> -EXPORT_SYMBOL(dq_data_lock);
>
> void __quota_error(struct super_block *sb, const char *func,
> const char *fmt, ...)
> @@ -1417,8 +1415,11 @@ static void __dquot_initialize(struct inode *inode, int type)
> * did a write before quota was turned on
> */
> rsv = inode_get_rsv_space(inode);
> - if (unlikely(rsv))
> - dquot_resv_space(inode->i_dquot[cnt], rsv);
> + if (unlikely(rsv)) {
> + spin_lock(&got[cnt]->dq_lock);
> + dquot_resv_space(got[cnt], rsv);
> + spin_unlock(&got[cnt]->dq_lock);
> + }
Ohhh, sorry. got[cnt] is already NULL at this moment.
Of course this hunk should look as follows:
+ if (unlikely(rsv)) {
+ spin_lock(&inode->i_dquot[cnt]->dq_lock);
+ dquot_resv_space(inode->i_dquot[cnt], rsv);
+ spin_unlock(&inode->i_dquot[cnt]->dq_lock);
+ }
> }
> }
> out_err:
> @@ -1574,14 +1575,14 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags)
> for (cnt = 0; cnt < MAXQUOTAS; cnt++)
> warntype[cnt] = QUOTA_NL_NOWARN;
>
> - spin_lock(&dq_data_lock);
> + spin_lock(&dqopts(inode->i_sb)->dq_data_lock);
> for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
> if (!inode->i_dquot[cnt])
> continue;
> ret = check_bdq(inode->i_dquot[cnt], number, !warn,
> warntype+cnt);
> if (ret && !nofail) {
> - spin_unlock(&dq_data_lock);
> + spin_unlock(&dqopts(inode->i_sb)->dq_data_lock);
> goto out_flush_warn;
> }
> }
> @@ -1594,7 +1595,7 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags)
> dquot_incr_space(inode->i_dquot[cnt], number);
> }
> inode_incr_space(inode, number, reserve);
> - spin_unlock(&dq_data_lock);
> + spin_unlock(&dqopts(inode->i_sb)->dq_data_lock);
>
> if (reserve)
> goto out_flush_warn;
> @@ -1622,7 +1623,7 @@ int dquot_alloc_inode(const struct inode *inode)
> for (cnt = 0; cnt < MAXQUOTAS; cnt++)
> warntype[cnt] = QUOTA_NL_NOWARN;
> down_read(&dqctl(inode->i_sb)->dqptr_sem);
> - spin_lock(&dq_data_lock);
> + spin_lock(&dqopts(inode->i_sb)->dq_data_lock);
> for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
> if (!inode->i_dquot[cnt])
> continue;
> @@ -1638,7 +1639,7 @@ int dquot_alloc_inode(const struct inode *inode)
> }
>
> warn_put_all:
> - spin_unlock(&dq_data_lock);
> + spin_unlock(&dqopts(inode->i_sb)->dq_data_lock);
> if (ret == 0)
> mark_all_dquot_dirty(inode->i_dquot);
> flush_warnings(inode->i_dquot, warntype);
> @@ -1660,7 +1661,7 @@ int dquot_claim_space_nodirty(struct inode *inode, qsize_t number)
> }
>
> down_read(&dqctl(inode->i_sb)->dqptr_sem);
> - spin_lock(&dq_data_lock);
> + spin_lock(&dqopts(inode->i_sb)->dq_data_lock);
> /* Claim reserved quotas to allocated quotas */
> for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
> if (inode->i_dquot[cnt])
> @@ -1669,7 +1670,7 @@ int dquot_claim_space_nodirty(struct inode *inode, qsize_t number)
> }
> /* Update inode bytes */
> inode_claim_rsv_space(inode, number);
> - spin_unlock(&dq_data_lock);
> + spin_unlock(&dqopts(inode->i_sb)->dq_data_lock);
> mark_all_dquot_dirty(inode->i_dquot);
> up_read(&dqctl(inode->i_sb)->dqptr_sem);
> return 0;
> @@ -1693,7 +1694,7 @@ void __dquot_free_space(struct inode *inode, qsize_t number, int flags)
> }
>
> down_read(&dqctl(inode->i_sb)->dqptr_sem);
> - spin_lock(&dq_data_lock);
> + spin_lock(&dqopts(inode->i_sb)->dq_data_lock);
> for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
> if (!inode->i_dquot[cnt])
> continue;
> @@ -1704,7 +1705,7 @@ void __dquot_free_space(struct inode *inode, qsize_t number, int flags)
> dquot_decr_space(inode->i_dquot[cnt], number);
> }
> inode_decr_space(inode, number, reserve);
> - spin_unlock(&dq_data_lock);
> + spin_unlock(&dqopts(inode->i_sb)->dq_data_lock);
>
> if (reserve)
> goto out_unlock;
> @@ -1729,14 +1730,14 @@ void dquot_free_inode(const struct inode *inode)
> return;
>
> down_read(&dqctl(inode->i_sb)->dqptr_sem);
> - spin_lock(&dq_data_lock);
> + spin_lock(&dqopts(inode->i_sb)->dq_data_lock);
> for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
> if (!inode->i_dquot[cnt])
> continue;
> warntype[cnt] = info_idq_free(inode->i_dquot[cnt], 1);
> dquot_decr_inodes(inode->i_dquot[cnt], 1);
> }
> - spin_unlock(&dq_data_lock);
> + spin_unlock(&dqopts(inode->i_sb)->dq_data_lock);
> mark_all_dquot_dirty(inode->i_dquot);
> flush_warnings(inode->i_dquot, warntype);
> up_read(&dqctl(inode->i_sb)->dqptr_sem);
> @@ -1775,7 +1776,7 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
> up_write(&dqctl(inode->i_sb)->dqptr_sem);
> return 0;
> }
> - spin_lock(&dq_data_lock);
> + spin_lock(&dqopts(inode->i_sb)->dq_data_lock);
> cur_space = inode_get_bytes(inode);
> rsv_space = inode_get_rsv_space(inode);
> space = cur_space + rsv_space;
> @@ -1823,7 +1824,7 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
>
> inode->i_dquot[cnt] = transfer_to[cnt];
> }
> - spin_unlock(&dq_data_lock);
> + spin_unlock(&dqopts(inode->i_sb)->dq_data_lock);
> up_write(&dqctl(inode->i_sb)->dqptr_sem);
>
> mark_all_dquot_dirty(transfer_from);
> @@ -1837,7 +1838,7 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
> transfer_to[cnt] = transfer_from[cnt];
> return 0;
> over_quota:
> - spin_unlock(&dq_data_lock);
> + spin_unlock(&dqopts(inode->i_sb)->dq_data_lock);
> up_write(&dqctl(inode->i_sb)->dqptr_sem);
> flush_warnings(transfer_to, warntype_to);
> return ret;
> @@ -1958,6 +1959,7 @@ static int alloc_quota_info(struct quota_ctl_info *dqctl) {
>
> mutex_init(&dqopt->dqio_mutex);
> spin_lock_init(&dqopt->dq_state_lock);
> + spin_lock_init(&dqopt->dq_data_lock);
> spin_lock_init(&dqopt->dq_list_lock);
> INIT_LIST_HEAD(&dqopt->dq_inuse_list);
> INIT_LIST_HEAD(&dqopt->dq_free_list);
> @@ -2415,7 +2417,7 @@ static void do_get_dqblk(struct dquot *dquot, struct fs_disk_quota *di)
> FS_USER_QUOTA : FS_GROUP_QUOTA;
> di->d_id = dquot->dq_id;
>
> - spin_lock(&dq_data_lock);
> + spin_lock(&sb_dqopts(dquot)->dq_data_lock);
> di->d_blk_hardlimit = stoqb(dm->dqb_bhardlimit);
> di->d_blk_softlimit = stoqb(dm->dqb_bsoftlimit);
> di->d_ino_hardlimit = dm->dqb_ihardlimit;
> @@ -2424,7 +2426,7 @@ static void do_get_dqblk(struct dquot *dquot, struct fs_disk_quota *di)
> di->d_icount = dm->dqb_curinodes;
> di->d_btimer = dm->dqb_btime;
> di->d_itimer = dm->dqb_itime;
> - spin_unlock(&dq_data_lock);
> + spin_unlock(&sb_dqopts(dquot)->dq_data_lock);
> }
>
> int dquot_get_dqblk(struct super_block *sb, int type, qid_t id,
> @@ -2467,7 +2469,7 @@ static int do_set_dqblk(struct dquot *dquot, struct fs_disk_quota *di)
> (di->d_ino_hardlimit > dqi->dqi_maxilimit)))
> return -ERANGE;
>
> - spin_lock(&dq_data_lock);
> + spin_lock(&sb_dqopts(dquot)->dq_data_lock);
> if (di->d_fieldmask & FS_DQ_BCOUNT) {
> dm->dqb_curspace = di->d_bcount - dm->dqb_rsvspace;
> check_blim = 1;
> @@ -2533,7 +2535,7 @@ static int do_set_dqblk(struct dquot *dquot, struct fs_disk_quota *di)
> clear_bit(DQ_FAKE_B, &dquot->dq_flags);
> else
> set_bit(DQ_FAKE_B, &dquot->dq_flags);
> - spin_unlock(&dq_data_lock);
> + spin_unlock(&sb_dqopts(dquot)->dq_data_lock);
> mark_dquot_dirty(dquot);
>
> return 0;
> @@ -2568,12 +2570,12 @@ int dquot_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii)
> return -ESRCH;
> }
> mi = dqopts(sb)->info + type;
> - spin_lock(&dq_data_lock);
> + spin_lock(&dqopts(sb)->dq_data_lock);
> ii->dqi_bgrace = mi->dqi_bgrace;
> ii->dqi_igrace = mi->dqi_igrace;
> ii->dqi_flags = mi->dqi_flags & DQF_MASK;
> ii->dqi_valid = IIF_ALL;
> - spin_unlock(&dq_data_lock);
> + spin_unlock(&dqopts(sb)->dq_data_lock);
> mutex_unlock(&dqctl(sb)->dqonoff_mutex);
> return 0;
> }
> @@ -2591,7 +2593,7 @@ int dquot_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii)
> goto out;
> }
> mi = dqopts(sb)->info + type;
> - spin_lock(&dq_data_lock);
> + spin_lock(&dqopts(sb)->dq_data_lock);
> if (ii->dqi_valid & IIF_BGRACE)
> mi->dqi_bgrace = ii->dqi_bgrace;
> if (ii->dqi_valid & IIF_IGRACE)
> @@ -2599,7 +2601,7 @@ int dquot_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii)
> if (ii->dqi_valid & IIF_FLAGS)
> mi->dqi_flags = (mi->dqi_flags & ~DQF_MASK) |
> (ii->dqi_flags & DQF_MASK);
> - spin_unlock(&dq_data_lock);
> + spin_unlock(&dqopts(sb)->dq_data_lock);
> mark_info_dirty(sb, type);
> /* Force write to disk */
> dqctl(sb)->dq_op->write_info(sb, type);
> diff --git a/fs/quota/quota_tree.c b/fs/quota/quota_tree.c
> index 21a4a6a..a089c70 100644
> --- a/fs/quota/quota_tree.c
> +++ b/fs/quota/quota_tree.c
> @@ -375,9 +375,9 @@ int qtree_write_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
> return ret;
> }
> }
> - spin_lock(&dq_data_lock);
> + spin_lock(&dqopts(sb)->dq_data_lock);
> info->dqi_ops->mem2disk_dqblk(ddquot, dquot);
> - spin_unlock(&dq_data_lock);
> + spin_unlock(&dqopts(sb)->dq_data_lock);
> ret = sb->s_op->quota_write(sb, type, ddquot, info->dqi_entry_size,
> dquot->dq_off);
> if (ret != info->dqi_entry_size) {
> @@ -631,14 +631,14 @@ int qtree_read_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
> kfree(ddquot);
> goto out;
> }
> - spin_lock(&dq_data_lock);
> + spin_lock(&dqopts(sb)->dq_data_lock);
> info->dqi_ops->disk2mem_dqblk(dquot, ddquot);
> if (!dquot->dq_dqb.dqb_bhardlimit &&
> !dquot->dq_dqb.dqb_bsoftlimit &&
> !dquot->dq_dqb.dqb_ihardlimit &&
> !dquot->dq_dqb.dqb_isoftlimit)
> set_bit(DQ_FAKE_B, &dquot->dq_flags);
> - spin_unlock(&dq_data_lock);
> + spin_unlock(&dqopts(sb)->dq_data_lock);
> kfree(ddquot);
> out:
> dqstats_inc(DQST_READS);
> diff --git a/fs/quota/quota_v2.c b/fs/quota/quota_v2.c
> index 65444d2..e4ef8de 100644
> --- a/fs/quota/quota_v2.c
> +++ b/fs/quota/quota_v2.c
> @@ -153,12 +153,12 @@ static int v2_write_file_info(struct super_block *sb, int type)
> struct qtree_mem_dqinfo *qinfo = info->dqi_priv;
> ssize_t size;
>
> - spin_lock(&dq_data_lock);
> + spin_lock(&dqopts(sb)->dq_data_lock);
> info->dqi_flags &= ~DQF_INFO_DIRTY;
> dinfo.dqi_bgrace = cpu_to_le32(info->dqi_bgrace);
> dinfo.dqi_igrace = cpu_to_le32(info->dqi_igrace);
> dinfo.dqi_flags = cpu_to_le32(info->dqi_flags & DQF_MASK);
> - spin_unlock(&dq_data_lock);
> + spin_unlock(&dqopts(sb)->dq_data_lock);
> dinfo.dqi_blocks = cpu_to_le32(qinfo->dqi_blocks);
> dinfo.dqi_free_blk = cpu_to_le32(qinfo->dqi_free_blk);
> dinfo.dqi_free_entry = cpu_to_le32(qinfo->dqi_free_entry);
> diff --git a/include/linux/quota.h b/include/linux/quota.h
> index 754aedb..6b04001 100644
> --- a/include/linux/quota.h
> +++ b/include/linux/quota.h
> @@ -186,8 +186,6 @@ enum {
> typedef __kernel_uid32_t qid_t; /* Type in which we store ids in memory */
> typedef long long qsize_t; /* Type in which we store sizes */
>
> -extern spinlock_t dq_data_lock;
> -
> /* Maximal numbers of writes for quota operation (insert/delete/update)
> * (over VFS all formats) */
> #define DQUOT_INIT_ALLOC max(V1_INIT_ALLOC, V2_INIT_ALLOC)
> @@ -413,6 +411,7 @@ struct quota_info {
> struct mutex dqio_mutex; /* lock device while I/O in progress */
> struct mem_dqinfo info[MAXQUOTAS]; /* Information for each quota type */
> spinlock_t dq_state_lock; /* serialize quota state changes*/
> + spinlock_t dq_data_lock; /* protect in memory data */
> spinlock_t dq_list_lock; /* protect lists and hash*/
> struct list_head dq_inuse_list; /* list of inused dquotas */
> struct list_head dq_free_list; /* list of free dquotas */
> --
> 1.6.5.2
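For readers who do not follow the quota code closely, the pattern behind the conversion described in the changelog above can be sketched in a few lines of ordinary C. The fragment below is only an illustration, not kernel code: the toy super_block and quota_info structures, the dqopts() helper and the pthread spinlock are stand-ins chosen for this example, mirroring the names used in the patch.

#include <pthread.h>
#include <stdio.h>

struct quota_info {
	pthread_spinlock_t dq_data_lock;   /* protects the in-memory counter below */
	long long curspace;                /* stand-in for per-sb quota usage */
};

struct super_block {
	struct quota_info s_dquot;
};

/* analogue of the dqopts(sb) accessor used throughout the patch */
static struct quota_info *dqopts(struct super_block *sb)
{
	return &sb->s_dquot;
}

static void charge_space(struct super_block *sb, long long bytes)
{
	struct quota_info *dqopt = dqopts(sb);

	/* before the patch this was spin_lock(&dq_data_lock): one lock shared
	 * by every mounted filesystem; now each sb carries its own lock */
	pthread_spin_lock(&dqopt->dq_data_lock);
	dqopt->curspace += bytes;
	pthread_spin_unlock(&dqopt->dq_data_lock);
}

int main(void)
{
	struct super_block sb1, sb2;

	pthread_spin_init(&dqopts(&sb1)->dq_data_lock, PTHREAD_PROCESS_PRIVATE);
	pthread_spin_init(&dqopts(&sb2)->dq_data_lock, PTHREAD_PROCESS_PRIVATE);
	dqopts(&sb1)->curspace = 0;
	dqopts(&sb2)->curspace = 0;

	/* updates against different superblocks no longer serialize on one lock */
	charge_space(&sb1, 4096);
	charge_space(&sb2, 8192);

	printf("sb1 usage %lld, sb2 usage %lld\n",
	       dqopts(&sb1)->curspace, dqopts(&sb2)->curspace);
	return 0;
}

The point of the conversion is simply that the lock now lives inside per-superblock data, so two unrelated filesystems updating their quota usage never contend on the same spinlock or bounce the same cache line.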