Currently dq_data_lock is global, which is bad for scalability. In fact, different super_blocks share no quota data, so we can simply convert the global lock into per-sb locks. Signed-off-by: Dmitry Monakhov <dmonakhov@xxxxxxxxxx> --- fs/ocfs2/quota_global.c | 29 ++++++++++++++------------- fs/ocfs2/quota_local.c | 13 ++++++----- fs/quota/dquot.c | 49 ++++++++++++++++++++++++----------------------- fs/quota/quota_tree.c | 8 +++--- fs/quota/quota_v2.c | 4 +- include/linux/quota.h | 3 +- 6 files changed, 54 insertions(+), 52 deletions(-) diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c index b464947..d65d18a 100644 --- a/fs/ocfs2/quota_global.c +++ b/fs/ocfs2/quota_global.c @@ -296,21 +296,22 @@ int ocfs2_lock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex) { int status; struct buffer_head *bh = NULL; + struct inode *inode = oinfo->dqi_gqinode; - status = ocfs2_inode_lock(oinfo->dqi_gqinode, &bh, ex); + status = ocfs2_inode_lock(inode, &bh, ex); if (status < 0) return status; - spin_lock(&dq_data_lock); + spin_lock(&dqopts(inode->i_sb)->dq_data_lock); if (!oinfo->dqi_gqi_count++) oinfo->dqi_gqi_bh = bh; else WARN_ON(bh != oinfo->dqi_gqi_bh); - spin_unlock(&dq_data_lock); + spin_unlock(&dqopts(inode->i_sb)->dq_data_lock); if (ex) { - mutex_lock(&oinfo->dqi_gqinode->i_mutex); - down_write(&OCFS2_I(oinfo->dqi_gqinode)->ip_alloc_sem); + mutex_lock(&inode->i_mutex); + down_write(&OCFS2_I(inode)->ip_alloc_sem); } else { - down_read(&OCFS2_I(oinfo->dqi_gqinode)->ip_alloc_sem); + down_read(&OCFS2_I(inode)->ip_alloc_sem); } return 0; } @@ -325,10 +326,10 @@ void ocfs2_unlock_global_qf(struct ocfs2_mem_dqinfo *oinfo, int ex) } ocfs2_inode_unlock(oinfo->dqi_gqinode, ex); brelse(oinfo->dqi_gqi_bh); - spin_lock(&dq_data_lock); + spin_lock(&dqopts(oinfo->dqi_gqinode->i_sb)->dq_data_lock); if (!--oinfo->dqi_gqi_count) oinfo->dqi_gqi_bh = NULL; - spin_unlock(&dq_data_lock); + spin_unlock(&dqopts(oinfo->dqi_gqinode->i_sb)->dq_data_lock); } /* Read information 
header from global quota file */ @@ -421,11 +422,11 @@ static int __ocfs2_global_write_info(struct super_block *sb, int type) struct ocfs2_global_disk_dqinfo dinfo; ssize_t size; - spin_lock(&dq_data_lock); + spin_lock(&dqopts(sb)->dq_data_lock); info->dqi_flags &= ~DQF_INFO_DIRTY; dinfo.dqi_bgrace = cpu_to_le32(info->dqi_bgrace); dinfo.dqi_igrace = cpu_to_le32(info->dqi_igrace); - spin_unlock(&dq_data_lock); + spin_unlock(&dqopts(sb)->dq_data_lock); dinfo.dqi_syncms = cpu_to_le32(oinfo->dqi_syncms); dinfo.dqi_blocks = cpu_to_le32(oinfo->dqi_gi.dqi_blocks); dinfo.dqi_free_blk = cpu_to_le32(oinfo->dqi_gi.dqi_free_blk); @@ -502,7 +503,7 @@ int __ocfs2_sync_dquot(struct dquot *dquot, int freeing) /* Update space and inode usage. Get also other information from * global quota file so that we don't overwrite any changes there. * We are */ - spin_lock(&dq_data_lock); + spin_lock(&dqopts(sb)->dq_data_lock); spacechange = dquot->dq_dqb.dqb_curspace - OCFS2_DQUOT(dquot)->dq_origspace; inodechange = dquot->dq_dqb.dqb_curinodes - @@ -556,7 +557,7 @@ int __ocfs2_sync_dquot(struct dquot *dquot, int freeing) __clear_bit(DQ_LASTSET_B + QIF_ITIME_B, &dquot->dq_flags); OCFS2_DQUOT(dquot)->dq_origspace = dquot->dq_dqb.dqb_curspace; OCFS2_DQUOT(dquot)->dq_originodes = dquot->dq_dqb.dqb_curinodes; - spin_unlock(&dq_data_lock); + spin_unlock(&dqopts(sb)->dq_data_lock); err = ocfs2_qinfo_lock(info, freeing); if (err < 0) { mlog(ML_ERROR, "Failed to lock quota info, loosing quota write" @@ -835,10 +836,10 @@ static int ocfs2_mark_dquot_dirty(struct dquot *dquot) /* In case user set some limits, sync dquot immediately to global * quota file so that information propagates quicker */ - spin_lock(&dq_data_lock); + spin_lock(&dqopts(sb)->dq_data_lock); if (dquot->dq_flags & mask) sync = 1; - spin_unlock(&dq_data_lock); + spin_unlock(&dqopts(sb)->dq_data_lock); /* This is a slight hack but we can't afford getting global quota * lock if we already have a transaction started. 
*/ if (!sync || journal_current_handle()) { diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c index 7c30ba3..2d2e981 100644 --- a/fs/ocfs2/quota_local.c +++ b/fs/ocfs2/quota_local.c @@ -288,14 +288,15 @@ static void olq_update_info(struct buffer_head *bh, void *private) struct mem_dqinfo *info = private; struct ocfs2_mem_dqinfo *oinfo = info->dqi_priv; struct ocfs2_local_disk_dqinfo *ldinfo; + struct quota_info *dqopt = dqopts(oinfo->dqi_gqinode->i_sb); ldinfo = (struct ocfs2_local_disk_dqinfo *)(bh->b_data + OCFS2_LOCAL_INFO_OFF); - spin_lock(&dq_data_lock); + spin_lock(&dqopt->dq_data_lock); ldinfo->dqi_flags = cpu_to_le32(info->dqi_flags & DQF_MASK); ldinfo->dqi_chunks = cpu_to_le32(oinfo->dqi_chunks); ldinfo->dqi_blocks = cpu_to_le32(oinfo->dqi_blocks); - spin_unlock(&dq_data_lock); + spin_unlock(&dqopt->dq_data_lock); } static int ocfs2_add_recovery_chunk(struct super_block *sb, @@ -523,7 +524,7 @@ static int ocfs2_recover_local_quota_file(struct inode *lqinode, goto out_drop_lock; } mutex_lock(&dqopts(sb)->dqio_mutex); - spin_lock(&dq_data_lock); + spin_lock(&dqopts(sb)->dq_data_lock); /* Add usage from quota entry into quota changes * of our node. Auxiliary variables are important * due to signedness */ @@ -531,7 +532,7 @@ static int ocfs2_recover_local_quota_file(struct inode *lqinode, inodechange = le64_to_cpu(dqblk->dqb_inodemod); dquot->dq_dqb.dqb_curspace += spacechange; dquot->dq_dqb.dqb_curinodes += inodechange; - spin_unlock(&dq_data_lock); + spin_unlock(&dqopts(sb)->dq_data_lock); /* We want to drop reference held by the crashed * node. Since we have our own reference we know * global structure actually won't be freed. 
*/ @@ -876,12 +877,12 @@ static void olq_set_dquot(struct buffer_head *bh, void *private) + ol_dqblk_block_offset(sb, od->dq_local_off)); dqblk->dqb_id = cpu_to_le64(od->dq_dquot.dq_id); - spin_lock(&dq_data_lock); + spin_lock(&dqopts(sb)->dq_data_lock); dqblk->dqb_spacemod = cpu_to_le64(od->dq_dquot.dq_dqb.dqb_curspace - od->dq_origspace); dqblk->dqb_inodemod = cpu_to_le64(od->dq_dquot.dq_dqb.dqb_curinodes - od->dq_originodes); - spin_unlock(&dq_data_lock); + spin_unlock(&dqopts(sb)->dq_data_lock); mlog(0, "Writing local dquot %u space %lld inodes %lld\n", od->dq_dquot.dq_id, (long long)le64_to_cpu(dqblk->dqb_spacemod), (long long)le64_to_cpu(dqblk->dqb_inodemod)); diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 2317a3b..0dcf61e 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -125,8 +125,6 @@ */ static __cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_fmt_lock); -__cacheline_aligned_in_smp DEFINE_SPINLOCK(dq_data_lock); -EXPORT_SYMBOL(dq_data_lock); void __quota_error(struct super_block *sb, const char *func, const char *fmt, ...) 
@@ -1406,6 +1404,7 @@ static void __dquot_initialize(struct inode *inode, int type) down_write(&dqopts(sb)->dqptr_sem); if (IS_NOQUOTA(inode)) goto out_err; + spin_lock(&dqopts(sb)->dq_data_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (type != -1 && cnt != type) continue; @@ -1427,6 +1426,7 @@ static void __dquot_initialize(struct inode *inode, int type) dquot_resv_space(inode->i_dquot[cnt], rsv); } } + spin_unlock(&dqopts(sb)->dq_data_lock); out_err: up_write(&dqopts(sb)->dqptr_sem); srcu_read_unlock(&dqopts(sb)->dq_srcu, idx); @@ -1602,14 +1602,14 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags) for (cnt = 0; cnt < MAXQUOTAS; cnt++) warntype[cnt] = QUOTA_NL_NOWARN; - spin_lock(&dq_data_lock); + spin_lock(&dqopts(inode->i_sb)->dq_data_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (!inode->i_dquot[cnt]) continue; ret = check_bdq(inode->i_dquot[cnt], number, !warn, warntype+cnt); if (ret && !nofail) { - spin_unlock(&dq_data_lock); + spin_unlock(&dqopts(inode->i_sb)->dq_data_lock); goto out_flush_warn; } } @@ -1622,7 +1622,7 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags) dquot_incr_space(inode->i_dquot[cnt], number); } inode_incr_space(inode, number, reserve); - spin_unlock(&dq_data_lock); + spin_unlock(&dqopts(inode->i_sb)->dq_data_lock); if (reserve) goto out_flush_warn; @@ -1657,7 +1657,7 @@ int dquot_alloc_inode(const struct inode *inode) for (cnt = 0; cnt < MAXQUOTAS; cnt++) warntype[cnt] = QUOTA_NL_NOWARN; down_read(&dqopts(inode->i_sb)->dqptr_sem); - spin_lock(&dq_data_lock); + spin_lock(&dqopts(inode->i_sb)->dq_data_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (!inode->i_dquot[cnt]) continue; @@ -1673,7 +1673,7 @@ int dquot_alloc_inode(const struct inode *inode) } warn_put_all: - spin_unlock(&dq_data_lock); + spin_unlock(&dqopts(inode->i_sb)->dq_data_lock); if (ret == 0) mark_all_dquot_dirty(inode->i_dquot); flush_warnings(inode->i_dquot, warntype); @@ -1699,7 +1699,7 @@ int 
dquot_claim_space_nodirty(struct inode *inode, qsize_t number) idx = srcu_read_lock(&dqopts(inode->i_sb)->dq_srcu); rcu_read_unlock(); down_read(&dqopts(inode->i_sb)->dqptr_sem); - spin_lock(&dq_data_lock); + spin_lock(&dqopts(inode->i_sb)->dq_data_lock); /* Claim reserved quotas to allocated quotas */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (inode->i_dquot[cnt]) @@ -1708,7 +1708,7 @@ int dquot_claim_space_nodirty(struct inode *inode, qsize_t number) } /* Update inode bytes */ inode_claim_rsv_space(inode, number); - spin_unlock(&dq_data_lock); + spin_unlock(&dqopts(inode->i_sb)->dq_data_lock); mark_all_dquot_dirty(inode->i_dquot); up_read(&dqopts(inode->i_sb)->dqptr_sem); srcu_read_unlock(&dqopts(inode->i_sb)->dq_srcu, idx); @@ -1737,7 +1737,7 @@ void __dquot_free_space(struct inode *inode, qsize_t number, int flags) idx = srcu_read_lock(&dqopts(inode->i_sb)->dq_srcu); rcu_read_unlock(); down_read(&dqopts(inode->i_sb)->dqptr_sem); - spin_lock(&dq_data_lock); + spin_lock(&dqopts(inode->i_sb)->dq_data_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (!inode->i_dquot[cnt]) continue; @@ -1748,7 +1748,7 @@ void __dquot_free_space(struct inode *inode, qsize_t number, int flags) dquot_decr_space(inode->i_dquot[cnt], number); } inode_decr_space(inode, number, reserve); - spin_unlock(&dq_data_lock); + spin_unlock(&dqopts(inode->i_sb)->dq_data_lock); if (reserve) goto out_unlock; @@ -1778,14 +1778,14 @@ void dquot_free_inode(const struct inode *inode) idx = srcu_read_lock(&dqopts(inode->i_sb)->dq_srcu); rcu_read_unlock(); down_read(&dqopts(inode->i_sb)->dqptr_sem); - spin_lock(&dq_data_lock); + spin_lock(&dqopts(inode->i_sb)->dq_data_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (!inode->i_dquot[cnt]) continue; warntype[cnt] = info_idq_free(inode->i_dquot[cnt], 1); dquot_decr_inodes(inode->i_dquot[cnt], 1); } - spin_unlock(&dq_data_lock); + spin_unlock(&dqopts(inode->i_sb)->dq_data_lock); mark_all_dquot_dirty(inode->i_dquot); flush_warnings(inode->i_dquot, warntype); 
up_read(&dqopts(inode->i_sb)->dqptr_sem); @@ -1832,7 +1832,7 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to) srcu_read_unlock(&dqopts(inode->i_sb)->dq_srcu, idx); return 0; } - spin_lock(&dq_data_lock); + spin_lock(&dqopts(inode->i_sb)->dq_data_lock); cur_space = inode_get_bytes(inode); rsv_space = inode_get_rsv_space(inode); space = cur_space + rsv_space; @@ -1880,7 +1880,7 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to) inode->i_dquot[cnt] = transfer_to[cnt]; } - spin_unlock(&dq_data_lock); + spin_unlock(&dqopts(inode->i_sb)->dq_data_lock); up_write(&dqopts(inode->i_sb)->dqptr_sem); srcu_read_unlock(&dqopts(inode->i_sb)->dq_srcu, idx); mark_all_dquot_dirty(transfer_from); @@ -1894,7 +1894,7 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to) transfer_to[cnt] = transfer_from[cnt]; return 0; over_quota: - spin_unlock(&dq_data_lock); + spin_unlock(&dqopts(inode->i_sb)->dq_data_lock); up_write(&dqopts(inode->i_sb)->dqptr_sem); srcu_read_unlock(&dqopts(inode->i_sb)->dq_srcu, idx); flush_warnings(transfer_to, warntype_to); @@ -1999,6 +1999,7 @@ static int alloc_quota_info(struct quota_ctl_info *dqctl) { mutex_init(&dqopt->dqio_mutex); init_rwsem(&dqopt->dqptr_sem); spin_lock_init(&dqopt->dq_list_lock); + spin_lock_init(&dqopt->dq_data_lock); INIT_LIST_HEAD(&dqopt->dq_inuse_list); INIT_LIST_HEAD(&dqopt->dq_free_list); @@ -2453,7 +2454,7 @@ static void do_get_dqblk(struct dquot *dquot, struct fs_disk_quota *di) FS_USER_QUOTA : FS_GROUP_QUOTA; di->d_id = dquot->dq_id; - spin_lock(&dq_data_lock); + spin_lock(&sb_dqopts(dquot)->dq_data_lock); di->d_blk_hardlimit = stoqb(dm->dqb_bhardlimit); di->d_blk_softlimit = stoqb(dm->dqb_bsoftlimit); di->d_ino_hardlimit = dm->dqb_ihardlimit; @@ -2462,7 +2463,7 @@ static void do_get_dqblk(struct dquot *dquot, struct fs_disk_quota *di) di->d_icount = dm->dqb_curinodes; di->d_btimer = dm->dqb_btime; di->d_itimer = dm->dqb_itime; - spin_unlock(&dq_data_lock); + 
spin_unlock(&sb_dqopts(dquot)->dq_data_lock); } int dquot_get_dqblk(struct super_block *sb, int type, qid_t id, @@ -2505,7 +2506,7 @@ static int do_set_dqblk(struct dquot *dquot, struct fs_disk_quota *di) (di->d_ino_hardlimit > dqi->dqi_maxilimit))) return -ERANGE; - spin_lock(&dq_data_lock); + spin_lock(&sb_dqopts(dquot)->dq_data_lock); if (di->d_fieldmask & FS_DQ_BCOUNT) { dm->dqb_curspace = di->d_bcount - dm->dqb_rsvspace; check_blim = 1; @@ -2571,7 +2572,7 @@ static int do_set_dqblk(struct dquot *dquot, struct fs_disk_quota *di) clear_bit(DQ_FAKE_B, &dquot->dq_flags); else set_bit(DQ_FAKE_B, &dquot->dq_flags); - spin_unlock(&dq_data_lock); + spin_unlock(&sb_dqopts(dquot)->dq_data_lock); mark_dquot_dirty(dquot); return 0; @@ -2606,12 +2607,12 @@ int dquot_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii) return -ESRCH; } mi = dqopts(sb)->info + type; - spin_lock(&dq_data_lock); + spin_lock(&dqopts(sb)->dq_data_lock); ii->dqi_bgrace = mi->dqi_bgrace; ii->dqi_igrace = mi->dqi_igrace; ii->dqi_flags = mi->dqi_flags & DQF_MASK; ii->dqi_valid = IIF_ALL; - spin_unlock(&dq_data_lock); + spin_unlock(&dqopts(sb)->dq_data_lock); mutex_unlock(&dqctl(sb)->dqonoff_mutex); return 0; } @@ -2629,7 +2630,7 @@ int dquot_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii) goto out; } mi = dqopts(sb)->info + type; - spin_lock(&dq_data_lock); + spin_lock(&dqopts(sb)->dq_data_lock); if (ii->dqi_valid & IIF_BGRACE) mi->dqi_bgrace = ii->dqi_bgrace; if (ii->dqi_valid & IIF_IGRACE) @@ -2637,7 +2638,7 @@ int dquot_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii) if (ii->dqi_valid & IIF_FLAGS) mi->dqi_flags = (mi->dqi_flags & ~DQF_MASK) | (ii->dqi_flags & DQF_MASK); - spin_unlock(&dq_data_lock); + spin_unlock(&dqopts(sb)->dq_data_lock); mark_info_dirty(sb, type); /* Force write to disk */ dqctl(sb)->dq_op->write_info(sb, type); diff --git a/fs/quota/quota_tree.c b/fs/quota/quota_tree.c index 21a4a6a..a089c70 100644 --- 
a/fs/quota/quota_tree.c +++ b/fs/quota/quota_tree.c @@ -375,9 +375,9 @@ int qtree_write_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot) return ret; } } - spin_lock(&dq_data_lock); + spin_lock(&dqopts(sb)->dq_data_lock); info->dqi_ops->mem2disk_dqblk(ddquot, dquot); - spin_unlock(&dq_data_lock); + spin_unlock(&dqopts(sb)->dq_data_lock); ret = sb->s_op->quota_write(sb, type, ddquot, info->dqi_entry_size, dquot->dq_off); if (ret != info->dqi_entry_size) { @@ -631,14 +631,14 @@ int qtree_read_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot) kfree(ddquot); goto out; } - spin_lock(&dq_data_lock); + spin_lock(&dqopts(sb)->dq_data_lock); info->dqi_ops->disk2mem_dqblk(dquot, ddquot); if (!dquot->dq_dqb.dqb_bhardlimit && !dquot->dq_dqb.dqb_bsoftlimit && !dquot->dq_dqb.dqb_ihardlimit && !dquot->dq_dqb.dqb_isoftlimit) set_bit(DQ_FAKE_B, &dquot->dq_flags); - spin_unlock(&dq_data_lock); + spin_unlock(&dqopts(sb)->dq_data_lock); kfree(ddquot); out: dqstats_inc(DQST_READS); diff --git a/fs/quota/quota_v2.c b/fs/quota/quota_v2.c index 65444d2..e4ef8de 100644 --- a/fs/quota/quota_v2.c +++ b/fs/quota/quota_v2.c @@ -153,12 +153,12 @@ static int v2_write_file_info(struct super_block *sb, int type) struct qtree_mem_dqinfo *qinfo = info->dqi_priv; ssize_t size; - spin_lock(&dq_data_lock); + spin_lock(&dqopts(sb)->dq_data_lock); info->dqi_flags &= ~DQF_INFO_DIRTY; dinfo.dqi_bgrace = cpu_to_le32(info->dqi_bgrace); dinfo.dqi_igrace = cpu_to_le32(info->dqi_igrace); dinfo.dqi_flags = cpu_to_le32(info->dqi_flags & DQF_MASK); - spin_unlock(&dq_data_lock); + spin_unlock(&dqopts(sb)->dq_data_lock); dinfo.dqi_blocks = cpu_to_le32(qinfo->dqi_blocks); dinfo.dqi_free_blk = cpu_to_le32(qinfo->dqi_free_blk); dinfo.dqi_free_entry = cpu_to_le32(qinfo->dqi_free_entry); diff --git a/include/linux/quota.h b/include/linux/quota.h index d07094b..7693b18 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -188,8 +188,6 @@ enum { typedef __kernel_uid32_t qid_t; /* Type in which 
we store ids in memory */ typedef long long qsize_t; /* Type in which we store sizes */ -extern spinlock_t dq_data_lock; - /* Maximal numbers of writes for quota operation (insert/delete/update) * (over VFS all formats) */ #define DQUOT_INIT_ALLOC max(V1_INIT_ALLOC, V2_INIT_ALLOC) @@ -407,6 +405,7 @@ struct quota_ctl_info { struct quota_info { struct mutex dqio_mutex; /* lock device while I/O in progress */ struct mem_dqinfo info[MAXQUOTAS]; /* Information for each quota type */ + spinlock_t dq_data_lock; /* protect in memory data */ spinlock_t dq_list_lock; /* protect lists */ struct list_head dq_inuse_list; /* list of inused dquotas */ struct list_head dq_free_list; /* list of free dquotas */ -- 1.6.5.2 -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html