currently ->dq_data_lock is responsible for protecting three things 1) dquot->dq_dqb info consistency 2) synchronization between ->dq_dqb with ->i_bytes 3) Protects mem_dqinfo (per-sb data), 3b) and consistency between mem_dqinfo and dq_dqb for following data. dqi_bgrace <=> dqb_btime dqi_igrace <=> dqb_itime In fact (1) and (2) is conceptually different from (3) By introducing per-dquot data lock we later can split (1)(2) from (3) This patch simply introduces a new lock, without changing ->dq_data_lock. Signed-off-by: Dmitry Monakhov <dmonakhov@xxxxxxxxxx> --- fs/ocfs2/quota_global.c | 4 ++ fs/ocfs2/quota_local.c | 4 ++ fs/quota/dquot.c | 119 ++++++++++++++++++++++++++++++++++++++++++----- fs/quota/quota_tree.c | 4 ++ include/linux/quota.h | 13 +++++ 5 files changed, 132 insertions(+), 12 deletions(-) diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c index d65d18a..c005693 100644 --- a/fs/ocfs2/quota_global.c +++ b/fs/ocfs2/quota_global.c @@ -504,6 +504,7 @@ int __ocfs2_sync_dquot(struct dquot *dquot, int freeing) * global quota file so that we don't overwrite any changes there. * We are */ spin_lock(&dqopts(sb)->dq_data_lock); + spin_lock(&dquot->dq_lock); spacechange = dquot->dq_dqb.dqb_curspace - OCFS2_DQUOT(dquot)->dq_origspace; inodechange = dquot->dq_dqb.dqb_curinodes - @@ -557,6 +558,7 @@ int __ocfs2_sync_dquot(struct dquot *dquot, int freeing) __clear_bit(DQ_LASTSET_B + QIF_ITIME_B, &dquot->dq_flags); OCFS2_DQUOT(dquot)->dq_origspace = dquot->dq_dqb.dqb_curspace; OCFS2_DQUOT(dquot)->dq_originodes = dquot->dq_dqb.dqb_curinodes; + spin_unlock(&dquot->dq_lock); spin_unlock(&dqopts(sb)->dq_data_lock); err = ocfs2_qinfo_lock(info, freeing); if (err < 0) { @@ -837,8 +839,10 @@ static int ocfs2_mark_dquot_dirty(struct dquot *dquot) /* In case user set some limits, sync dquot immediately to global * quota file so that information propagates quicker */ spin_lock(&dqopts(sb)->dq_data_lock); + spin_lock(&dquot->dq_lock); if (dquot->dq_flags & mask) sync = 1; + spin_unlock(&dquot->dq_lock); spin_unlock(&dqopts(sb)->dq_data_lock); /* This is a slight hack but we can't afford getting global quota * lock if we already have a transaction started. */ diff --git a/fs/ocfs2/quota_local.c b/fs/ocfs2/quota_local.c index 2d2e981..1490cb0 100644 --- a/fs/ocfs2/quota_local.c +++ b/fs/ocfs2/quota_local.c @@ -525,6 +525,7 @@ static int ocfs2_recover_local_quota_file(struct inode *lqinode, } mutex_lock(&dqopts(sb)->dqio_mutex); spin_lock(&dqopts(sb)->dq_data_lock); + spin_lock(&dquot->dq_lock); /* Add usage from quota entry into quota changes * of our node. Auxiliary variables are important * due to signedness */ @@ -532,6 +533,7 @@ static int ocfs2_recover_local_quota_file(struct inode *lqinode, inodechange = le64_to_cpu(dqblk->dqb_inodemod); dquot->dq_dqb.dqb_curspace += spacechange; dquot->dq_dqb.dqb_curinodes += inodechange; + spin_unlock(&dquot->dq_lock); spin_unlock(&dqopts(sb)->dq_data_lock); /* We want to drop reference held by the crashed * node. Since we have our own reference we know @@ -878,10 +880,12 @@ static void olq_set_dquot(struct buffer_head *bh, void *private) dqblk->dqb_id = cpu_to_le64(od->dq_dquot.dq_id); spin_lock(&dqopts(sb)->dq_data_lock); + spin_lock(&od->dq_dquot.dq_lock); dqblk->dqb_spacemod = cpu_to_le64(od->dq_dquot.dq_dqb.dqb_curspace - od->dq_origspace); dqblk->dqb_inodemod = cpu_to_le64(od->dq_dquot.dq_dqb.dqb_curinodes - od->dq_originodes); + spin_unlock(&od->dq_dquot.dq_lock); spin_unlock(&dqopts(sb)->dq_data_lock); mlog(0, "Writing local dquot %u space %lld inodes %lld\n", od->dq_dquot.dq_id, (long long)le64_to_cpu(dqblk->dqb_spacemod), diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 0dcf61e..5898b46 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -82,14 +82,17 @@ /* * There are three quota SMP locks. dq_list_lock protects all lists with quotas - * dq_data_lock protects data from dq_dqb and also mem_dqinfo structures and - * also guards consistency of dquot->dq_dqb with inode->i_blocks, i_bytes. + * dq_data_lock protects mem_dqinfo structures and mem_dqinfo with + * dq_dqb consystency. + * dq_lock protects dquot->dq_dqb and also guards consistency of + * dquot->dq_dqb with inode->i_blocks, i_bytes. * i_blocks and i_bytes updates itself are guarded by i_lock acquired directly * in inode_add_bytes() and inode_sub_bytes(). * - * The spinlock ordering is hence: dq_data_lock > dq_list_lock > i_lock, + * The spinlock ordering is hence: + * dq_data_lock > dq_lock > dq_list_lock > i_lock, * dq_list_lock > hlist_bl_head - + * * Note that some things (eg. sb pointer, type, id) doesn't change during * the life of the dquot structure and so needn't to be protected by a lock * @@ -144,6 +147,9 @@ EXPORT_SYMBOL(__quota_error); #if defined(CONFIG_QUOTA_DEBUG) || defined(CONFIG_PRINT_QUOTA_WARNING) static char *quotatypes[] = INITQFNAMES; +#define ASSERT_SPIN_LOCKED(lk) assert_spin_locked(lk) +#else +#define ASSERT_SPIN_LOCKED(lk) #endif static struct quota_format_type *quota_formats; /* List of registered formats */ static struct quota_module_name module_names[] = INIT_QUOTA_MODULE_NAMES; @@ -371,6 +377,56 @@ static inline void dqput_all(struct dquot **dquot) dqput(dquot[cnt]); } +static void __lock_dquot_double(struct dquot * const dq1, + struct dquot * const dq2) +{ + if(dq1 < dq2) { + spin_lock_nested(&dq1->dq_lock, DQUOT_LOCK_CLASS(dq1)); + spin_lock_nested(&dq2->dq_lock, DQUOT_LOCK_CLASS_NESTED(dq2)); + } else { + spin_lock_nested(&dq2->dq_lock, DQUOT_LOCK_CLASS(dq2)); + spin_lock_nested(&dq1->dq_lock, DQUOT_LOCK_CLASS_NESTED(dq1)); + } +} + +/* This is strange, but all conditions are possible */ +static inline void lock_dquot_double(struct dquot * const *dq1, + struct dquot * const *dq2) +{ + unsigned int cnt; + + for (cnt = 0; cnt < MAXQUOTAS; cnt++) { + if (dq1[cnt]) { + if (likely(dq2[cnt])) + __lock_dquot_double(dq1[cnt], dq2[cnt]); + else + spin_lock_nested(&dq1[cnt]->dq_lock, + DQUOT_LOCK_CLASS(dq1[cnt])); + } else { + if (unlikely(dq2[cnt])) + spin_lock_nested(&dq2[cnt]->dq_lock, + DQUOT_LOCK_CLASS(dq2[cnt])); + } + } +} +static inline void lock_inode_dquots(struct dquot * const *dquot) +{ + unsigned int cnt; + for (cnt = 0; cnt < MAXQUOTAS; cnt++) + if (dquot[cnt]) + spin_lock_nested(&dquot[cnt]->dq_lock, + DQUOT_LOCK_CLASS(dquot[cnt])); +} + +static inline void unlock_inode_dquots(struct dquot * const *dquot) +{ + unsigned int cnt; + + for (cnt = 0; cnt < MAXQUOTAS; cnt++) + if (dquot[cnt]) + spin_unlock(&dquot[cnt]->dq_lock); +} + /* This function needs dq_list_lock */ static inline int clear_dquot_dirty(struct dquot *dquot) { @@ -805,6 +861,7 @@ static struct dquot *get_empty_dquot(struct super_block *sb, int type) INIT_LIST_HEAD(&dquot->dq_dirty); INIT_HLIST_BL_NODE(&dquot->dq_hash); init_waitqueue_head(&dquot->dq_wait_unused); + spin_lock_init(&dquot->dq_lock); dquot->dq_sb = sb; dquot->dq_type = type; atomic_set(&dquot->dq_count, 1); @@ -1062,16 +1119,19 @@ static void drop_dquot_ref(struct super_block *sb, int type) static inline void dquot_incr_inodes(struct dquot *dquot, qsize_t number) { + ASSERT_SPIN_LOCKED(&dquot->dq_lock); dquot->dq_dqb.dqb_curinodes += number; } static inline void dquot_incr_space(struct dquot *dquot, qsize_t number) { + ASSERT_SPIN_LOCKED(&dquot->dq_lock); dquot->dq_dqb.dqb_curspace += number; } static inline void dquot_resv_space(struct dquot *dquot, qsize_t number) { + ASSERT_SPIN_LOCKED(&dquot->dq_lock); dquot->dq_dqb.dqb_rsvspace += number; } @@ -1080,6 +1140,7 @@ static inline void dquot_resv_space(struct dquot *dquot, qsize_t number) */ static void dquot_claim_reserved_space(struct dquot *dquot, qsize_t number) { + ASSERT_SPIN_LOCKED(&dquot->dq_lock); if (dquot->dq_dqb.dqb_rsvspace < number) { WARN_ON_ONCE(1); number = dquot->dq_dqb.dqb_rsvspace; @@ -1091,6 +1152,7 @@ static void dquot_claim_reserved_space(struct dquot *dquot, qsize_t number) static inline void dquot_free_reserved_space(struct dquot *dquot, qsize_t number) { + ASSERT_SPIN_LOCKED(&dquot->dq_lock); if (dquot->dq_dqb.dqb_rsvspace >= number) dquot->dq_dqb.dqb_rsvspace -= number; else { @@ -1101,6 +1163,7 @@ void dquot_free_reserved_space(struct dquot *dquot, qsize_t number) static void dquot_decr_inodes(struct dquot *dquot, qsize_t number) { + ASSERT_SPIN_LOCKED(&dquot->dq_lock); if (dqctl(dquot->dq_sb)->flags & DQUOT_NEGATIVE_USAGE || dquot->dq_dqb.dqb_curinodes >= number) dquot->dq_dqb.dqb_curinodes -= number; @@ -1113,6 +1176,7 @@ static void dquot_decr_inodes(struct dquot *dquot, qsize_t number) static void dquot_decr_space(struct dquot *dquot, qsize_t number) { + ASSERT_SPIN_LOCKED(&dquot->dq_lock); if (dqctl(dquot->dq_sb)->flags & DQUOT_NEGATIVE_USAGE || dquot->dq_dqb.dqb_curspace >= number) dquot->dq_dqb.dqb_curspace -= number; @@ -1230,7 +1294,7 @@ static int ignore_hardlimit(struct dquot *dquot) !(info->dqi_flags & V1_DQF_RSQUASH)); } -/* needs dq_data_lock */ +/* needs dq_data_lock, ->dq_lock */ static int check_idq(struct dquot *dquot, qsize_t inodes, char *warntype) { qsize_t newinodes = dquot->dq_dqb.dqb_curinodes + inodes; @@ -1267,7 +1331,7 @@ static int check_idq(struct dquot *dquot, qsize_t inodes, char *warntype) return 0; } -/* needs dq_data_lock */ +/* needs dq_data_lock, ->dq_lock */ static int check_bdq(struct dquot *dquot, qsize_t space, int prealloc, char *warntype) { qsize_t tspace; @@ -1422,8 +1486,11 @@ static void __dquot_initialize(struct inode *inode, int type) * did a write before quota was turned on */ rsv = inode_get_rsv_space(inode); - if (unlikely(rsv)) + if (unlikely(rsv)) { + spin_lock(&inode->i_dquot[cnt]->dq_lock); dquot_resv_space(inode->i_dquot[cnt], rsv); + spin_unlock(&inode->i_dquot[cnt]->dq_lock); + } } } spin_unlock(&dqopts(sb)->dq_data_lock); @@ -1603,12 +1670,14 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags) warntype[cnt] = QUOTA_NL_NOWARN; spin_lock(&dqopts(inode->i_sb)->dq_data_lock); + lock_inode_dquots(inode->i_dquot); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (!inode->i_dquot[cnt]) continue; ret = check_bdq(inode->i_dquot[cnt], number, !warn, - warntype+cnt); + warntype + cnt); if (ret && !nofail) { + unlock_inode_dquots(inode->i_dquot); spin_unlock(&dqopts(inode->i_sb)->dq_data_lock); goto out_flush_warn; } @@ -1622,6 +1691,7 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags) dquot_incr_space(inode->i_dquot[cnt], number); } inode_incr_space(inode, number, reserve); + unlock_inode_dquots(inode->i_dquot); spin_unlock(&dqopts(inode->i_sb)->dq_data_lock); if (reserve) @@ -1658,6 +1728,7 @@ int dquot_alloc_inode(const struct inode *inode) warntype[cnt] = QUOTA_NL_NOWARN; down_read(&dqopts(inode->i_sb)->dqptr_sem); spin_lock(&dqopts(inode->i_sb)->dq_data_lock); + lock_inode_dquots(inode->i_dquot); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (!inode->i_dquot[cnt]) continue; @@ -1673,6 +1744,7 @@ int dquot_alloc_inode(const struct inode *inode) } warn_put_all: + unlock_inode_dquots(inode->i_dquot); spin_unlock(&dqopts(inode->i_sb)->dq_data_lock); if (ret == 0) mark_all_dquot_dirty(inode->i_dquot); @@ -1700,6 +1772,7 @@ int dquot_claim_space_nodirty(struct inode *inode, qsize_t number) rcu_read_unlock(); down_read(&dqopts(inode->i_sb)->dqptr_sem); spin_lock(&dqopts(inode->i_sb)->dq_data_lock); + lock_inode_dquots(inode->i_dquot); /* Claim reserved quotas to allocated quotas */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (inode->i_dquot[cnt]) @@ -1708,6 +1781,7 @@ int dquot_claim_space_nodirty(struct inode *inode, qsize_t number) } /* Update inode bytes */ inode_claim_rsv_space(inode, number); + unlock_inode_dquots(inode->i_dquot); spin_unlock(&dqopts(inode->i_sb)->dq_data_lock); mark_all_dquot_dirty(inode->i_dquot); up_read(&dqopts(inode->i_sb)->dqptr_sem); @@ -1738,6 +1812,7 @@ void __dquot_free_space(struct inode *inode, qsize_t number, int flags) rcu_read_unlock(); down_read(&dqopts(inode->i_sb)->dqptr_sem); spin_lock(&dqopts(inode->i_sb)->dq_data_lock); + lock_inode_dquots(inode->i_dquot); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (!inode->i_dquot[cnt]) continue; @@ -1748,6 +1823,7 @@ void __dquot_free_space(struct inode *inode, qsize_t number, int flags) dquot_decr_space(inode->i_dquot[cnt], number); } inode_decr_space(inode, number, reserve); + unlock_inode_dquots(inode->i_dquot); spin_unlock(&dqopts(inode->i_sb)->dq_data_lock); if (reserve) @@ -1779,12 +1855,14 @@ void dquot_free_inode(const struct inode *inode) rcu_read_unlock(); down_read(&dqopts(inode->i_sb)->dqptr_sem); spin_lock(&dqopts(inode->i_sb)->dq_data_lock); + lock_inode_dquots(inode->i_dquot); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { if (!inode->i_dquot[cnt]) continue; warntype[cnt] = info_idq_free(inode->i_dquot[cnt], 1); dquot_decr_inodes(inode->i_dquot[cnt], 1); } + unlock_inode_dquots(inode->i_dquot); spin_unlock(&dqopts(inode->i_sb)->dq_data_lock); mark_all_dquot_dirty(inode->i_dquot); flush_warnings(inode->i_dquot, warntype); @@ -1833,10 +1911,6 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to) return 0; } spin_lock(&dqopts(inode->i_sb)->dq_data_lock); - cur_space = inode_get_bytes(inode); - rsv_space = inode_get_rsv_space(inode); - space = cur_space + rsv_space; - /* Build the transfer_from list and check the limits */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { /* * Skip changes for same uid or gid or for turned off quota-type. @@ -1846,8 +1920,21 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to) /* Avoid races with quotaoff() */ if (!sb_has_quota_active(inode->i_sb, cnt)) continue; + /* Quota may be the same due to previous errors when + chown succeed but inode still belongs to old dquot*/ + if (transfer_to[cnt] == inode->i_dquot[cnt]) + continue; is_valid[cnt] = 1; transfer_from[cnt] = inode->i_dquot[cnt]; + } + lock_dquot_double(transfer_from, transfer_to); + cur_space = inode_get_bytes(inode); + rsv_space = inode_get_rsv_space(inode); + space = cur_space + rsv_space; + /* Build the transfer_from list and check the limits */ + for (cnt = 0; cnt < MAXQUOTAS; cnt++) { + if (!is_valid[cnt]) + continue; ret = check_idq(transfer_to[cnt], 1, warntype_to + cnt); if (ret) goto over_quota; @@ -1880,6 +1967,8 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to) inode->i_dquot[cnt] = transfer_to[cnt]; } + unlock_inode_dquots(transfer_to); + unlock_inode_dquots(transfer_from); spin_unlock(&dqopts(inode->i_sb)->dq_data_lock); up_write(&dqopts(inode->i_sb)->dqptr_sem); srcu_read_unlock(&dqopts(inode->i_sb)->dq_srcu, idx); @@ -1894,6 +1983,8 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to) transfer_to[cnt] = transfer_from[cnt]; return 0; over_quota: + unlock_inode_dquots(transfer_to); + unlock_inode_dquots(transfer_from); spin_unlock(&dqopts(inode->i_sb)->dq_data_lock); up_write(&dqopts(inode->i_sb)->dqptr_sem); srcu_read_unlock(&dqopts(inode->i_sb)->dq_srcu, idx); @@ -2455,6 +2546,7 @@ static void do_get_dqblk(struct dquot *dquot, struct fs_disk_quota *di) di->d_id = dquot->dq_id; spin_lock(&sb_dqopts(dquot)->dq_data_lock); + spin_lock(&dquot->dq_lock); di->d_blk_hardlimit = stoqb(dm->dqb_bhardlimit); di->d_blk_softlimit = stoqb(dm->dqb_bsoftlimit); di->d_ino_hardlimit = dm->dqb_ihardlimit; @@ -2463,6 +2555,7 @@ static void do_get_dqblk(struct dquot *dquot, struct fs_disk_quota *di) di->d_icount = dm->dqb_curinodes; di->d_btimer = dm->dqb_btime; di->d_itimer = dm->dqb_itime; + spin_unlock(&dquot->dq_lock); spin_unlock(&sb_dqopts(dquot)->dq_data_lock); } @@ -2507,6 +2600,7 @@ static int do_set_dqblk(struct dquot *dquot, struct fs_disk_quota *di) return -ERANGE; spin_lock(&sb_dqopts(dquot)->dq_data_lock); + spin_lock(&dquot->dq_lock); if (di->d_fieldmask & FS_DQ_BCOUNT) { dm->dqb_curspace = di->d_bcount - dm->dqb_rsvspace; check_blim = 1; @@ -2572,6 +2666,7 @@ static int do_set_dqblk(struct dquot *dquot, struct fs_disk_quota *di) clear_bit(DQ_FAKE_B, &dquot->dq_flags); else set_bit(DQ_FAKE_B, &dquot->dq_flags); + spin_unlock(&dquot->dq_lock); spin_unlock(&sb_dqopts(dquot)->dq_data_lock); mark_dquot_dirty(dquot); diff --git a/fs/quota/quota_tree.c b/fs/quota/quota_tree.c index a089c70..e6307f6 100644 --- a/fs/quota/quota_tree.c +++ b/fs/quota/quota_tree.c @@ -376,7 +376,9 @@ int qtree_write_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot) } } spin_lock(&dqopts(sb)->dq_data_lock); + spin_lock(&dquot->dq_lock); info->dqi_ops->mem2disk_dqblk(ddquot, dquot); + spin_unlock(&dquot->dq_lock); spin_unlock(&dqopts(sb)->dq_data_lock); ret = sb->s_op->quota_write(sb, type, ddquot, info->dqi_entry_size, dquot->dq_off); @@ -632,12 +634,14 @@ int qtree_read_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot) goto out; } spin_lock(&dqopts(sb)->dq_data_lock); + spin_lock(&dquot->dq_lock); info->dqi_ops->disk2mem_dqblk(dquot, ddquot); if (!dquot->dq_dqb.dqb_bhardlimit && !dquot->dq_dqb.dqb_bsoftlimit && !dquot->dq_dqb.dqb_ihardlimit && !dquot->dq_dqb.dqb_isoftlimit) set_bit(DQ_FAKE_B, &dquot->dq_flags); + spin_unlock(&dquot->dq_lock); spin_unlock(&dqopts(sb)->dq_data_lock); kfree(ddquot); out: diff --git a/include/linux/quota.h b/include/linux/quota.h index 7693b18..c88a352 100644 --- a/include/linux/quota.h +++ b/include/linux/quota.h @@ -281,6 +281,18 @@ static inline void dqstats_dec(unsigned int type) * quotactl. They are set under dq_data_lock\ * and the quota format handling dquot can\ * clear them when it sees fit. */ +/* + * To make lock_dep happy we have to place different dquot types to + * different lock classes. +*/ +enum dquot_lock_class +{ + DQUOT_LOCK_NORMAL, /* implicitly used by plain spin_lock() APIs. */ + DQUOT_LOCK_NESTED +}; +#define DQUOT_LOCK_CLASS(dquot) (DQUOT_LOCK_NORMAL + (dquot)->dq_type * 2) +#define DQUOT_LOCK_CLASS_NESTED(dquot) (DQUOT_LOCK_NESTED + \ + (dquot)->dq_type * 2) struct dquot { struct hlist_bl_node dq_hash; /* Hash list in memory */ @@ -296,6 +308,7 @@ struct dquot { unsigned long dq_flags; /* See DQ_* */ short dq_type; /* Type of quota */ struct mem_dqblk dq_dqb; /* Diskquota usage */ + spinlock_t dq_lock; /* protect in mem_dqblk */ }; /* Operations which must be implemented by each quota format */ -- 1.6.5.2 -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html