vfs_dq_init() is called from many places (almost from all generic vfs functions). What's why this function is so performance critical. Current dquot_initialize() implementation is far from optimal because: Usually it called for inode which already has quota initialized This fast path may be easily optimized. Just check i_dquot[]. In fact, no locking is needed here. In case of slow path(where inode has no quota yet) it requires to hold dqptr_sem for write. This assumtions comes from the the rule what states: "Any changes with i_dquot[] must be protected by sem locked for write". In fact changing i_dquot from NULL => !NULL (is what vfs_dq_init does) is differ from changing it from !NULL => NULL (vfs_dq_drop, and others) The difference is what if we allow i_dquot[] to change from NULL => !NULL under dqptr_sem locked for read this will not break usual charging functions (alloc/claim/free, etc.) because pointer assignment is atomic for all arch. The only changes we have to do to prevent races is just atomically copy i_dquot[] to local variable at the beginning of each charging function. Race between concurrent init() functions is solved by cmpxchg(). Signed-off-by: Dmitry Monakhov <dmonakhov@xxxxxxxxxx> --- fs/quota/dquot.c | 87 ++++++++++++++++++++++++++++++++--------------------- 1 files changed, 52 insertions(+), 35 deletions(-) diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index de7e7de..6979224 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -1246,6 +1246,9 @@ static int info_bdq_free(struct dquot *dquot, qsize_t space) * Initialize quota pointers in inode * We do things in a bit complicated way but by that we avoid calling * dqget() and thus filesystem callbacks under dqptr_sem. + * Lock dqptr_sem for read here, is acceptable, because i_dquot it is + * acceptable to change the value from NULL => !NULL, opposite change + * must be protected by dqptr_sem for write. */ int dquot_initialize(struct inode *inode, int type) { @@ -1258,6 +1261,9 @@ int dquot_initialize(struct inode *inode, int type) * re-enter the quota code and are already holding the mutex */ if (IS_NOQUOTA(inode)) return 0; + /* Check whenever quota was already initialized */ + if (!dqinit_needed(inode, type)) + return 0; /* First get references to structures we might need. */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { @@ -1274,7 +1280,7 @@ int dquot_initialize(struct inode *inode, int type) got[cnt] = dqget(sb, id, cnt); } - down_write(&sb_dqopt(sb)->dqptr_sem); + down_read(&sb_dqopt(sb)->dqptr_sem); /* Having dqptr_sem we know NOQUOTA flags can't be altered... */ if (IS_NOQUOTA(inode)) goto out_err; @@ -1284,13 +1290,11 @@ int dquot_initialize(struct inode *inode, int type) /* Avoid races with quotaoff() */ if (!sb_has_quota_active(sb, cnt)) continue; - if (!inode->i_dquot[cnt]) { - inode->i_dquot[cnt] = got[cnt]; + if (!cmpxchg(&inode->i_dquot[cnt], NULL, got[cnt])) got[cnt] = NULL; - } } out_err: - up_write(&sb_dqopt(sb)->dqptr_sem); + up_read(&sb_dqopt(sb)->dqptr_sem); /* Drop unused references */ dqput_all(got); return ret; @@ -1416,6 +1420,7 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, { int cnt, ret = QUOTA_OK; char warntype[MAXQUOTAS]; + struct dquot *dquot[MAXQUOTAS]; /* * First test before acquiring mutex - solves deadlocks when we @@ -1432,14 +1437,16 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, goto out_unlock; } - for (cnt = 0; cnt < MAXQUOTAS; cnt++) + for (cnt = 0; cnt < MAXQUOTAS; cnt++) { + dquot[cnt] = inode->i_dquot[cnt]; warntype[cnt] = QUOTA_NL_NOWARN; + } spin_lock(&dq_data_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - if (!inode->i_dquot[cnt]) + if (!dquot[cnt]) continue; - if (check_bdq(inode->i_dquot[cnt], number, warn, warntype+cnt) + if (check_bdq(dquot[cnt], number, warn, warntype+cnt) == NO_QUOTA) { ret = NO_QUOTA; spin_unlock(&dq_data_lock); @@ -1447,21 +1454,21 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, } } for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - if (!inode->i_dquot[cnt]) + if (!dquot[cnt]) continue; if (reserve) - dquot_resv_space(inode->i_dquot[cnt], number); + dquot_resv_space(dquot[cnt], number); else - dquot_incr_space(inode->i_dquot[cnt], number); + dquot_incr_space(dquot[cnt], number); } inode_incr_space(inode, number, reserve); spin_unlock(&dq_data_lock); if (reserve) goto out_flush_warn; - mark_all_dquot_dirty(inode->i_dquot); + mark_all_dquot_dirty(dquot); out_flush_warn: - flush_warnings(inode->i_dquot, warntype); + flush_warnings(dquot, warntype); out_unlock: up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); out: @@ -1487,13 +1494,16 @@ int dquot_alloc_inode(const struct inode *inode, qsize_t number) { int cnt, ret = NO_QUOTA; char warntype[MAXQUOTAS]; + struct dquot *dquot[MAXQUOTAS]; /* First test before acquiring mutex - solves deadlocks when we * re-enter the quota code and are already holding the mutex */ if (IS_NOQUOTA(inode)) return QUOTA_OK; - for (cnt = 0; cnt < MAXQUOTAS; cnt++) + for (cnt = 0; cnt < MAXQUOTAS; cnt++) { + dquot[cnt] = inode->i_dquot[cnt]; warntype[cnt] = QUOTA_NL_NOWARN; + } down_read(&sb_dqopt(inode->i_sb)->dqptr_sem); if (IS_NOQUOTA(inode)) { up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); @@ -1501,24 +1511,24 @@ int dquot_alloc_inode(const struct inode *inode, qsize_t number) } spin_lock(&dq_data_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - if (!inode->i_dquot[cnt]) + if (!dquot[cnt]) continue; - if (check_idq(inode->i_dquot[cnt], number, warntype+cnt) + if (check_idq(dquot[cnt], number, warntype+cnt) == NO_QUOTA) goto warn_put_all; } for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - if (!inode->i_dquot[cnt]) + if (!dquot[cnt]) continue; - dquot_incr_inodes(inode->i_dquot[cnt], number); + dquot_incr_inodes(dquot[cnt], number); } ret = QUOTA_OK; warn_put_all: spin_unlock(&dq_data_lock); if (ret == QUOTA_OK) - mark_all_dquot_dirty(inode->i_dquot); - flush_warnings(inode->i_dquot, warntype); + mark_all_dquot_dirty(dquot); + flush_warnings(dquot, warntype); up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); return ret; } @@ -1528,6 +1538,7 @@ int dquot_claim_space(struct inode *inode, qsize_t number) { int cnt; int ret = QUOTA_OK; + struct dquot *dquot[MAXQUOTAS]; if (IS_NOQUOTA(inode)) { inode_claim_rsv_space(inode, number); @@ -1544,14 +1555,14 @@ int dquot_claim_space(struct inode *inode, qsize_t number) spin_lock(&dq_data_lock); /* Claim reserved quotas to allocated quotas */ for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - if (inode->i_dquot[cnt]) - dquot_claim_reserved_space(inode->i_dquot[cnt], - number); + dquot[cnt] = inode->i_dquot[cnt]; + if (dquot[cnt]) + dquot_claim_reserved_space(dquot[cnt], number); } /* Update inode bytes */ inode_claim_rsv_space(inode, number); spin_unlock(&dq_data_lock); - mark_all_dquot_dirty(inode->i_dquot); + mark_all_dquot_dirty(dquot); up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); out: return ret; @@ -1565,6 +1576,8 @@ int __dquot_free_space(struct inode *inode, qsize_t number, int reserve) { unsigned int cnt; char warntype[MAXQUOTAS]; + struct dquot *dquot[MAXQUOTAS]; + /* First test before acquiring mutex - solves deadlocks when we * re-enter the quota code and are already holding the mutex */ @@ -1582,22 +1595,23 @@ out_sub: } spin_lock(&dq_data_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - if (!inode->i_dquot[cnt]) + dquot[cnt] = inode->i_dquot[cnt]; + if (!dquot[cnt]) continue; - warntype[cnt] = info_bdq_free(inode->i_dquot[cnt], number); + warntype[cnt] = info_bdq_free(dquot[cnt], number); if (reserve) - dquot_free_reserved_space(inode->i_dquot[cnt], number); + dquot_free_reserved_space(dquot[cnt], number); else - dquot_decr_space(inode->i_dquot[cnt], number); + dquot_decr_space(dquot[cnt], number); } inode_decr_space(inode, number, reserve); spin_unlock(&dq_data_lock); if (reserve) goto out_unlock; - mark_all_dquot_dirty(inode->i_dquot); + mark_all_dquot_dirty(dquot); out_unlock: - flush_warnings(inode->i_dquot, warntype); + flush_warnings(dquot, warntype); up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); return QUOTA_OK; } @@ -1625,6 +1639,8 @@ int dquot_free_inode(const struct inode *inode, qsize_t number) { unsigned int cnt; char warntype[MAXQUOTAS]; + struct dquot *dquot[MAXQUOTAS]; + /* First test before acquiring mutex - solves deadlocks when we * re-enter the quota code and are already holding the mutex */ @@ -1639,14 +1655,15 @@ int dquot_free_inode(const struct inode *inode, qsize_t number) } spin_lock(&dq_data_lock); for (cnt = 0; cnt < MAXQUOTAS; cnt++) { - if (!inode->i_dquot[cnt]) + dquot[cnt] = inode->i_dquot[cnt]; + if (!dquot[cnt]) continue; - warntype[cnt] = info_idq_free(inode->i_dquot[cnt], number); - dquot_decr_inodes(inode->i_dquot[cnt], number); + warntype[cnt] = info_idq_free(dquot[cnt], number); + dquot_decr_inodes(dquot[cnt], number); } spin_unlock(&dq_data_lock); - mark_all_dquot_dirty(inode->i_dquot); - flush_warnings(inode->i_dquot, warntype); + mark_all_dquot_dirty(dquot); + flush_warnings(dquot, warntype); up_read(&sb_dqopt(inode->i_sb)->dqptr_sem); return QUOTA_OK; } -- 1.6.0.4 -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html