[PATCH] quota: quota initialization improvements

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



vfs_dq_init() is called from many places (almost from all generic
vfs functions). What's why this function is so performance critical.
Current dquot_initialize() implementation is far from optimal because:

Usually it called for inode which already has quota initialized
 This fast path may be easily optimized. Just check i_dquot[].
 In fact, no locking is needed here.

In case of slow path(where inode has no quota yet) it requires to hold
dqptr_sem for write. This assumtions comes from the the rule what states:
"Any changes with i_dquot[] must be protected by sem locked for write".
In fact changing i_dquot from NULL => !NULL  (is what vfs_dq_init does)
is differ from changing it from !NULL => NULL (vfs_dq_drop, and others)

The difference is what if we allow i_dquot[] to change from
NULL => !NULL under dqptr_sem locked for read this will not break usual
charging functions (alloc/claim/free, etc.) because pointer assignment
is atomic for all arch. The only changes we have to do to prevent races
is just atomically copy i_dquot[] to local variable at the beginning of
each charging function.
Race between concurrent init() functions is solved by cmpxchg().

Signed-off-by: Dmitry Monakhov <dmonakhov@xxxxxxxxxx>
---
 fs/quota/dquot.c |   87 ++++++++++++++++++++++++++++++++---------------------
 1 files changed, 52 insertions(+), 35 deletions(-)

diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index de7e7de..6979224 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -1246,6 +1246,9 @@ static int info_bdq_free(struct dquot *dquot, qsize_t space)
  *	Initialize quota pointers in inode
  *	We do things in a bit complicated way but by that we avoid calling
  *	dqget() and thus filesystem callbacks under dqptr_sem.
+ *	Lock dqptr_sem for read here, is acceptable, because i_dquot it is
+ *	acceptable to change the value from NULL => !NULL, opposite change
+ *	must be protected by dqptr_sem for write.
  */
 int dquot_initialize(struct inode *inode, int type)
 {
@@ -1258,6 +1261,9 @@ int dquot_initialize(struct inode *inode, int type)
          * re-enter the quota code and are already holding the mutex */
 	if (IS_NOQUOTA(inode))
 		return 0;
+	/* Check whenever quota was already initialized */
+	if (!dqinit_needed(inode, type))
+		return 0;
 
 	/* First get references to structures we might need. */
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
@@ -1274,7 +1280,7 @@ int dquot_initialize(struct inode *inode, int type)
 		got[cnt] = dqget(sb, id, cnt);
 	}
 
-	down_write(&sb_dqopt(sb)->dqptr_sem);
+	down_read(&sb_dqopt(sb)->dqptr_sem);
 	/* Having dqptr_sem we know NOQUOTA flags can't be altered... */
 	if (IS_NOQUOTA(inode))
 		goto out_err;
@@ -1284,13 +1290,11 @@ int dquot_initialize(struct inode *inode, int type)
 		/* Avoid races with quotaoff() */
 		if (!sb_has_quota_active(sb, cnt))
 			continue;
-		if (!inode->i_dquot[cnt]) {
-			inode->i_dquot[cnt] = got[cnt];
+		if (!cmpxchg(&inode->i_dquot[cnt], NULL, got[cnt]))
 			got[cnt] = NULL;
-		}
 	}
 out_err:
-	up_write(&sb_dqopt(sb)->dqptr_sem);
+	up_read(&sb_dqopt(sb)->dqptr_sem);
 	/* Drop unused references */
 	dqput_all(got);
 	return ret;
@@ -1416,6 +1420,7 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number,
 {
 	int cnt, ret = QUOTA_OK;
 	char warntype[MAXQUOTAS];
+	struct dquot *dquot[MAXQUOTAS];
 
 	/*
 	 * First test before acquiring mutex - solves deadlocks when we
@@ -1432,14 +1437,16 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number,
 		goto out_unlock;
 	}
 
-	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
+	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+		dquot[cnt] = inode->i_dquot[cnt];
 		warntype[cnt] = QUOTA_NL_NOWARN;
+	}
 
 	spin_lock(&dq_data_lock);
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-		if (!inode->i_dquot[cnt])
+		if (!dquot[cnt])
 			continue;
-		if (check_bdq(inode->i_dquot[cnt], number, warn, warntype+cnt)
+		if (check_bdq(dquot[cnt], number, warn, warntype+cnt)
 		    == NO_QUOTA) {
 			ret = NO_QUOTA;
 			spin_unlock(&dq_data_lock);
@@ -1447,21 +1454,21 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number,
 		}
 	}
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-		if (!inode->i_dquot[cnt])
+		if (!dquot[cnt])
 			continue;
 		if (reserve)
-			dquot_resv_space(inode->i_dquot[cnt], number);
+			dquot_resv_space(dquot[cnt], number);
 		else
-			dquot_incr_space(inode->i_dquot[cnt], number);
+			dquot_incr_space(dquot[cnt], number);
 	}
 	inode_incr_space(inode, number, reserve);
 	spin_unlock(&dq_data_lock);
 
 	if (reserve)
 		goto out_flush_warn;
-	mark_all_dquot_dirty(inode->i_dquot);
+	mark_all_dquot_dirty(dquot);
 out_flush_warn:
-	flush_warnings(inode->i_dquot, warntype);
+	flush_warnings(dquot, warntype);
 out_unlock:
 	up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
 out:
@@ -1487,13 +1494,16 @@ int dquot_alloc_inode(const struct inode *inode, qsize_t number)
 {
 	int cnt, ret = NO_QUOTA;
 	char warntype[MAXQUOTAS];
+	struct dquot *dquot[MAXQUOTAS];
 
 	/* First test before acquiring mutex - solves deadlocks when we
          * re-enter the quota code and are already holding the mutex */
 	if (IS_NOQUOTA(inode))
 		return QUOTA_OK;
-	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
+	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
+		dquot[cnt] = inode->i_dquot[cnt];
 		warntype[cnt] = QUOTA_NL_NOWARN;
+	}
 	down_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
 	if (IS_NOQUOTA(inode)) {
 		up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
@@ -1501,24 +1511,24 @@ int dquot_alloc_inode(const struct inode *inode, qsize_t number)
 	}
 	spin_lock(&dq_data_lock);
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-		if (!inode->i_dquot[cnt])
+		if (!dquot[cnt])
 			continue;
-		if (check_idq(inode->i_dquot[cnt], number, warntype+cnt)
+		if (check_idq(dquot[cnt], number, warntype+cnt)
 		    == NO_QUOTA)
 			goto warn_put_all;
 	}
 
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-		if (!inode->i_dquot[cnt])
+		if (!dquot[cnt])
 			continue;
-		dquot_incr_inodes(inode->i_dquot[cnt], number);
+		dquot_incr_inodes(dquot[cnt], number);
 	}
 	ret = QUOTA_OK;
 warn_put_all:
 	spin_unlock(&dq_data_lock);
 	if (ret == QUOTA_OK)
-		mark_all_dquot_dirty(inode->i_dquot);
-	flush_warnings(inode->i_dquot, warntype);
+		mark_all_dquot_dirty(dquot);
+	flush_warnings(dquot, warntype);
 	up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
 	return ret;
 }
@@ -1528,6 +1538,7 @@ int dquot_claim_space(struct inode *inode, qsize_t number)
 {
 	int cnt;
 	int ret = QUOTA_OK;
+	struct dquot *dquot[MAXQUOTAS];
 
 	if (IS_NOQUOTA(inode)) {
 		inode_claim_rsv_space(inode, number);
@@ -1544,14 +1555,14 @@ int dquot_claim_space(struct inode *inode, qsize_t number)
 	spin_lock(&dq_data_lock);
 	/* Claim reserved quotas to allocated quotas */
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-		if (inode->i_dquot[cnt])
-			dquot_claim_reserved_space(inode->i_dquot[cnt],
-							number);
+		dquot[cnt] = inode->i_dquot[cnt];
+		if (dquot[cnt])
+			dquot_claim_reserved_space(dquot[cnt], number);
 	}
 	/* Update inode bytes */
 	inode_claim_rsv_space(inode, number);
 	spin_unlock(&dq_data_lock);
-	mark_all_dquot_dirty(inode->i_dquot);
+	mark_all_dquot_dirty(dquot);
 	up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
 out:
 	return ret;
@@ -1565,6 +1576,8 @@ int __dquot_free_space(struct inode *inode, qsize_t number, int reserve)
 {
 	unsigned int cnt;
 	char warntype[MAXQUOTAS];
+	struct dquot *dquot[MAXQUOTAS];
+
 
 	/* First test before acquiring mutex - solves deadlocks when we
          * re-enter the quota code and are already holding the mutex */
@@ -1582,22 +1595,23 @@ out_sub:
 	}
 	spin_lock(&dq_data_lock);
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-		if (!inode->i_dquot[cnt])
+		dquot[cnt] = inode->i_dquot[cnt];
+		if (!dquot[cnt])
 			continue;
-		warntype[cnt] = info_bdq_free(inode->i_dquot[cnt], number);
+		warntype[cnt] = info_bdq_free(dquot[cnt], number);
 		if (reserve)
-			dquot_free_reserved_space(inode->i_dquot[cnt], number);
+			dquot_free_reserved_space(dquot[cnt], number);
 		else
-			dquot_decr_space(inode->i_dquot[cnt], number);
+			dquot_decr_space(dquot[cnt], number);
 	}
 	inode_decr_space(inode, number, reserve);
 	spin_unlock(&dq_data_lock);
 
 	if (reserve)
 		goto out_unlock;
-	mark_all_dquot_dirty(inode->i_dquot);
+	mark_all_dquot_dirty(dquot);
 out_unlock:
-	flush_warnings(inode->i_dquot, warntype);
+	flush_warnings(dquot, warntype);
 	up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
 	return QUOTA_OK;
 }
@@ -1625,6 +1639,8 @@ int dquot_free_inode(const struct inode *inode, qsize_t number)
 {
 	unsigned int cnt;
 	char warntype[MAXQUOTAS];
+	struct dquot *dquot[MAXQUOTAS];
+
 
 	/* First test before acquiring mutex - solves deadlocks when we
          * re-enter the quota code and are already holding the mutex */
@@ -1639,14 +1655,15 @@ int dquot_free_inode(const struct inode *inode, qsize_t number)
 	}
 	spin_lock(&dq_data_lock);
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-		if (!inode->i_dquot[cnt])
+		dquot[cnt] = inode->i_dquot[cnt];
+		if (!dquot[cnt])
 			continue;
-		warntype[cnt] = info_idq_free(inode->i_dquot[cnt], number);
-		dquot_decr_inodes(inode->i_dquot[cnt], number);
+		warntype[cnt] = info_idq_free(dquot[cnt], number);
+		dquot_decr_inodes(dquot[cnt], number);
 	}
 	spin_unlock(&dq_data_lock);
-	mark_all_dquot_dirty(inode->i_dquot);
-	flush_warnings(inode->i_dquot, warntype);
+	mark_all_dquot_dirty(dquot);
+	flush_warnings(dquot, warntype);
 	up_read(&sb_dqopt(inode->i_sb)->dqptr_sem);
 	return QUOTA_OK;
 }
-- 
1.6.0.4

--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]
  Powered by Linux