[PATCH 18/19] quota: remove dqptr_sem

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



dqptr_sem is one of the most contenting locks for now
each dquot_initialize and dquot_transfer result in down_write(dqptr_sem)
Let's user inode->i_lock to protect i_dquot pointers. In that case all
users which modified i_dquot simply converted to that lock. But users
who hold the dqptr_sem for read(charge/uncharge methods) usually
looks like follows

down_read(&dqptr_sem)
___charge_quota()
make_quota_dirty(inode->i_dquot) --> may_sleep
up_read(&dquot_sem)

We must drop i_lock before make_quota_dirty or flush_warnings,
to protect dquot from being fried let's grab extra reference for dquot,
and drop it after we have done with dquot object.

XXX: There is a plan to get rid of extra dqget/dqput calls by protecting
     dquot destruction by SRCU.

Signed-off-by: Dmitry Monakhov <dmonakhov@xxxxxxxxxx>
---
 fs/quota/dquot.c         |  145 ++++++++++++++++++++++++++--------------------
 fs/super.c               |    1 -
 include/linux/quota.h    |    1 -
 include/linux/quotaops.h |    4 +-
 4 files changed, 84 insertions(+), 67 deletions(-)

diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index a7a7670..5c8ad82 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -91,18 +91,16 @@
  * in inode_add_bytes() and inode_sub_bytes().
  *
  * The spinlock ordering is hence:
- *   dq_data_lock > dq_lock > dq_list_lock > i_lock
+ *   dq_data_lock > i_lock > dq_lock > dq_list_lock
  *
  * Note that some things (eg. sb pointer, type, id) doesn't change during
  * the life of the dquot structure and so needn't to be protected by a lock
  *
- * Any operation working on dquots via inode pointers must hold dqptr_sem.  If
- * operation is just reading pointers from inode (or not using them at all) the
- * read lock is enough. If pointers are altered function must hold write lock.
+ * Any operation working on dquots via inode pointers must hold i_lock.
  * Special care needs to be taken about S_NOQUOTA inode flag (marking that
  * inode is a quota file). Functions adding pointers from inode to dquots have
- * to check this flag under dqptr_sem and then (if S_NOQUOTA is not set) they
- * have to do all pointer modifications before dropping dqptr_sem. This makes
+ * to check this flag under i_lock and then (if S_NOQUOTA is not set) they
+ * have to do all pointer modifications before dropping i_lock. This makes
  * sure they cannot race with quotaon which first sets S_NOQUOTA flag and
  * then drops all pointers to dquots from an inode.
  *
@@ -117,14 +115,8 @@
  * spinlock to internal buffers before writing.
  *
  * Lock ordering (including related VFS locks) is the following:
- *   i_mutex > dqonoff_sem > journal_lock > dqptr_sem > dquot->dq_mutex >
+ *   i_mutex > dqonoff_sem > journal_lock > dquot->dq_mutex >
  *   dqio_mutex
- * The lock ordering of dqptr_sem imposed by quota code is only dqonoff_sem >
- * dqptr_sem. But filesystem has to count with the fact that functions such as
- * dquot_alloc_space() acquire dqptr_sem and they usually have to be called
- * from inside a transaction to keep filesystem consistency after a crash. Also
- * filesystems usually want to do some IO on dquot from ->mark_dirty which is
- * called with dqptr_sem held.
  * i_mutex on quota files is special (it's below dqio_mutex)
  */
 
@@ -1024,15 +1016,18 @@ static inline int dqput_blocks(struct dquot *dquot)
 /*
  * Remove references to dquots from inode and add dquot to list for freeing
  * if we have the last referece to dquot
- * We can't race with anybody because we hold dqptr_sem for writing...
  */
 static int remove_inode_dquot_ref(struct inode *inode, int type,
 				  struct list_head *tofree_head)
 {
-	struct dquot *dquot = inode->i_dquot[type];
+	struct dquot *dquot;
 	struct quota_info *dqopt = dqopts(inode->i_sb);
 
+	spin_lock(&inode->i_lock);
+	dquot = inode->i_dquot[type];
 	inode->i_dquot[type] = NULL;
+	spin_unlock(&inode->i_lock);
+
 	if (dquot) {
 		if (dqput_blocks(dquot)) {
 #ifdef CONFIG_QUOTA_DEBUG
@@ -1086,7 +1081,7 @@ static void remove_dquot_ref(struct super_block *sb, int type,
 		 *  We have to scan also I_NEW inodes because they can already
 		 *  have quota pointer initialized. Luckily, we need to touch
 		 *  only quota pointers and these have separate locking
-		 *  (dqptr_sem).
+		 *  (i_lock).
 		 */
 		if (!IS_NOQUOTA(inode)) {
 			if (unlikely(inode_get_rsv_space(inode) > 0))
@@ -1110,9 +1105,7 @@ static void drop_dquot_ref(struct super_block *sb, int type)
 	LIST_HEAD(tofree_head);
 
 	if (dqctl(sb)->dq_op) {
-		down_write(&dqctl(sb)->dqptr_sem);
 		remove_dquot_ref(sb, type, &tofree_head);
-		up_write(&dqctl(sb)->dqptr_sem);
 		put_dquot_list(&tofree_head);
 	}
 }
@@ -1426,9 +1419,6 @@ static int dquot_active(const struct inode *inode)
 /*
  * Initialize quota pointers in inode
  *
- * We do things in a bit complicated way but by that we avoid calling
- * find_get_dquot() and thus filesystem callbacks under dqptr_sem.
- *
  * It is better to call this function outside of any transaction as it
  * might need a lot of space in journal for dquot structure allocation.
  */
@@ -1461,7 +1451,8 @@ static void __dquot_initialize(struct inode *inode, int type)
 		got[cnt] = find_get_dquot(sb, id, cnt);
 	}
 
-	down_write(&dqctl(sb)->dqptr_sem);
+	spin_lock(&inode->i_lock);
+	rsv = __inode_get_rsv_space(inode);
 	if (IS_NOQUOTA(inode))
 		goto out_err;
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
@@ -1480,7 +1471,6 @@ static void __dquot_initialize(struct inode *inode, int type)
 			 * Make quota reservation system happy if someone
 			 * did a write before quota was turned on
 			 */
-			rsv = inode_get_rsv_space(inode);
 			if (unlikely(rsv)) {
 				spin_lock(&got[cnt]->dq_lock);
 				dquot_resv_space(got[cnt], rsv);
@@ -1489,7 +1479,7 @@ static void __dquot_initialize(struct inode *inode, int type)
 		}
 	}
 out_err:
-	up_write(&dqctl(sb)->dqptr_sem);
+	spin_unlock(&inode->i_lock);
 	/* Drop unused references */
 	dqput_all(got);
 }
@@ -1508,12 +1498,12 @@ static void __dquot_drop(struct inode *inode)
 	int cnt;
 	struct dquot *put[MAXQUOTAS];
 
-	down_write(&dqctl(inode->i_sb)->dqptr_sem);
+	spin_lock(&inode->i_lock);
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
 		put[cnt] = inode->i_dquot[cnt];
 		inode->i_dquot[cnt] = NULL;
 	}
-	up_write(&dqctl(inode->i_sb)->dqptr_sem);
+	spin_unlock(&inode->i_lock);
 	dqput_all(put);
 }
 
@@ -1652,6 +1642,7 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags)
 	int warn = flags & DQUOT_SPACE_WARN;
 	int reserve = flags & DQUOT_SPACE_RESERVE;
 	int nofail = flags & DQUOT_SPACE_NOFAIL;
+	struct dquot *dquot[MAXQUOTAS] = {};
 
 	/*
 	 * First test before acquiring mutex - solves deadlocks when we
@@ -1662,18 +1653,21 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags)
 		goto out;
 	}
 
-	down_read(&dqctl(inode->i_sb)->dqptr_sem);
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
 		warntype[cnt] = QUOTA_NL_NOWARN;
 
+	spin_lock(&inode->i_lock);
 	lock_inode_dquots(inode->i_dquot);
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
 		if (!inode->i_dquot[cnt])
 			continue;
+		dquot[cnt] = inode->i_dquot[cnt];
+		dqget(dquot[cnt]);
 		ret = check_bdq(inode->i_dquot[cnt], number, !warn,
 				warntype + cnt);
 		if (ret && !nofail) {
 			unlock_inode_dquots(inode->i_dquot);
+			spin_unlock(&inode->i_lock);
 			goto out_flush_warn;
 		}
 	}
@@ -1685,15 +1679,14 @@ int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags)
 		else
 			dquot_incr_space(inode->i_dquot[cnt], number);
 	}
-	inode_incr_space(inode, number, reserve);
+	__inode_incr_space(inode, number, reserve);
 	unlock_inode_dquots(inode->i_dquot);
-
-	if (reserve)
-		goto out_flush_warn;
-	mark_all_dquot_dirty(inode->i_dquot);
+	spin_unlock(&inode->i_lock);
+	if (!reserve)
+		mark_all_dquot_dirty(dquot);
 out_flush_warn:
-	flush_warnings(inode->i_dquot, warntype);
-	up_read(&dqctl(inode->i_sb)->dqptr_sem);
+	flush_warnings(dquot, warntype);
+	dqput_all(dquot);
 out:
 	return ret;
 }
@@ -1702,10 +1695,11 @@ EXPORT_SYMBOL(__dquot_alloc_space);
 /*
  * This operation can block, but only after everything is updated
  */
-int dquot_alloc_inode(const struct inode *inode)
+int dquot_alloc_inode(struct inode *inode)
 {
 	int cnt, ret = 0;
 	char warntype[MAXQUOTAS];
+	struct dquot *dquot[MAXQUOTAS] = {};
 
 	/* First test before acquiring mutex - solves deadlocks when we
          * re-enter the quota code and are already holding the mutex */
@@ -1713,11 +1707,14 @@ int dquot_alloc_inode(const struct inode *inode)
 		return 0;
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
 		warntype[cnt] = QUOTA_NL_NOWARN;
-	down_read(&dqctl(inode->i_sb)->dqptr_sem);
+
+	spin_lock(&inode->i_lock);
 	lock_inode_dquots(inode->i_dquot);
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
 		if (!inode->i_dquot[cnt])
 			continue;
+		dquot[cnt] = inode->i_dquot[cnt];
+		dqget(dquot[cnt]);
 		ret = check_idq(inode->i_dquot[cnt], 1, warntype + cnt);
 		if (ret)
 			goto warn_put_all;
@@ -1731,10 +1728,11 @@ int dquot_alloc_inode(const struct inode *inode)
 
 warn_put_all:
 	unlock_inode_dquots(inode->i_dquot);
+	spin_unlock(&inode->i_lock);
 	if (ret == 0)
-		mark_all_dquot_dirty(inode->i_dquot);
-	flush_warnings(inode->i_dquot, warntype);
-	up_read(&dqctl(inode->i_sb)->dqptr_sem);
+		mark_all_dquot_dirty(dquot);
+	flush_warnings(dquot, warntype);
+	dqput_all(dquot);
 	return ret;
 }
 EXPORT_SYMBOL(dquot_alloc_inode);
@@ -1745,25 +1743,30 @@ EXPORT_SYMBOL(dquot_alloc_inode);
 int dquot_claim_space_nodirty(struct inode *inode, qsize_t number)
 {
 	int cnt;
+	struct dquot *dquot[MAXQUOTAS] = {};
 
 	if (!dquot_active(inode)) {
 		inode_claim_rsv_space(inode, number);
 		return 0;
 	}
 
-	down_read(&dqctl(inode->i_sb)->dqptr_sem);
+	spin_lock(&inode->i_lock);
 	lock_inode_dquots(inode->i_dquot);
 	/* Claim reserved quotas to allocated quotas */
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-		if (inode->i_dquot[cnt])
+		if (inode->i_dquot[cnt]) {
+			dquot[cnt] = inode->i_dquot[cnt];
+			dqget(dquot[cnt]);
 			dquot_claim_reserved_space(inode->i_dquot[cnt],
 							number);
+		}
 	}
 	/* Update inode bytes */
-	inode_claim_rsv_space(inode, number);
+	__inode_claim_rsv_space(inode, number);
 	unlock_inode_dquots(inode->i_dquot);
-	mark_all_dquot_dirty(inode->i_dquot);
-	up_read(&dqctl(inode->i_sb)->dqptr_sem);
+	spin_unlock(&inode->i_lock);
+	mark_all_dquot_dirty(dquot);
+	dqput_all(dquot);
 	return 0;
 }
 EXPORT_SYMBOL(dquot_claim_space_nodirty);
@@ -1776,6 +1779,7 @@ void __dquot_free_space(struct inode *inode, qsize_t number, int flags)
 	unsigned int cnt;
 	char warntype[MAXQUOTAS];
 	int reserve = flags & DQUOT_SPACE_RESERVE;
+	struct dquot *dquot[MAXQUOTAS] = {};
 
 	/* First test before acquiring mutex - solves deadlocks when we
          * re-enter the quota code and are already holding the mutex */
@@ -1784,54 +1788,60 @@ void __dquot_free_space(struct inode *inode, qsize_t number, int flags)
 		return;
 	}
 
-	down_read(&dqctl(inode->i_sb)->dqptr_sem);
+	spin_lock(&inode->i_lock);
 	lock_inode_dquots(inode->i_dquot);
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
 		if (!inode->i_dquot[cnt])
 			continue;
+		dquot[cnt] = inode->i_dquot[cnt];
+		dqget(dquot[cnt]);
 		warntype[cnt] = info_bdq_free(inode->i_dquot[cnt], number);
 		if (reserve)
 			dquot_free_reserved_space(inode->i_dquot[cnt], number);
 		else
 			dquot_decr_space(inode->i_dquot[cnt], number);
 	}
-	inode_decr_space(inode, number, reserve);
+	__inode_decr_space(inode, number, reserve);
 	unlock_inode_dquots(inode->i_dquot);
+	spin_unlock(&inode->i_lock);
 
-	if (reserve)
-		goto out_unlock;
-	mark_all_dquot_dirty(inode->i_dquot);
-out_unlock:
-	flush_warnings(inode->i_dquot, warntype);
-	up_read(&dqctl(inode->i_sb)->dqptr_sem);
+	if (!reserve)
+		mark_all_dquot_dirty(dquot);
+	flush_warnings(dquot, warntype);
+	dqput_all(dquot);
 }
 EXPORT_SYMBOL(__dquot_free_space);
 
 /*
  * This operation can block, but only after everything is updated
  */
-void dquot_free_inode(const struct inode *inode)
+void dquot_free_inode(struct inode *inode)
 {
 	unsigned int cnt;
 	char warntype[MAXQUOTAS];
+	struct dquot *dquot[MAXQUOTAS] = {};
 
 	/* First test before acquiring mutex - solves deadlocks when we
          * re-enter the quota code and are already holding the mutex */
 	if (!dquot_active(inode))
 		return;
 
-	down_read(&dqctl(inode->i_sb)->dqptr_sem);
+	spin_lock(&inode->i_lock);
 	lock_inode_dquots(inode->i_dquot);
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
 		if (!inode->i_dquot[cnt])
 			continue;
+		dquot[cnt] = inode->i_dquot[cnt];
+		dqget(dquot[cnt]);
 		warntype[cnt] = info_idq_free(inode->i_dquot[cnt], 1);
 		dquot_decr_inodes(inode->i_dquot[cnt], 1);
 	}
 	unlock_inode_dquots(inode->i_dquot);
-	mark_all_dquot_dirty(inode->i_dquot);
-	flush_warnings(inode->i_dquot, warntype);
-	up_read(&dqctl(inode->i_sb)->dqptr_sem);
+	spin_unlock(&inode->i_lock);
+
+	mark_all_dquot_dirty(dquot);
+	flush_warnings(dquot, warntype);
+	dqput_all(dquot);
 }
 EXPORT_SYMBOL(dquot_free_inode);
 
@@ -1862,9 +1872,10 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
 	/* Initialize the arrays */
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
 		warntype_to[cnt] = QUOTA_NL_NOWARN;
-	down_write(&dqctl(inode->i_sb)->dqptr_sem);
+
+	spin_lock(&inode->i_lock);
 	if (IS_NOQUOTA(inode)) {	/* File without quota accounting? */
-		up_write(&dqctl(inode->i_sb)->dqptr_sem);
+		spin_unlock(&inode->i_lock);
 		return 0;
 	}
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
@@ -1873,15 +1884,19 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
 		 */
 		if (!transfer_to[cnt])
 			continue;
+		dqget(transfer_to[cnt]);
 		/* Avoid races with quotaoff() */
 		if (!sb_has_quota_active(inode->i_sb, cnt))
 			continue;
 		is_valid[cnt] = 1;
 		transfer_from[cnt] = inode->i_dquot[cnt];
+		if (transfer_from[cnt])
+			dqget(transfer_from[cnt]);
+
 	}
 	lock_dquot_double(transfer_from, transfer_to);
-	cur_space = inode_get_bytes(inode);
-	rsv_space = inode_get_rsv_space(inode);
+	cur_space = __inode_get_bytes(inode);
+	rsv_space = __inode_get_rsv_space(inode);
 	space = cur_space + rsv_space;
 	/* Build the transfer_from list and check the limits */
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
@@ -1921,13 +1936,15 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
 	}
 	unlock_inode_dquots(transfer_to);
 	unlock_inode_dquots(transfer_from);
-	up_write(&dqctl(inode->i_sb)->dqptr_sem);
+	spin_unlock(&inode->i_lock);
 
 	mark_all_dquot_dirty(transfer_from);
 	mark_all_dquot_dirty(transfer_to);
 	flush_warnings(transfer_to, warntype_to);
 	flush_warnings(transfer_from, warntype_from_inodes);
 	flush_warnings(transfer_from, warntype_from_space);
+	dqput_all(transfer_to);
+	dqput_all(transfer_from);
 	/* Pass back references to put */
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
 		if (is_valid[cnt])
@@ -1936,8 +1953,10 @@ int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
 over_quota:
 	unlock_inode_dquots(transfer_to);
 	unlock_inode_dquots(transfer_from);
-	up_write(&dqctl(inode->i_sb)->dqptr_sem);
+	spin_unlock(&inode->i_lock);
 	flush_warnings(transfer_to, warntype_to);
+	dqput_all(transfer_to);
+	dqput_all(transfer_from);
 	return ret;
 }
 EXPORT_SYMBOL(__dquot_transfer);
diff --git a/fs/super.c b/fs/super.c
index 9eea8e9..2f2090c 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -104,7 +104,6 @@ static struct super_block *alloc_super(struct file_system_type *type)
 		mutex_init(&s->s_vfs_rename_mutex);
 		lockdep_set_class(&s->s_vfs_rename_mutex, &type->s_vfs_rename_key);
 		mutex_init(&s->s_dquot.dqonoff_mutex);
-		init_rwsem(&s->s_dquot.dqptr_sem);
 		init_waitqueue_head(&s->s_wait_unfrozen);
 		s->s_maxbytes = MAX_NON_LFS;
 		s->s_op = &default_op;
diff --git a/include/linux/quota.h b/include/linux/quota.h
index 867848b..834ed1b 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -409,7 +409,6 @@ struct quota_ctl_info {
 	unsigned int flags;			/* Flags for diskquotas on this device */
 
 	struct mutex dqonoff_mutex;		/* Serialize quotaon & quotaoff */
-	struct rw_semaphore dqptr_sem;	/* serialize ops using quota_info struct, pointers from inode to dquots */
 	const struct quotactl_ops *qcop;
 	const struct dquot_operations *dq_op;
 	struct quota_info *dq_opt;
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index 4259da8..2675ff7 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -64,10 +64,10 @@ void dquot_destroy(struct dquot *dquot);
 int __dquot_alloc_space(struct inode *inode, qsize_t number, int flags);
 void __dquot_free_space(struct inode *inode, qsize_t number, int flags);
 
-int dquot_alloc_inode(const struct inode *inode);
+int dquot_alloc_inode(struct inode *inode);
 
 int dquot_claim_space_nodirty(struct inode *inode, qsize_t number);
-void dquot_free_inode(const struct inode *inode);
+void dquot_free_inode(struct inode *inode);
 
 int dquot_disable(struct super_block *sb, int type, unsigned int flags);
 /* Suspend quotas on remount RO */
-- 
1.6.5.2

--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]
  Powered by Linux