[RFC PATCH v2 2/3] xfs: introduce perag iunlink lock

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Currently, we use AGI buffer lock to protect in-memory linked list
for unlinked inodes but since it's not necessary to modify AGI
unless the head of the unlinked list is modified.

Therefore, let's add another per-AG dedicated lock to protect the
whole list, and removing AGI buffer lock in xfs_iunlink_remove()
if the head is untouched.

Note that some paths (e.g. xfs_create_tmpfile()) take AGI lock in
its transaction in advance, so the proper locking order is only
AGI lock -> unlinked list lock.

Signed-off-by: Gao Xiang <hsiangkao@xxxxxxxxxx>
---
 fs/xfs/xfs_inode.c | 79 ++++++++++++++++++++++++++++++++++++++--------
 fs/xfs/xfs_mount.c |  1 +
 fs/xfs/xfs_mount.h |  3 ++
 3 files changed, 70 insertions(+), 13 deletions(-)

diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 4994398373df..d78aaa8ce252 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2039,6 +2039,46 @@ xfs_iunlink_insert_inode(
 	return xfs_iunlink_update_bucket(tp, agno, agibp, 0, agino);
 }
 
+/*
+ * Lock the perag and take AGI lock if agi_unlinked is touched as well for
+ * xfs_iunlink_remove_inode().
+ *
+ * Inode allocation in the O_TMPFILE path defines the AGI/unlinked list lock
+ * order as being AGI->perag unlinked list lock. We are inverting it here as
+ * the fast path tail addition does not need to modify the AGI at all. Hence
+ * we only need the AGI lock if the tail is empty, correct lock order.
+ */
+static struct xfs_perag *
+xfs_iunlink_remove_lock(
+	xfs_agino_t		agno,
+	struct xfs_trans        *tp,
+	struct xfs_inode	*ip,
+	struct xfs_buf		**agibpp)
+{
+	struct xfs_mount        *mp = tp->t_mountp;
+	struct xfs_perag	*pag;
+	int			error;
+
+	pag = xfs_perag_get(mp, agno);
+	mutex_lock(&pag->pag_unlinked_mutex);
+
+	if (ip->i_prev_unlinked == NULLAGINO) {
+		mutex_unlock(&pag->pag_unlinked_mutex);
+		/*
+		 * some paths (e.g. xfs_create_tmpfile) could take AGI lock
+		 * in this transaction in advance and the only locking order
+		 * AGI buf lock -> pag_unlinked_mutex is safe.
+		 */
+		error = xfs_read_agi(mp, tp, agno, agibpp);
+		if (error) {
+			xfs_perag_put(pag);
+			return ERR_PTR(error);
+		}
+		mutex_lock(&pag->pag_unlinked_mutex);
+	}
+	return pag;
+}
+
 /*
  * Remove can be from anywhere in the list, so we have to do two adjacent inode
  * lookups here so we can update list pointers. We may be at the head or the
@@ -2046,12 +2086,12 @@ xfs_iunlink_insert_inode(
  */
 static int
 xfs_iunlink_remove_inode(
+	struct xfs_perag	*pag,
 	struct xfs_trans	*tp,
 	struct xfs_buf		*agibp,
 	struct xfs_inode	*ip)
 {
 	struct xfs_mount	*mp = tp->t_mountp;
-	struct xfs_agi		*agi = agibp->b_addr;
 	xfs_agino_t		agno = XFS_INO_TO_AGNO(mp, ip->i_ino);
 	xfs_agino_t		agino = XFS_INO_TO_AGINO(mp, ip->i_ino);
 	xfs_agino_t		next_agino = ip->i_next_unlinked;
@@ -2059,6 +2099,7 @@ xfs_iunlink_remove_inode(
 
 	if (ip->i_prev_unlinked == NULLAGINO) {
 		struct xlog	*log = mp->m_log;
+		struct xfs_agi	*agi = agibp->b_addr;
 		short		bucket_index = 0;
 
 		/* During recovery, the old multiple bucket can be applied */
@@ -2083,7 +2124,7 @@ xfs_iunlink_remove_inode(
 		/* lookup previous inode and update to point at next */
 		struct xfs_inode	*pip;
 
-		pip = xfs_iunlink_lookup_prev(agibp->b_pag, ip);
+		pip = xfs_iunlink_lookup_prev(pag, ip);
 		if (IS_ERR_OR_NULL(pip))
 			return -EFSCORRUPTED;
 
@@ -2102,7 +2143,7 @@ xfs_iunlink_remove_inode(
 	if (ip->i_next_unlinked != NULLAGINO) {
 		struct xfs_inode	*nip;
 
-		nip = xfs_iunlink_lookup_next(agibp->b_pag, ip);
+		nip = xfs_iunlink_lookup_next(pag, ip);
 		if (IS_ERR_OR_NULL(nip))
 			return -EFSCORRUPTED;
 
@@ -2126,6 +2167,15 @@ xfs_iunlink_remove_inode(
 	return 0;
 }
 
+static void
+xfs_iunlink_unlock(
+	struct xfs_perag	*pag)
+{
+	/* Does not unlock AGI, ever. xfs_trans_commit() does that. */
+	mutex_unlock(&pag->pag_unlinked_mutex);
+	xfs_perag_put(pag);
+}
+
 /*
  * This is called when the inode's link count has gone to 0 or we are creating
  * a tmpfile via O_TMPFILE.  The inode @ip must have nlink == 0.
@@ -2143,7 +2193,6 @@ xfs_iunlink(
 	xfs_agnumber_t		agno = XFS_INO_TO_AGNO(mp, ip->i_ino);
 	int			error;
 
-	ASSERT(ip->i_next_unlinked == NULLAGINO);
 	ASSERT(VFS_I(ip)->i_nlink == 0);
 	ASSERT(VFS_I(ip)->i_mode != 0);
 	trace_xfs_iunlink(ip);
@@ -2153,7 +2202,10 @@ xfs_iunlink(
 	if (error)
 		return error;
 
-	return xfs_iunlink_insert_inode(tp, agibp, ip);
+	mutex_lock(&agibp->b_pag->pag_unlinked_mutex);
+	error = xfs_iunlink_insert_inode(tp, agibp, ip);
+	mutex_unlock(&agibp->b_pag->pag_unlinked_mutex);
+	return error;
 }
 
 /*
@@ -2164,19 +2216,20 @@ xfs_iunlink_remove(
 	struct xfs_trans	*tp,
 	struct xfs_inode	*ip)
 {
-	struct xfs_mount	*mp = tp->t_mountp;
-	struct xfs_buf		*agibp;
-	xfs_agnumber_t		agno = XFS_INO_TO_AGNO(mp, ip->i_ino);
+	struct xfs_buf		*agibp = NULL;
+	struct xfs_perag	*pag;
 	int			error;
 
 	trace_xfs_iunlink_remove(ip);
 
-	/* Get the agi buffer first.  It ensures lock ordering on the list. */
-	error = xfs_read_agi(mp, tp, agno, &agibp);
-	if (error)
-		return error;
+	pag = xfs_iunlink_remove_lock(XFS_INO_TO_AGNO(tp->t_mountp, ip->i_ino),
+		tp, ip, &agibp);
+	if (IS_ERR(pag))
+		return PTR_ERR(pag);
 
-	return xfs_iunlink_remove_inode(tp, agibp, ip);
+	error = xfs_iunlink_remove_inode(pag, tp, agibp, ip);
+	xfs_iunlink_unlock(pag);
+	return error;
 }
 
 /*
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 031e96ff022d..f94e14059e61 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -223,6 +223,7 @@ xfs_initialize_perag(
 		if (first_initialised == NULLAGNUMBER)
 			first_initialised = index;
 		spin_lock_init(&pag->pag_state_lock);
+		mutex_init(&pag->pag_unlinked_mutex);
 	}
 
 	index = xfs_set_inode_alloc(mp, agcount);
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index a72cfcaa4ad1..9a1d0f239fa4 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -372,6 +372,9 @@ typedef struct xfs_perag {
 	/* reference count */
 	uint8_t			pagf_refcount_level;
 
+	/* lock to protect unlinked inode list */
+	struct mutex            pag_unlinked_mutex;
+
 	/*
 	 * Unlinked inode information.  This incore information reflects
 	 * data stored in the AGI, so callers must hold the AGI buffer lock
-- 
2.18.1




[Index of Archives]     [XFS Filesystem Development (older mail)]     [Linux Filesystem Development]     [Linux Audio Users]     [Yosemite Trails]     [Linux Kernel]     [Linux RAID]     [Linux SCSI]


  Powered by Linux