[PATCH 2/2] xfs: Prevent deadlock when allocating blocks for AGFL

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Executing xfs/538 after disabling injection of bmap_alloc_minlen_extent error
can cause several tasks to trigger hung task timeout. Most of the tasks are
blocked on getting a lock on an AG's AGF buffer. However, The task which has
the lock on the AG's AGF buffer has the following call trace,

PID: 1341   TASK: ffff8881073f3700  CPU: 1   COMMAND: "fsstress"
   __schedule+0x22f at ffffffff81f75e8f
   schedule+0x46 at ffffffff81f76366
   xfs_extent_busy_flush+0x69 at ffffffff81477d99
   xfs_alloc_ag_vextent_size+0x16a at ffffffff8141711a
   xfs_alloc_ag_vextent+0x19b at ffffffff81417edb
   xfs_alloc_fix_freelist+0x22f at ffffffff8141896f
   xfs_free_extent_fix_freelist+0x6a at ffffffff8141939a
   __xfs_free_extent+0x99 at ffffffff81419499
   xfs_trans_free_extent+0x3e at ffffffff814a6fee
   xfs_extent_free_finish_item+0x24 at ffffffff814a70d4
   xfs_defer_finish_noroll+0x1f7 at ffffffff81441407
   xfs_defer_finish+0x11 at ffffffff814417e1
   xfs_itruncate_extents_flags+0x13d at ffffffff8148b7dd
   xfs_inactive_truncate+0xb9 at ffffffff8148bb89
   xfs_inactive+0x227 at ffffffff8148c4f7
   xfs_fs_destroy_inode+0xb8 at ffffffff81496898
   destroy_inode+0x3b at ffffffff8127d2ab
   do_unlinkat+0x1d1 at ffffffff81270df1
   do_syscall_64+0x40 at ffffffff81f6b5f0
   entry_SYSCALL_64_after_hwframe+0x44 at ffffffff8200007c

The following sequence of events lead to the above listed call trace,

1. The task frees atleast two extents belonging to the file being truncated.
2. The corresponding xfs_extent_free_items are stored in the list pointed to
   by xfs_defer_pending->dfp_work.
3. When executing the next step of the rolling transaction, The first of the
   above mentioned extents is freed. The corresponding busy extent entry is
   added to the current transaction's tp->t_busy list as well as to the perag
   rb tree at xfs_perag->pagb_tree.
4. When trying to free the second extent, XFS determines that the AGFL needs
   to be populated and hence tries to allocate free blocks.
5. The only free extent whose size is >= xfs_alloc_arg->maxlen
   happens to be the first extent that was freed by the current transaction.
6. Hence xfs_alloc_ag_vextent_size() flushes the CIL in the hope of clearing
   the busy status of the extent and waits for the busy generation number to
   change.
7. However, flushing the CIL is futile since the busy extent is still in the
   current transaction's tp->t_busy list.

Here the task ends up waiting indefinitely.

This commit fixes the bug by preventing a CIL flush if all free extents are
busy and all of them are in the transaction's tp->t_busy list.

Signed-off-by: Chandan Babu R <chandanrlinux@xxxxxxxxx>
---
 fs/xfs/libxfs/xfs_alloc.c | 59 +++++++++++++++++++++++++++++----------
 fs/xfs/xfs_extent_busy.c  |  6 +++-
 fs/xfs/xfs_extent_busy.h  |  2 +-
 3 files changed, 51 insertions(+), 16 deletions(-)

diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 7dc50a435cf4..8e5c91bd3031 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -274,7 +274,8 @@ xfs_alloc_compute_aligned(
 	xfs_extlen_t	foundlen,	/* length in found extent */
 	xfs_agblock_t	*resbno,	/* result block number */
 	xfs_extlen_t	*reslen,	/* result length */
-	unsigned	*busy_gen)
+	unsigned	*busy_gen,
+	bool		*busy_in_trans)
 {
 	xfs_agblock_t	bno = foundbno;
 	xfs_extlen_t	len = foundlen;
@@ -282,7 +283,7 @@ xfs_alloc_compute_aligned(
 	bool		busy;
 
 	/* Trim busy sections out of found extent */
-	busy = xfs_extent_busy_trim(args, &bno, &len, busy_gen);
+	busy = xfs_extent_busy_trim(args, &bno, &len, busy_gen, busy_in_trans);
 
 	/*
 	 * If we have a largish extent that happens to start before min_agbno,
@@ -852,7 +853,7 @@ xfs_alloc_cur_check(
 	}
 
 	busy = xfs_alloc_compute_aligned(args, bno, len, &bnoa, &lena,
-					 &busy_gen);
+					 &busy_gen, NULL);
 	acur->busy |= busy;
 	if (busy)
 		acur->busy_gen = busy_gen;
@@ -1248,7 +1249,7 @@ xfs_alloc_ag_vextent_exact(
 	 */
 	tbno = fbno;
 	tlen = flen;
-	xfs_extent_busy_trim(args, &tbno, &tlen, &busy_gen);
+	xfs_extent_busy_trim(args, &tbno, &tlen, &busy_gen, NULL);
 
 	/*
 	 * Give up if the start of the extent is busy, or the freespace isn't
@@ -1669,6 +1670,9 @@ xfs_alloc_ag_vextent_size(
 	xfs_extlen_t	rlen;		/* length of returned extent */
 	bool		busy;
 	unsigned	busy_gen;
+	bool		busy_in_trans;
+	bool		all_busy_in_trans;
+	bool		alloc_small_extent;
 
 restart:
 	/*
@@ -1677,6 +1681,10 @@ xfs_alloc_ag_vextent_size(
 	cnt_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
 		args->agno, XFS_BTNUM_CNT);
 	bno_cur = NULL;
+	alloc_small_extent = false;
+	all_busy_in_trans = true;
+
+restart_alloc_small_extent:
 	busy = false;
 
 	/*
@@ -1687,13 +1695,14 @@ xfs_alloc_ag_vextent_size(
 		goto error0;
 
 	/*
-	 * If none then we have to settle for a smaller extent. In the case that
-	 * there are no large extents, this will return the last entry in the
-	 * tree unless the tree is empty. In the case that there are only busy
-	 * large extents, this will return the largest small extent unless there
-	 * are no smaller extents available.
+	 * We have to settle for a smaller extent if there are no maxlen +
+	 * alignment - 1 sized extents or if all larger free extents are still
+	 * in current transaction's busy list. In either case, this will return
+	 * the last entry in the tree unless the tree is empty. In the case that
+	 * there are only busy large extents, this will return the largest small
+	 * extent unless there are no smaller extents available.
 	 */
-	if (!i) {
+	if (!i || alloc_small_extent) {
 		error = xfs_alloc_ag_vextent_small(args, cnt_cur,
 						   &fbno, &flen, &i);
 		if (error)
@@ -1705,7 +1714,9 @@ xfs_alloc_ag_vextent_size(
 		}
 		ASSERT(i == 1);
 		busy = xfs_alloc_compute_aligned(args, fbno, flen, &rbno,
-				&rlen, &busy_gen);
+				&rlen, &busy_gen, &busy_in_trans);
+		if (busy && !busy_in_trans)
+			all_busy_in_trans = false;
 	} else {
 		/*
 		 * Search for a non-busy extent that is large enough.
@@ -1720,15 +1731,32 @@ xfs_alloc_ag_vextent_size(
 			}
 
 			busy = xfs_alloc_compute_aligned(args, fbno, flen,
-					&rbno, &rlen, &busy_gen);
+					&rbno, &rlen, &busy_gen,
+					&busy_in_trans);
 
 			if (rlen >= args->maxlen)
 				break;
 
+			if (busy && !busy_in_trans)
+				all_busy_in_trans = false;
+
 			error = xfs_btree_increment(cnt_cur, 0, &i);
 			if (error)
 				goto error0;
 			if (i == 0) {
+				/*
+				 * All of the large free extents are busy in the
+				 * current transaction's t->t_busy
+				 * list. Hence, flushing the CIL's busy extent list
+				 * will be futile and also leads to the current
+				 * task waiting indefinitely. Hence try to
+				 * allocate smaller extents.
+				 */
+				if (all_busy_in_trans) {
+					alloc_small_extent = true;
+					goto restart_alloc_small_extent;
+				}
+
 				/*
 				 * Our only valid extents must have been busy.
 				 * Make it unbusy by forcing the log out and
@@ -1783,7 +1811,10 @@ xfs_alloc_ag_vextent_size(
 			if (flen < bestrlen)
 				break;
 			busy = xfs_alloc_compute_aligned(args, fbno, flen,
-					&rbno, &rlen, &busy_gen);
+					&rbno, &rlen, &busy_gen,
+					&busy_in_trans);
+			if (busy && !busy_in_trans)
+				all_busy_in_trans = false;
 			rlen = XFS_EXTLEN_MIN(args->maxlen, rlen);
 			if (XFS_IS_CORRUPT(args->mp,
 					   rlen != 0 &&
@@ -1819,7 +1850,7 @@ xfs_alloc_ag_vextent_size(
 	 */
 	args->len = rlen;
 	if (rlen < args->minlen) {
-		if (busy) {
+		if (busy && !all_busy_in_trans) {
 			xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
 			trace_xfs_alloc_size_busy(args);
 			xfs_extent_busy_flush(args->mp, args->pag, busy_gen);
diff --git a/fs/xfs/xfs_extent_busy.c b/fs/xfs/xfs_extent_busy.c
index a4075685d9eb..16ba514f9e81 100644
--- a/fs/xfs/xfs_extent_busy.c
+++ b/fs/xfs/xfs_extent_busy.c
@@ -334,7 +334,8 @@ xfs_extent_busy_trim(
 	struct xfs_alloc_arg	*args,
 	xfs_agblock_t		*bno,
 	xfs_extlen_t		*len,
-	unsigned		*busy_gen)
+	unsigned		*busy_gen,
+	bool			*busy_in_trans)
 {
 	xfs_agblock_t		fbno;
 	xfs_extlen_t		flen;
@@ -362,6 +363,9 @@ xfs_extent_busy_trim(
 			continue;
 		}
 
+		if (busy_in_trans)
+			*busy_in_trans = busyp->flags & XFS_EXTENT_BUSY_IN_TRANS;
+
 		if (bbno <= fbno) {
 			/* start overlap */
 
diff --git a/fs/xfs/xfs_extent_busy.h b/fs/xfs/xfs_extent_busy.h
index 929f72d1c699..dcdc70821622 100644
--- a/fs/xfs/xfs_extent_busy.h
+++ b/fs/xfs/xfs_extent_busy.h
@@ -49,7 +49,7 @@ xfs_extent_busy_reuse(struct xfs_mount *mp, xfs_agnumber_t agno,
 
 bool
 xfs_extent_busy_trim(struct xfs_alloc_arg *args, xfs_agblock_t *bno,
-		xfs_extlen_t *len, unsigned *busy_gen);
+		xfs_extlen_t *len, unsigned *busy_gen, bool *busy_in_trans);
 
 void
 xfs_extent_busy_flush(struct xfs_mount *mp, struct xfs_perag *pag,
-- 
2.30.2




[Index of Archives]     [XFS Filesystem Development (older mail)]     [Linux Filesystem Development]     [Linux Audio Users]     [Yosemite Trails]     [Linux Kernel]     [Linux RAID]     [Linux SCSI]


  Powered by Linux