Re: [PATCH 05/10] xfs: don't assert fail with AIL lock held

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, May 02, 2018 at 06:01:52PM +1000, Dave Chinner wrote:
> From: Dave Chinner <dchinner@xxxxxxxxxx>
> 
> Been hitting AIL ordering assert failures recently, but been unable
> to trace them down because the system immediately hangs up onteh

							     on the

> spinlock that was held when this assert fires:
> 
> XFS: Assertion failed: XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0, file: fs/xfs/xfs_trans_ail.c, line: 52
> 
> Move the assertions outside of the spinlock so the corpse can
> be dissected.
> 
> Signed-Off-By: Dave Chinner <dchinner@xxxxxxxxxx>
> ---
>  fs/xfs/xfs_trans_ail.c | 40 ++++++++++++++++++++++++++++++----------
>  1 file changed, 30 insertions(+), 10 deletions(-)
> 
> diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
> index 50611d2bcbc2..58a2cf6fd4d9 100644
> --- a/fs/xfs/xfs_trans_ail.c
> +++ b/fs/xfs/xfs_trans_ail.c
> @@ -32,13 +32,19 @@
>  #ifdef DEBUG
>  /*
>   * Check that the list is sorted as it should be.
> + *
> + * Called with the ail lock held, but we don't want to assert fail with it
> + * held otherwise we'll lock everything up and won't be able to debug the
> + * cause. Hence jump through hoops to drop teh lock before assert failing.

					      the

> + * Asserts may not be fatal, so pick teh lock back up and continue onwards.

					the

>   */
>  STATIC void
>  xfs_ail_check(
> -	struct xfs_ail	*ailp,
> -	xfs_log_item_t	*lip)
> +	struct xfs_ail		*ailp,
> +	struct xfs_log_item	*lip)
>  {
> -	xfs_log_item_t	*prev_lip;
> +	struct xfs_log_item	*prev_lip;
> +	struct xfs_log_item	*next_lip;
>  
>  	if (list_empty(&ailp->ail_head))
>  		return;
> @@ -46,15 +52,29 @@ xfs_ail_check(
>  	/*
>  	 * Check the next and previous entries are valid.
>  	 */
> -	ASSERT(test_bit(XFS_LI_IN_AIL, &lip->li_flags));
> -	prev_lip = list_entry(lip->li_ail.prev, xfs_log_item_t, li_ail);
> -	if (&prev_lip->li_ail != &ailp->ail_head)
> -		ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) <= 0);
> +	if (!test_bit(XFS_LI_IN_AIL, &lip->li_flags)) {
> +		spin_unlock(&ailp->ail_lock);
> +		ASSERT(0);
> +		spin_lock(&ailp->ail_lock);
> +	}
>  
> -	prev_lip = list_entry(lip->li_ail.next, xfs_log_item_t, li_ail);
> -	if (&prev_lip->li_ail != &ailp->ail_head)
> -		ASSERT(XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) >= 0);
> +	prev_lip = list_entry(lip->li_ail.prev, xfs_log_item_t, li_ail);
> +	if (&prev_lip->li_ail != &ailp->ail_head) {
> +		if (XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) > 0) {
> +			spin_unlock(&ailp->ail_lock);
> +			ASSERT(0);
> +			spin_lock(&ailp->ail_lock);
> +		}
> +	}
>  
> +	next_lip = list_entry(lip->li_ail.next, xfs_log_item_t, li_ail);
> +	if (&next_lip->li_ail != &ailp->ail_head) {
> +		if (XFS_LSN_CMP(next_lip->li_lsn, lip->li_lsn) < 0) {
> +			spin_unlock(&ailp->ail_lock);
> +			ASSERT(0);
> +			spin_lock(&ailp->ail_lock);
> +		}
> +	}

Otherwise this seems OK, but kind of ugly. What about something like the
following diff (applied on top of this patch)? It's still hacky, but it
avoids a separate lock cycle for each check failure and preserves the
actual assert strings. (Untested, and the comments probably need
updating.)

Brian

--- 8< ---

diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index 58a2cf6fd4d9..98798d15b863 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -45,37 +45,36 @@ xfs_ail_check(
 {
 	struct xfs_log_item	*prev_lip;
 	struct xfs_log_item	*next_lip;
+	xfs_lsn_t		prev_lsn = NULLCOMMITLSN;
+	xfs_lsn_t		next_lsn = NULLCOMMITLSN;
+	xfs_lsn_t		lsn;
+	bool			in_ail;
 
 	if (list_empty(&ailp->ail_head))
 		return;
 
+	in_ail = test_bit(XFS_LI_IN_AIL, &lip->li_flags);
+	prev_lip = list_entry(lip->li_ail.prev, struct xfs_log_item, li_ail);
+	if (&prev_lip->li_ail != &ailp->ail_head)
+		prev_lsn = prev_lip->li_lsn;
+	next_lip = list_entry(lip->li_ail.next, struct xfs_log_item, li_ail);
+	if (&next_lip->li_ail != &ailp->ail_head)
+		next_lsn = next_lip->li_lsn;
+	lsn = lip->li_lsn;
+
 	/*
 	 * Check the next and previous entries are valid.
 	 */
-	if (!test_bit(XFS_LI_IN_AIL, &lip->li_flags)) {
-		spin_unlock(&ailp->ail_lock);
-		ASSERT(0);
-		spin_lock(&ailp->ail_lock);
-	}
-
-	prev_lip = list_entry(lip->li_ail.prev, xfs_log_item_t, li_ail);
-	if (&prev_lip->li_ail != &ailp->ail_head) {
-		if (XFS_LSN_CMP(prev_lip->li_lsn, lip->li_lsn) > 0) {
-			spin_unlock(&ailp->ail_lock);
-			ASSERT(0);
-			spin_lock(&ailp->ail_lock);
-		}
-	}
-
-	next_lip = list_entry(lip->li_ail.next, xfs_log_item_t, li_ail);
-	if (&next_lip->li_ail != &ailp->ail_head) {
-		if (XFS_LSN_CMP(next_lip->li_lsn, lip->li_lsn) < 0) {
-			spin_unlock(&ailp->ail_lock);
-			ASSERT(0);
-			spin_lock(&ailp->ail_lock);
-		}
-	}
+	if (in_ail &&
+	    (prev_lsn == NULLCOMMITLSN || XFS_LSN_CMP(prev_lsn, lsn) <= 0) &&
+	    (next_lsn == NULLCOMMITLSN || XFS_LSN_CMP(next_lsn, lsn) >= 0))
+		return;
 
+	spin_unlock(&ailp->ail_lock);
+	ASSERT(in_ail);
+	ASSERT(prev_lsn == NULLCOMMITLSN || XFS_LSN_CMP(prev_lsn, lsn) <= 0);
+	ASSERT(next_lsn == NULLCOMMITLSN || XFS_LSN_CMP(next_lsn, lsn) >= 0);
+	spin_lock(&ailp->ail_lock);
 }
 #else /* !DEBUG */
 #define	xfs_ail_check(a,l)
--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [XFS Filesystem Development (older mail)]     [Linux Filesystem Development]     [Linux Audio Users]     [Yosemite Trails]     [Linux Kernel]     [Linux RAID]     [Linux SCSI]


  Powered by Linux