Re: [PATCH 18/28] xfs: refactor unlinked inode recovery

Chandan Babu R <chandanrlinux@xxxxxxxxx> · Tue, 05 May 2020 18:35:30 +0530

On Tuesday 5 May 2020 6:42:29 AM IST Darrick J. Wong wrote:
> From: Darrick J. Wong <darrick.wong@xxxxxxxxxx>
> 
> Move the code that processes unlinked inodes into a separate file in
> preparation for centralizing the log recovery bits that have to walk
> every AG.  No functional changes.
>

The functionality is indeed the same as was before applying this patch.

Reviewed-by: Chandan Babu R <chandanrlinux@xxxxxxxxx>

> Signed-off-by: Darrick J. Wong <darrick.wong@xxxxxxxxxx>
> ---
>  fs/xfs/Makefile                 |    3 -
>  fs/xfs/libxfs/xfs_log_recover.h |    1 
>  fs/xfs/xfs_log_recover.c        |  177 -----------------------------------
>  fs/xfs/xfs_unlink_recover.c     |  198 +++++++++++++++++++++++++++++++++++++++
>  4 files changed, 202 insertions(+), 177 deletions(-)
>  create mode 100644 fs/xfs/xfs_unlink_recover.c
> 
> 
> diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
> index 04611a1068b4..505c898d6cee 100644
> --- a/fs/xfs/Makefile
> +++ b/fs/xfs/Makefile
> @@ -109,7 +109,8 @@ xfs-y				+= xfs_log.o \
>  				   xfs_rmap_item.o \
>  				   xfs_log_recover.o \
>  				   xfs_trans_ail.o \
> -				   xfs_trans_buf.o
> +				   xfs_trans_buf.o \
> +				   xfs_unlink_recover.o
>  
>  # optional features
>  xfs-$(CONFIG_XFS_QUOTA)		+= xfs_dquot.o \
> diff --git a/fs/xfs/libxfs/xfs_log_recover.h b/fs/xfs/libxfs/xfs_log_recover.h
> index a45f6e9fa47b..33c14dd22b77 100644
> --- a/fs/xfs/libxfs/xfs_log_recover.h
> +++ b/fs/xfs/libxfs/xfs_log_recover.h
> @@ -124,5 +124,6 @@ bool xlog_add_buffer_cancelled(struct xlog *log, xfs_daddr_t blkno, uint len);
>  bool xlog_is_buffer_cancelled(struct xlog *log, xfs_daddr_t blkno, uint len);
>  bool xlog_put_buffer_cancelled(struct xlog *log, xfs_daddr_t blkno, uint len);
>  void xlog_recover_iodone(struct xfs_buf *bp);
> +void xlog_recover_process_unlinked(struct xlog *log);
>  
>  #endif	/* __XFS_LOG_RECOVER_H__ */
> diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
> index 20ee32c2652d..362296b34490 100644
> --- a/fs/xfs/xfs_log_recover.c
> +++ b/fs/xfs/xfs_log_recover.c
> @@ -2720,181 +2720,6 @@ xlog_recover_cancel_intents(
>  	spin_unlock(&ailp->ail_lock);
>  }
>  
> -/*
> - * This routine performs a transaction to null out a bad inode pointer
> - * in an agi unlinked inode hash bucket.
> - */
> -STATIC void
> -xlog_recover_clear_agi_bucket(
> -	xfs_mount_t	*mp,
> -	xfs_agnumber_t	agno,
> -	int		bucket)
> -{
> -	xfs_trans_t	*tp;
> -	xfs_agi_t	*agi;
> -	xfs_buf_t	*agibp;
> -	int		offset;
> -	int		error;
> -
> -	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_clearagi, 0, 0, 0, &tp);
> -	if (error)
> -		goto out_error;
> -
> -	error = xfs_read_agi(mp, tp, agno, &agibp);
> -	if (error)
> -		goto out_abort;
> -
> -	agi = agibp->b_addr;
> -	agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO);
> -	offset = offsetof(xfs_agi_t, agi_unlinked) +
> -		 (sizeof(xfs_agino_t) * bucket);
> -	xfs_trans_log_buf(tp, agibp, offset,
> -			  (offset + sizeof(xfs_agino_t) - 1));
> -
> -	error = xfs_trans_commit(tp);
> -	if (error)
> -		goto out_error;
> -	return;
> -
> -out_abort:
> -	xfs_trans_cancel(tp);
> -out_error:
> -	xfs_warn(mp, "%s: failed to clear agi %d. Continuing.", __func__, agno);
> -	return;
> -}
> -
> -STATIC xfs_agino_t
> -xlog_recover_process_one_iunlink(
> -	struct xfs_mount		*mp,
> -	xfs_agnumber_t			agno,
> -	xfs_agino_t			agino,
> -	int				bucket)
> -{
> -	struct xfs_buf			*ibp;
> -	struct xfs_dinode		*dip;
> -	struct xfs_inode		*ip;
> -	xfs_ino_t			ino;
> -	int				error;
> -
> -	ino = XFS_AGINO_TO_INO(mp, agno, agino);
> -	error = xfs_iget(mp, NULL, ino, 0, 0, &ip);
> -	if (error)
> -		goto fail;
> -
> -	/*
> -	 * Get the on disk inode to find the next inode in the bucket.
> -	 */
> -	error = xfs_imap_to_bp(mp, NULL, &ip->i_imap, &dip, &ibp, 0, 0);
> -	if (error)
> -		goto fail_iput;
> -
> -	xfs_iflags_clear(ip, XFS_IRECOVERY);
> -	ASSERT(VFS_I(ip)->i_nlink == 0);
> -	ASSERT(VFS_I(ip)->i_mode != 0);
> -
> -	/* setup for the next pass */
> -	agino = be32_to_cpu(dip->di_next_unlinked);
> -	xfs_buf_relse(ibp);
> -
> -	/*
> -	 * Prevent any DMAPI event from being sent when the reference on
> -	 * the inode is dropped.
> -	 */
> -	ip->i_d.di_dmevmask = 0;
> -
> -	xfs_irele(ip);
> -	return agino;
> -
> - fail_iput:
> -	xfs_irele(ip);
> - fail:
> -	/*
> -	 * We can't read in the inode this bucket points to, or this inode
> -	 * is messed up.  Just ditch this bucket of inodes.  We will lose
> -	 * some inodes and space, but at least we won't hang.
> -	 *
> -	 * Call xlog_recover_clear_agi_bucket() to perform a transaction to
> -	 * clear the inode pointer in the bucket.
> -	 */
> -	xlog_recover_clear_agi_bucket(mp, agno, bucket);
> -	return NULLAGINO;
> -}
> -
> -/*
> - * Recover AGI unlinked lists
> - *
> - * This is called during recovery to process any inodes which we unlinked but
> - * not freed when the system crashed.  These inodes will be on the lists in the
> - * AGI blocks. What we do here is scan all the AGIs and fully truncate and free
> - * any inodes found on the lists. Each inode is removed from the lists when it
> - * has been fully truncated and is freed. The freeing of the inode and its
> - * removal from the list must be atomic.
> - *
> - * If everything we touch in the agi processing loop is already in memory, this
> - * loop can hold the cpu for a long time. It runs without lock contention,
> - * memory allocation contention, the need wait for IO, etc, and so will run
> - * until we either run out of inodes to process, run low on memory or we run out
> - * of log space.
> - *
> - * This behaviour is bad for latency on single CPU and non-preemptible kernels,
> - * and can prevent other filesytem work (such as CIL pushes) from running. This
> - * can lead to deadlocks if the recovery process runs out of log reservation
> - * space. Hence we need to yield the CPU when there is other kernel work
> - * scheduled on this CPU to ensure other scheduled work can run without undue
> - * latency.
> - */
> -STATIC void
> -xlog_recover_process_iunlinks(
> -	struct xlog	*log)
> -{
> -	xfs_mount_t	*mp;
> -	xfs_agnumber_t	agno;
> -	xfs_agi_t	*agi;
> -	xfs_buf_t	*agibp;
> -	xfs_agino_t	agino;
> -	int		bucket;
> -	int		error;
> -
> -	mp = log->l_mp;
> -
> -	for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
> -		/*
> -		 * Find the agi for this ag.
> -		 */
> -		error = xfs_read_agi(mp, NULL, agno, &agibp);
> -		if (error) {
> -			/*
> -			 * AGI is b0rked. Don't process it.
> -			 *
> -			 * We should probably mark the filesystem as corrupt
> -			 * after we've recovered all the ag's we can....
> -			 */
> -			continue;
> -		}
> -		/*
> -		 * Unlock the buffer so that it can be acquired in the normal
> -		 * course of the transaction to truncate and free each inode.
> -		 * Because we are not racing with anyone else here for the AGI
> -		 * buffer, we don't even need to hold it locked to read the
> -		 * initial unlinked bucket entries out of the buffer. We keep
> -		 * buffer reference though, so that it stays pinned in memory
> -		 * while we need the buffer.
> -		 */
> -		agi = agibp->b_addr;
> -		xfs_buf_unlock(agibp);
> -
> -		for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) {
> -			agino = be32_to_cpu(agi->agi_unlinked[bucket]);
> -			while (agino != NULLAGINO) {
> -				agino = xlog_recover_process_one_iunlink(mp,
> -							agno, agino, bucket);
> -				cond_resched();
> -			}
> -		}
> -		xfs_buf_rele(agibp);
> -	}
> -}
> -
>  STATIC void
>  xlog_unpack_data(
>  	struct xlog_rec_header	*rhead,
> @@ -3574,7 +3399,7 @@ xlog_recover_finish(
>  		 */
>  		xfs_log_force(log->l_mp, XFS_LOG_SYNC);
>  
> -		xlog_recover_process_iunlinks(log);
> +		xlog_recover_process_unlinked(log);
>  
>  		xlog_recover_check_summary(log);
>  
> diff --git a/fs/xfs/xfs_unlink_recover.c b/fs/xfs/xfs_unlink_recover.c
> new file mode 100644
> index 000000000000..2a19d096e88d
> --- /dev/null
> +++ b/fs/xfs/xfs_unlink_recover.c
> @@ -0,0 +1,198 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (c) 2000-2006 Silicon Graphics, Inc.
> + * All Rights Reserved.
> + */
> +#include "xfs.h"
> +#include "xfs_fs.h"
> +#include "xfs_shared.h"
> +#include "xfs_format.h"
> +#include "xfs_log_format.h"
> +#include "xfs_trans_resv.h"
> +#include "xfs_bit.h"
> +#include "xfs_sb.h"
> +#include "xfs_mount.h"
> +#include "xfs_defer.h"
> +#include "xfs_inode.h"
> +#include "xfs_trans.h"
> +#include "xfs_log.h"
> +#include "xfs_log_priv.h"
> +#include "xfs_log_recover.h"
> +#include "xfs_trans_priv.h"
> +#include "xfs_ialloc.h"
> +#include "xfs_icache.h"
> +
> +/*
> + * This routine performs a transaction to null out a bad inode pointer
> + * in an agi unlinked inode hash bucket.
> + */
> +STATIC void
> +xlog_recover_clear_agi_bucket(
> +	struct xfs_mount	*mp,
> +	xfs_agnumber_t		agno,
> +	int			bucket)
> +{
> +	struct xfs_trans	*tp;
> +	struct xfs_agi		*agi;
> +	struct xfs_buf		*agibp;
> +	int			offset;
> +	int			error;
> +
> +	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_clearagi, 0, 0, 0, &tp);
> +	if (error)
> +		goto out_error;
> +
> +	error = xfs_read_agi(mp, tp, agno, &agibp);
> +	if (error)
> +		goto out_abort;
> +
> +	agi = agibp->b_addr;
> +	agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO);
> +	offset = offsetof(xfs_agi_t, agi_unlinked) +
> +		 (sizeof(xfs_agino_t) * bucket);
> +	xfs_trans_log_buf(tp, agibp, offset,
> +			  (offset + sizeof(xfs_agino_t) - 1));
> +
> +	error = xfs_trans_commit(tp);
> +	if (error)
> +		goto out_error;
> +	return;
> +
> +out_abort:
> +	xfs_trans_cancel(tp);
> +out_error:
> +	xfs_warn(mp, "%s: failed to clear agi %d. Continuing.", __func__, agno);
> +	return;
> +}
> +
> +STATIC xfs_agino_t
> +xlog_recover_process_one_iunlink(
> +	struct xfs_mount		*mp,
> +	xfs_agnumber_t			agno,
> +	xfs_agino_t			agino,
> +	int				bucket)
> +{
> +	struct xfs_buf			*ibp;
> +	struct xfs_dinode		*dip;
> +	struct xfs_inode		*ip;
> +	xfs_ino_t			ino;
> +	int				error;
> +
> +	ino = XFS_AGINO_TO_INO(mp, agno, agino);
> +	error = xfs_iget(mp, NULL, ino, 0, 0, &ip);
> +	if (error)
> +		goto fail;
> +
> +	/*
> +	 * Get the on disk inode to find the next inode in the bucket.
> +	 */
> +	error = xfs_imap_to_bp(mp, NULL, &ip->i_imap, &dip, &ibp, 0, 0);
> +	if (error)
> +		goto fail_iput;
> +
> +	xfs_iflags_clear(ip, XFS_IRECOVERY);
> +	ASSERT(VFS_I(ip)->i_nlink == 0);
> +	ASSERT(VFS_I(ip)->i_mode != 0);
> +
> +	/* setup for the next pass */
> +	agino = be32_to_cpu(dip->di_next_unlinked);
> +	xfs_buf_relse(ibp);
> +
> +	/*
> +	 * Prevent any DMAPI event from being sent when the reference on
> +	 * the inode is dropped.
> +	 */
> +	ip->i_d.di_dmevmask = 0;
> +
> +	xfs_irele(ip);
> +	return agino;
> +
> + fail_iput:
> +	xfs_irele(ip);
> + fail:
> +	/*
> +	 * We can't read in the inode this bucket points to, or this inode
> +	 * is messed up.  Just ditch this bucket of inodes.  We will lose
> +	 * some inodes and space, but at least we won't hang.
> +	 *
> +	 * Call xlog_recover_clear_agi_bucket() to perform a transaction to
> +	 * clear the inode pointer in the bucket.
> +	 */
> +	xlog_recover_clear_agi_bucket(mp, agno, bucket);
> +	return NULLAGINO;
> +}
> +
> +/*
> + * Recover AGI unlinked lists
> + *
> + * This is called during recovery to process any inodes which we unlinked but
> + * not freed when the system crashed.  These inodes will be on the lists in the
> + * AGI blocks. What we do here is scan all the AGIs and fully truncate and free
> + * any inodes found on the lists. Each inode is removed from the lists when it
> + * has been fully truncated and is freed. The freeing of the inode and its
> + * removal from the list must be atomic.
> + *
> + * If everything we touch in the agi processing loop is already in memory, this
> + * loop can hold the cpu for a long time. It runs without lock contention,
> + * memory allocation contention, the need wait for IO, etc, and so will run
> + * until we either run out of inodes to process, run low on memory or we run out
> + * of log space.
> + *
> + * This behaviour is bad for latency on single CPU and non-preemptible kernels,
> + * and can prevent other filesytem work (such as CIL pushes) from running. This
> + * can lead to deadlocks if the recovery process runs out of log reservation
> + * space. Hence we need to yield the CPU when there is other kernel work
> + * scheduled on this CPU to ensure other scheduled work can run without undue
> + * latency.
> + */
> +void
> +xlog_recover_process_unlinked(
> +	struct xlog		*log)
> +{
> +	struct xfs_mount	*mp;
> +	struct xfs_agi		*agi;
> +	struct xfs_buf		*agibp;
> +	xfs_agnumber_t		agno;
> +	xfs_agino_t		agino;
> +	int			bucket;
> +	int			error;
> +
> +	mp = log->l_mp;
> +
> +	for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
> +		/*
> +		 * Find the agi for this ag.
> +		 */
> +		error = xfs_read_agi(mp, NULL, agno, &agibp);
> +		if (error) {
> +			/*
> +			 * AGI is b0rked. Don't process it.
> +			 *
> +			 * We should probably mark the filesystem as corrupt
> +			 * after we've recovered all the ag's we can....
> +			 */
> +			continue;
> +		}
> +		/*
> +		 * Unlock the buffer so that it can be acquired in the normal
> +		 * course of the transaction to truncate and free each inode.
> +		 * Because we are not racing with anyone else here for the AGI
> +		 * buffer, we don't even need to hold it locked to read the
> +		 * initial unlinked bucket entries out of the buffer. We keep
> +		 * buffer reference though, so that it stays pinned in memory
> +		 * while we need the buffer.
> +		 */
> +		agi = agibp->b_addr;
> +		xfs_buf_unlock(agibp);
> +
> +		for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) {
> +			agino = be32_to_cpu(agi->agi_unlinked[bucket]);
> +			while (agino != NULLAGINO) {
> +				agino = xlog_recover_process_one_iunlink(mp,
> +							agno, agino, bucket);
> +				cond_resched();
> +			}
> +		}
> +		xfs_buf_rele(agibp);
> +	}
> +}
> 
> 

-- 
chandan