Re: [PATCH] xfs: collect errors from inodegc for unlinked inode recovery

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, May 30, 2023 at 10:19:28AM +1000, Dave Chinner wrote:
> From: Dave Chinner <dchinner@xxxxxxxxxx>
> 
> Unlinked list recovery requires errors removing the inode the from
> the unlinked list get fed back to the main recovery loop. Now that
> we offload the unlinking to the inodegc work, we don't get errors
> being fed back when we trip over a corruption that prevents the
> inode from being removed from the unlinked list.
> 
> This means we never clear the corrupt unlinked list bucket,
> resulting in runtime operations eventually tripping over it and
> shutting down.
> 
> Fix this by collecting inodegc worker errors and feed them
> back to the flush caller. This is largely best effort - the only
> context that really cares is log recovery, and it only flushes a
> single inode at a time so we don't need complex synchronised
> handling. Essentially the inodegc workers will capture the first
> error that occurs and the next flush will gather them and clear
> them. The flush itself will only report the first gathered error.
> 
> In the cases where callers can return errors, propagate the
> collected inodegc flush error up the error handling chain.
> 
> In the case of inode unlinked list recovery, there are several
> superfluous calls to flush queued unlinked inodes -
> xlog_recover_iunlink_bucket() guarantees that it has flushed the
> inodegc and collected errors before it returns. Hence nothing in the
> calling path needs to run a flush, even when an error is returned.

Hmm.  So I guess what you're saying is that xfs_inactive now returns
negative errnos, and everything that calls down to that function will
pass the error upwards through the stack?

Any of those call paths that already could handle a negative errno will
now fail on a corrupt inactive inode; and the only place that will
silently "drop" the negative errno is unmount?

If 'yes' and 'yes' and the kbuild robot warnings get fixed,
Reviewed-by: Darrick J. Wong <djwong@xxxxxxxxxx>

--D

> Signed-off-by: Dave Chinner <dchinner@xxxxxxxxxx>
> ---
>  fs/xfs/xfs_icache.c      | 46 ++++++++++++++++++++++++++++++++--------
>  fs/xfs/xfs_icache.h      |  4 ++--
>  fs/xfs/xfs_inode.c       | 18 +++++-----------
>  fs/xfs/xfs_inode.h       |  2 +-
>  fs/xfs/xfs_log_recover.c | 19 ++++++++---------
>  fs/xfs/xfs_mount.h       |  1 +
>  fs/xfs/xfs_super.c       |  1 +
>  fs/xfs/xfs_trans.c       |  4 +++-
>  8 files changed, 59 insertions(+), 36 deletions(-)
> 
> diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
> index 0f60e301eb1f..453890942d9f 100644
> --- a/fs/xfs/xfs_icache.c
> +++ b/fs/xfs/xfs_icache.c
> @@ -454,6 +454,27 @@ xfs_inodegc_queue_all(
>  	return ret;
>  }
>  
> +/* Wait for all queued work and collect errors */
> +static int
> +xfs_inodegc_wait_all(
> +	struct xfs_mount	*mp)
> +{
> +	int			cpu;
> +	int			error = 0;
> +
> +	flush_workqueue(mp->m_inodegc_wq);
> +	for_each_online_cpu(cpu) {
> +		struct xfs_inodegc	*gc;
> +
> +		gc = per_cpu_ptr(mp->m_inodegc, cpu);
> +		if (gc->error && !error)
> +			error = gc->error;
> +		gc->error = 0;
> +	}
> +
> +	return error;
> +}
> +
>  /*
>   * Check the validity of the inode we just found it the cache
>   */
> @@ -1491,15 +1512,14 @@ xfs_blockgc_free_space(
>  	if (error)
>  		return error;
>  
> -	xfs_inodegc_flush(mp);
> -	return 0;
> +	return xfs_inodegc_flush(mp);
>  }
>  
>  /*
>   * Reclaim all the free space that we can by scheduling the background blockgc
>   * and inodegc workers immediately and waiting for them all to clear.
>   */
> -void
> +int
>  xfs_blockgc_flush_all(
>  	struct xfs_mount	*mp)
>  {
> @@ -1520,7 +1540,7 @@ xfs_blockgc_flush_all(
>  	for_each_perag_tag(mp, agno, pag, XFS_ICI_BLOCKGC_TAG)
>  		flush_delayed_work(&pag->pag_blockgc_work);
>  
> -	xfs_inodegc_flush(mp);
> +	return xfs_inodegc_flush(mp);
>  }
>  
>  /*
> @@ -1842,13 +1862,17 @@ xfs_inodegc_set_reclaimable(
>   * This is the last chance to make changes to an otherwise unreferenced file
>   * before incore reclamation happens.
>   */
> -static void
> +static int
>  xfs_inodegc_inactivate(
>  	struct xfs_inode	*ip)
>  {
> +	int			error;
> +
>  	trace_xfs_inode_inactivating(ip);
> -	xfs_inactive(ip);
> +	error = xfs_inactive(ip);
>  	xfs_inodegc_set_reclaimable(ip);
> +	return error;
> +
>  }
>  
>  void
> @@ -1880,8 +1904,12 @@ xfs_inodegc_worker(
>  
>  	WRITE_ONCE(gc->shrinker_hits, 0);
>  	llist_for_each_entry_safe(ip, n, node, i_gclist) {
> +		int	error;
> +
>  		xfs_iflags_set(ip, XFS_INACTIVATING);
> -		xfs_inodegc_inactivate(ip);
> +		error = xfs_inodegc_inactivate(ip);
> +		if (error && !gc->error)
> +			gc->error = error;
>  	}
>  
>  	memalloc_nofs_restore(nofs_flag);
> @@ -1905,13 +1933,13 @@ xfs_inodegc_push(
>   * Force all currently queued inode inactivation work to run immediately and
>   * wait for the work to finish.
>   */
> -void
> +int
>  xfs_inodegc_flush(
>  	struct xfs_mount	*mp)
>  {
>  	xfs_inodegc_push(mp);
>  	trace_xfs_inodegc_flush(mp, __return_address);
> -	flush_workqueue(mp->m_inodegc_wq);
> +	return xfs_inodegc_wait_all(mp);
>  }
>  
>  /*
> diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h
> index 87910191a9dd..1dcdcb23796e 100644
> --- a/fs/xfs/xfs_icache.h
> +++ b/fs/xfs/xfs_icache.h
> @@ -62,7 +62,7 @@ int xfs_blockgc_free_dquots(struct xfs_mount *mp, struct xfs_dquot *udqp,
>  		unsigned int iwalk_flags);
>  int xfs_blockgc_free_quota(struct xfs_inode *ip, unsigned int iwalk_flags);
>  int xfs_blockgc_free_space(struct xfs_mount *mp, struct xfs_icwalk *icm);
> -void xfs_blockgc_flush_all(struct xfs_mount *mp);
> +int xfs_blockgc_flush_all(struct xfs_mount *mp);
>  
>  void xfs_inode_set_eofblocks_tag(struct xfs_inode *ip);
>  void xfs_inode_clear_eofblocks_tag(struct xfs_inode *ip);
> @@ -80,7 +80,7 @@ void xfs_blockgc_start(struct xfs_mount *mp);
>  
>  void xfs_inodegc_worker(struct work_struct *work);
>  void xfs_inodegc_push(struct xfs_mount *mp);
> -void xfs_inodegc_flush(struct xfs_mount *mp);
> +int xfs_inodegc_flush(struct xfs_mount *mp);
>  void xfs_inodegc_stop(struct xfs_mount *mp);
>  void xfs_inodegc_start(struct xfs_mount *mp);
>  void xfs_inodegc_cpu_dead(struct xfs_mount *mp, unsigned int cpu);
> diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
> index 5808abab786c..c0d0466f3270 100644
> --- a/fs/xfs/xfs_inode.c
> +++ b/fs/xfs/xfs_inode.c
> @@ -1620,16 +1620,7 @@ xfs_inactive_ifree(
>  	 */
>  	xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_ICOUNT, -1);
>  
> -	/*
> -	 * Just ignore errors at this point.  There is nothing we can do except
> -	 * to try to keep going. Make sure it's not a silent error.
> -	 */
> -	error = xfs_trans_commit(tp);
> -	if (error)
> -		xfs_notice(mp, "%s: xfs_trans_commit returned error %d",
> -			__func__, error);
> -
> -	return 0;
> +	return xfs_trans_commit(tp);
>  }
>  
>  /*
> @@ -1693,7 +1684,7 @@ xfs_inode_needs_inactive(
>   * now be truncated.  Also, we clear all of the read-ahead state
>   * kept for the inode here since the file is now closed.
>   */
> -void
> +int
>  xfs_inactive(
>  	xfs_inode_t	*ip)
>  {
> @@ -1736,7 +1727,7 @@ xfs_inactive(
>  		 * reference to the inode at this point anyways.
>  		 */
>  		if (xfs_can_free_eofblocks(ip, true))
> -			xfs_free_eofblocks(ip);
> +			error = xfs_free_eofblocks(ip);
>  
>  		goto out;
>  	}
> @@ -1773,7 +1764,7 @@ xfs_inactive(
>  	/*
>  	 * Free the inode.
>  	 */
> -	xfs_inactive_ifree(ip);
> +	error = xfs_inactive_ifree(ip);
>  
>  out:
>  	/*
> @@ -1781,6 +1772,7 @@ xfs_inactive(
>  	 * the attached dquots.
>  	 */
>  	xfs_qm_dqdetach(ip);
> +	return error;
>  }
>  
>  /*
> diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
> index 69d21e42c10a..7547caf2f2ab 100644
> --- a/fs/xfs/xfs_inode.h
> +++ b/fs/xfs/xfs_inode.h
> @@ -470,7 +470,7 @@ enum layout_break_reason {
>  	(xfs_has_grpid((pip)->i_mount) || (VFS_I(pip)->i_mode & S_ISGID))
>  
>  int		xfs_release(struct xfs_inode *ip);
> -void		xfs_inactive(struct xfs_inode *ip);
> +int		xfs_inactive(struct xfs_inode *ip);
>  int		xfs_lookup(struct xfs_inode *dp, const struct xfs_name *name,
>  			   struct xfs_inode **ipp, struct xfs_name *ci_name);
>  int		xfs_create(struct mnt_idmap *idmap,
> diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
> index 322eb2ee6c55..82c81d20459d 100644
> --- a/fs/xfs/xfs_log_recover.c
> +++ b/fs/xfs/xfs_log_recover.c
> @@ -2711,7 +2711,9 @@ xlog_recover_iunlink_bucket(
>  			 * just to flush the inodegc queue and wait for it to
>  			 * complete.
>  			 */
> -			xfs_inodegc_flush(mp);
> +			error = xfs_inodegc_flush(mp);
> +			if (error)
> +				break;
>  		}
>  
>  		prev_agino = agino;
> @@ -2719,10 +2721,15 @@ xlog_recover_iunlink_bucket(
>  	}
>  
>  	if (prev_ip) {
> +		int	error2;
> +
>  		ip->i_prev_unlinked = prev_agino;
>  		xfs_irele(prev_ip);
> +
> +		error2 = xfs_inodegc_flush(mp);
> +		if (error2 && !error)
> +			return error2;
>  	}
> -	xfs_inodegc_flush(mp);
>  	return error;
>  }
>  
> @@ -2789,7 +2796,6 @@ xlog_recover_iunlink_ag(
>  			 * bucket and remaining inodes on it unreferenced and
>  			 * unfreeable.
>  			 */
> -			xfs_inodegc_flush(pag->pag_mount);
>  			xlog_recover_clear_agi_bucket(pag, bucket);
>  		}
>  	}
> @@ -2806,13 +2812,6 @@ xlog_recover_process_iunlinks(
>  
>  	for_each_perag(log->l_mp, agno, pag)
>  		xlog_recover_iunlink_ag(pag);
> -
> -	/*
> -	 * Flush the pending unlinked inodes to ensure that the inactivations
> -	 * are fully completed on disk and the incore inodes can be reclaimed
> -	 * before we signal that recovery is complete.
> -	 */
> -	xfs_inodegc_flush(log->l_mp);
>  }
>  
>  STATIC void
> diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
> index aaaf5ec13492..6c09f89534d3 100644
> --- a/fs/xfs/xfs_mount.h
> +++ b/fs/xfs/xfs_mount.h
> @@ -62,6 +62,7 @@ struct xfs_error_cfg {
>  struct xfs_inodegc {
>  	struct llist_head	list;
>  	struct delayed_work	work;
> +	int			error;
>  
>  	/* approximate count of inodes in the list */
>  	unsigned int		items;
> diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
> index 7e706255f165..4120bd1cba90 100644
> --- a/fs/xfs/xfs_super.c
> +++ b/fs/xfs/xfs_super.c
> @@ -1100,6 +1100,7 @@ xfs_inodegc_init_percpu(
>  #endif
>  		init_llist_head(&gc->list);
>  		gc->items = 0;
> +		gc->error = 0;
>  		INIT_DELAYED_WORK(&gc->work, xfs_inodegc_worker);
>  	}
>  	return 0;
> diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
> index f81fbf081b01..8c0bfc9a33b1 100644
> --- a/fs/xfs/xfs_trans.c
> +++ b/fs/xfs/xfs_trans.c
> @@ -290,7 +290,9 @@ xfs_trans_alloc(
>  		 * Do not perform a synchronous scan because callers can hold
>  		 * other locks.
>  		 */
> -		xfs_blockgc_flush_all(mp);
> +		error = xfs_blockgc_flush_all(mp);
> +		if (error)
> +			return error;
>  		want_retry = false;
>  		goto retry;
>  	}
> -- 
> 2.40.1
> 



[Index of Archives]     [XFS Filesystem Development (older mail)]     [Linux Filesystem Development]     [Linux Audio Users]     [Yosemite Trails]     [Linux Kernel]     [Linux RAID]     [Linux SCSI]


  Powered by Linux