Re: [PATCH 40/63] xfs: store in-progress CoW allocations in the refcount btree

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, Sep 29, 2016 at 08:09:59PM -0700, Darrick J. Wong wrote:
> Due to the way the CoW algorithm in XFS works, there's an interval
> during which blocks allocated to handle a CoW can be lost -- if the FS
> goes down after the blocks are allocated but before the block
> remapping takes place.  This is exacerbated by the cowextsz hint --
> allocated reservations can sit around for a while, waiting to get
> used.
> 
> Since the refcount btree doesn't normally store records with refcount
> of 1, we can use it to record these in-progress extents.  In-progress
> blocks cannot be shared because they're not user-visible, so there
> shouldn't be any conflicts with other programs.  This is a better
> solution than holding EFIs during writeback because (a) EFIs can't be
> relogged currently, (b) even if they could, EFIs are bound by
> available log space, which puts an unnecessary upper bound on how much
> CoW we can have in flight, and (c) we already have a mechanism to
> track blocks.
> 
> At mount time, read the refcount records and free anything we find
> with a refcount of 1 because those were in-progress when the FS went
> down.
> 
> Signed-off-by: Darrick J. Wong <darrick.wong@xxxxxxxxxx>
> ---
> v2: Use the deferred operations system to avoid deadlocks and blowing
> out the transaction reservation.  This allows us to unmap a CoW
> extent from the refcountbt and into a file atomically.
> ---
>  fs/xfs/libxfs/xfs_bmap.c     |   11 +
>  fs/xfs/libxfs/xfs_format.h   |    3 
>  fs/xfs/libxfs/xfs_refcount.c |  336 +++++++++++++++++++++++++++++++++++++++++-
>  fs/xfs/libxfs/xfs_refcount.h |   10 +
>  fs/xfs/xfs_mount.c           |   12 ++
>  fs/xfs/xfs_refcount_item.c   |   12 ++
>  fs/xfs/xfs_reflink.c         |  150 +++++++++++++++++++
>  fs/xfs/xfs_reflink.h         |    1 
>  fs/xfs/xfs_super.c           |    9 +
>  fs/xfs/xfs_trace.h           |    4 +
>  10 files changed, 537 insertions(+), 11 deletions(-)
> 
> 
...
> diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
> index c95cdc3..673ecc1 100644
> --- a/fs/xfs/xfs_reflink.c
> +++ b/fs/xfs/xfs_reflink.c
...
> @@ -772,3 +787,138 @@ xfs_reflink_end_cow(
>  	trace_xfs_reflink_end_cow_error(ip, error, _RET_IP_);
>  	return error;
>  }
...
> +STATIC int
> +xfs_reflink_recover_cow_ag(
> +	struct xfs_mount		*mp,
> +	xfs_agnumber_t			agno)
> +{
> +	struct xfs_trans		*tp;
> +	struct xfs_btree_cur		*cur;
> +	struct xfs_buf			*agbp;
> +	struct xfs_reflink_recovery	*rr, *n;
> +	struct list_head		debris;
> +	union xfs_btree_irec		low;
> +	union xfs_btree_irec		high;
> +	struct xfs_defer_ops		dfops;
> +	xfs_fsblock_t			fsb;
> +	int				error;
> +
> +	error = xfs_alloc_read_agf(mp, NULL, agno, 0, &agbp);
> +	if (error)
> +		return error;
> +	cur = xfs_refcountbt_init_cursor(mp, NULL, agbp, agno, NULL);
> +
> +	/* Find all the leftover CoW staging extents. */
> +	INIT_LIST_HEAD(&debris);
> +	memset(&low, 0, sizeof(low));
> +	memset(&high, 0, sizeof(high));
> +	low.rc.rc_startblock = 0;
> +	high.rc.rc_startblock = -1U;
> +	error = xfs_btree_query_range(cur, &low, &high,
> +			xfs_reflink_recover_extent, &debris);
> +	if (error)
> +		goto out_error;

Potential memory leak of debris list entries on error.

> +	xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
> +	xfs_buf_relse(agbp);
> +
> +	/* Now iterate the list to free the leftovers */
> +	list_for_each_entry(rr, &debris, rr_list) {
> +		/* Set up transaction. */
> +		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0, 0, &tp);
> +		if (error)
> +			goto out_free;
> +
> +		trace_xfs_reflink_recover_extent(mp, agno, &rr->rr_rrec);
> +
> +		/* Free the orphan record */
> +		xfs_defer_init(&dfops, &fsb);
> +		fsb = XFS_AGB_TO_FSB(mp, agno, rr->rr_rrec.rc_startblock);
> +		error = xfs_refcount_free_cow_extent(mp, &dfops, fsb,
> +				rr->rr_rrec.rc_blockcount);
> +		if (error)
> +			goto out_defer;
> +
> +		/* Free the block. */
> +		xfs_bmap_add_free(mp, &dfops, fsb,
> +				rr->rr_rrec.rc_blockcount, NULL);
> +
> +		error = xfs_defer_finish(&tp, &dfops, NULL);
> +		if (error)
> +			goto out_defer;
> +
> +		error = xfs_trans_commit(tp);
> +		if (error)
> +			goto out_cancel;
> +	}
> +	goto out_free;

return 0 ?

Brian

> +
> +out_defer:
> +	xfs_defer_cancel(&dfops);
> +out_cancel:
> +	xfs_trans_cancel(tp);
> +
> +out_free:
> +	/* Free the leftover list */
> +	list_for_each_entry_safe(rr, n, &debris, rr_list) {
> +		list_del(&rr->rr_list);
> +		kmem_free(rr);
> +	}
> +
> +	return error;
> +
> +out_error:
> +	xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
> +	xfs_buf_relse(agbp);
> +	return error;
> +}
> +
> +/*
> + * Free leftover CoW reservations that didn't get cleaned out.
> + */
> +int
> +xfs_reflink_recover_cow(
> +	struct xfs_mount	*mp)
> +{
> +	xfs_agnumber_t		agno;
> +	int			error = 0;
> +
> +	if (!xfs_sb_version_hasreflink(&mp->m_sb))
> +		return 0;
> +
> +	for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) {
> +		error = xfs_reflink_recover_cow_ag(mp, agno);
> +		if (error)
> +			break;
> +	}
> +
> +	return error;
> +}
> diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h
> index c0c989a..1d2f180 100644
> --- a/fs/xfs/xfs_reflink.h
> +++ b/fs/xfs/xfs_reflink.h
> @@ -42,5 +42,6 @@ extern int xfs_reflink_cancel_cow_range(struct xfs_inode *ip, xfs_off_t offset,
>  		xfs_off_t count);
>  extern int xfs_reflink_end_cow(struct xfs_inode *ip, xfs_off_t offset,
>  		xfs_off_t count);
> +extern int xfs_reflink_recover_cow(struct xfs_mount *mp);
>  
>  #endif /* __XFS_REFLINK_H */
> diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
> index 26b45b3..e6aaa91 100644
> --- a/fs/xfs/xfs_super.c
> +++ b/fs/xfs/xfs_super.c
> @@ -1306,6 +1306,15 @@ xfs_fs_remount(
>  		xfs_restore_resvblks(mp);
>  		xfs_log_work_queue(mp);
>  		xfs_queue_eofblocks(mp);
> +
> +		/* Recover any CoW blocks that never got remapped. */
> +		error = xfs_reflink_recover_cow(mp);
> +		if (error) {
> +			xfs_err(mp,
> +	"Error %d recovering leftover CoW allocations.", error);
> +			xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
> +			return error;
> +		}
>  	}
>  
>  	/* rw -> ro */
> diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
> index 8e89223..ca0930b 100644
> --- a/fs/xfs/xfs_trace.h
> +++ b/fs/xfs/xfs_trace.h
> @@ -2916,14 +2916,18 @@ DEFINE_AG_ERROR_EVENT(xfs_refcount_update_error);
>  /* refcount adjustment tracepoints */
>  DEFINE_AG_EXTENT_EVENT(xfs_refcount_increase);
>  DEFINE_AG_EXTENT_EVENT(xfs_refcount_decrease);
> +DEFINE_AG_EXTENT_EVENT(xfs_refcount_cow_increase);
> +DEFINE_AG_EXTENT_EVENT(xfs_refcount_cow_decrease);
>  DEFINE_REFCOUNT_TRIPLE_EXTENT_EVENT(xfs_refcount_merge_center_extents);
>  DEFINE_REFCOUNT_EXTENT_EVENT(xfs_refcount_modify_extent);
> +DEFINE_REFCOUNT_EXTENT_EVENT(xfs_reflink_recover_extent);
>  DEFINE_REFCOUNT_EXTENT_AT_EVENT(xfs_refcount_split_extent);
>  DEFINE_REFCOUNT_DOUBLE_EXTENT_EVENT(xfs_refcount_merge_left_extent);
>  DEFINE_REFCOUNT_DOUBLE_EXTENT_EVENT(xfs_refcount_merge_right_extent);
>  DEFINE_REFCOUNT_DOUBLE_EXTENT_AT_EVENT(xfs_refcount_find_left_extent);
>  DEFINE_REFCOUNT_DOUBLE_EXTENT_AT_EVENT(xfs_refcount_find_right_extent);
>  DEFINE_AG_ERROR_EVENT(xfs_refcount_adjust_error);
> +DEFINE_AG_ERROR_EVENT(xfs_refcount_adjust_cow_error);
>  DEFINE_AG_ERROR_EVENT(xfs_refcount_merge_center_extents_error);
>  DEFINE_AG_ERROR_EVENT(xfs_refcount_modify_extent_error);
>  DEFINE_AG_ERROR_EVENT(xfs_refcount_split_extent_error);
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
> the body of a message to majordomo@xxxxxxxxxxxxxxx
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-xfs" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [XFS Filesystem Development (older mail)]     [Linux Filesystem Development]     [Linux Audio Users]     [Yosemite Trails]     [Linux Kernel]     [Linux RAID]     [Linux SCSI]


  Powered by Linux