Re: [PATCH 1/9] xfs_repair: port the online repair newbt structure

On Wed, May 27, 2020 at 08:15:31AM -0400, Brian Foster wrote:
> On Tue, May 19, 2020 at 06:50:49PM -0700, Darrick J. Wong wrote:
> > From: Darrick J. Wong <darrick.wong@xxxxxxxxxx>
> > 
> > Port the new btree staging context and related block reservation helper
> > code from the kernel to repair.  We'll use this in subsequent patches to
> > implement btree bulk loading.
> > 
> > Signed-off-by: Darrick J. Wong <darrick.wong@xxxxxxxxxx>
> > ---
> >  include/libxfs.h         |    1 
> >  libxfs/libxfs_api_defs.h |    2 
> >  repair/Makefile          |    4 -
> >  repair/bload.c           |  303 ++++++++++++++++++++++++++++++++++++++++++++++
> >  repair/bload.h           |   77 ++++++++++++
> >  repair/xfs_repair.c      |   17 +++
> >  6 files changed, 402 insertions(+), 2 deletions(-)
> >  create mode 100644 repair/bload.c
> >  create mode 100644 repair/bload.h
> > 
> > 
> ...
> > diff --git a/repair/bload.c b/repair/bload.c
> > new file mode 100644
> > index 00000000..9bc17468
> > --- /dev/null
> > +++ b/repair/bload.c
> > @@ -0,0 +1,303 @@
> > +// SPDX-License-Identifier: GPL-2.0-or-later
> > +/*
> > + * Copyright (C) 2020 Oracle.  All Rights Reserved.
> > + * Author: Darrick J. Wong <darrick.wong@xxxxxxxxxx>
> > + */
> > +#include <libxfs.h>
> > +#include "bload.h"
> > +
> > +#define trace_xrep_newbt_claim_block(...)	((void) 0)
> > +#define trace_xrep_newbt_free_blocks(...)	((void) 0)
> > +
> > +int bload_leaf_slack = -1;
> > +int bload_node_slack = -1;
> > +
> > +/* Ported routines from fs/xfs/scrub/repair.c */
> > +
> 
> Looks mostly straightforward, but I'll have to come back to this as I
> get to the code that uses it later in the series. In the meantime, I see
> some of these helpers in scrub/repair.c but not others. Are there
> other routines referenced here that are intended to be copies of
> kernel code?

Hm.  I might not understand the question, but in general the code should
be fairly similar to the kernel functions.  The biggest differences are
(a) that whole libxfs error code mess, (b) the much simpler repair_ctx
structure, and (c) the fact that repair doesn't bother with EFIs to
automatically reap blocks.
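
For (a), a minimal sketch of the sign convention, going by what the
port does here: libxfs calls return negative errnos kernel-style,
while repair passes positive errnos around internally, so each call
site flips the sign:

	/* libxfs returns -EFOO; flip to the positive errnos repair uses. */
	error = -libxfs_trans_roll(&sc->tp);
	if (error)
		return error;	/* positive, e.g. ENOMEM or ENOSPC */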

So... the ten functions you see here do the same things as their kernel
counterparts, but they get to do it in the much simpler userspace
environment.
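
To make that concrete, the expected lifecycle is roughly as follows
(a sketch only -- the real call sites land later in the series, and
@fsbno and @nr_blocks here are placeholders):

	struct xrep_newbt	xnr;
	int			error;

	/* Stage a new AG btree and reserve space for it. */
	xrep_newbt_init_ag(&xnr, sc, &XFS_RMAP_OINFO_ANY_OWNER, fsbno,
			XFS_AG_RESV_NONE);
	error = xrep_newbt_alloc_blocks(&xnr, nr_blocks);

	/* Feed blocks to the bulk loader via xrep_newbt_claim_block(). */

	/* Free the accounting info and any unused blocks. */
	xrep_newbt_destroy(&xnr, error);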

The other functions in scrub/repair.c that didn't get ported are either
for other types of repairs or exist to support the in-kernel code and
aren't needed here.

--D

> Brian
> 
> > +/*
> > + * Roll a transaction, keeping the AG headers locked and reinitializing
> > + * the btree cursors.
> > + */
> > +int
> > +xrep_roll_ag_trans(
> > +	struct repair_ctx	*sc)
> > +{
> > +	int			error;
> > +
> > +	/* Keep the AG header buffers locked so we can keep going. */
> > +	if (sc->agi_bp)
> > +		libxfs_trans_bhold(sc->tp, sc->agi_bp);
> > +	if (sc->agf_bp)
> > +		libxfs_trans_bhold(sc->tp, sc->agf_bp);
> > +	if (sc->agfl_bp)
> > +		libxfs_trans_bhold(sc->tp, sc->agfl_bp);
> > +
> > +	/*
> > +	 * Roll the transaction.  We still own the buffer and the buffer lock
> > +	 * regardless of whether or not the roll succeeds.  If the roll fails,
> > +	 * the buffers will be released during teardown on our way out of the
> > +	 * kernel.  If it succeeds, we join them to the new transaction and
> > +	 * move on.
> > +	 */
> > +	error = -libxfs_trans_roll(&sc->tp);
> > +	if (error)
> > +		return error;
> > +
> > +	/* Join AG headers to the new transaction. */
> > +	if (sc->agi_bp)
> > +		libxfs_trans_bjoin(sc->tp, sc->agi_bp);
> > +	if (sc->agf_bp)
> > +		libxfs_trans_bjoin(sc->tp, sc->agf_bp);
> > +	if (sc->agfl_bp)
> > +		libxfs_trans_bjoin(sc->tp, sc->agfl_bp);
> > +
> > +	return 0;
> > +}
> > +
> > +/* Initialize accounting resources for staging a new AG btree. */
> > +void
> > +xrep_newbt_init_ag(
> > +	struct xrep_newbt		*xnr,
> > +	struct repair_ctx		*sc,
> > +	const struct xfs_owner_info	*oinfo,
> > +	xfs_fsblock_t			alloc_hint,
> > +	enum xfs_ag_resv_type		resv)
> > +{
> > +	memset(xnr, 0, sizeof(struct xrep_newbt));
> > +	xnr->sc = sc;
> > +	xnr->oinfo = *oinfo; /* structure copy */
> > +	xnr->alloc_hint = alloc_hint;
> > +	xnr->resv = resv;
> > +	INIT_LIST_HEAD(&xnr->resv_list);
> > +}
> > +
> > +/* Initialize accounting resources for staging a new inode fork btree. */
> > +void
> > +xrep_newbt_init_inode(
> > +	struct xrep_newbt		*xnr,
> > +	struct repair_ctx		*sc,
> > +	int				whichfork,
> > +	const struct xfs_owner_info	*oinfo)
> > +{
> > +	xrep_newbt_init_ag(xnr, sc, oinfo,
> > +			XFS_INO_TO_FSB(sc->mp, sc->ip->i_ino),
> > +			XFS_AG_RESV_NONE);
> > +	xnr->ifake.if_fork = kmem_zone_zalloc(xfs_ifork_zone, 0);
> > +	xnr->ifake.if_fork_size = XFS_IFORK_SIZE(sc->ip, whichfork);
> > +}
> > +
> > +/*
> > + * Initialize accounting resources for staging a new btree.  Callers are
> > + * expected to add their own reservations (and clean them up) manually.
> > + */
> > +void
> > +xrep_newbt_init_bare(
> > +	struct xrep_newbt		*xnr,
> > +	struct repair_ctx		*sc)
> > +{
> > +	xrep_newbt_init_ag(xnr, sc, &XFS_RMAP_OINFO_ANY_OWNER, NULLFSBLOCK,
> > +			XFS_AG_RESV_NONE);
> > +}
> > +
> > +/* Designate specific blocks to be used to build our new btree. */
> > +int
> > +xrep_newbt_add_blocks(
> > +	struct xrep_newbt	*xnr,
> > +	xfs_fsblock_t		fsbno,
> > +	xfs_extlen_t		len)
> > +{
> > +	struct xrep_newbt_resv	*resv;
> > +
> > +	resv = kmem_alloc(sizeof(struct xrep_newbt_resv), KM_MAYFAIL);
> > +	if (!resv)
> > +		return ENOMEM;
> > +
> > +	INIT_LIST_HEAD(&resv->list);
> > +	resv->fsbno = fsbno;
> > +	resv->len = len;
> > +	resv->used = 0;
> > +	list_add_tail(&resv->list, &xnr->resv_list);
> > +	return 0;
> > +}
> > +
> > +/* Reserve disk space for our new btree. */
> > +int
> > +xrep_newbt_alloc_blocks(
> > +	struct xrep_newbt	*xnr,
> > +	uint64_t		nr_blocks)
> > +{
> > +	struct repair_ctx	*sc = xnr->sc;
> > +	xfs_alloctype_t		type;
> > +	xfs_fsblock_t		alloc_hint = xnr->alloc_hint;
> > +	int			error = 0;
> > +
> > +	type = sc->ip ? XFS_ALLOCTYPE_START_BNO : XFS_ALLOCTYPE_NEAR_BNO;
> > +
> > +	while (nr_blocks > 0 && !error) {
> > +		struct xfs_alloc_arg	args = {
> > +			.tp		= sc->tp,
> > +			.mp		= sc->mp,
> > +			.type		= type,
> > +			.fsbno		= alloc_hint,
> > +			.oinfo		= xnr->oinfo,
> > +			.minlen		= 1,
> > +			.maxlen		= nr_blocks,
> > +			.prod		= 1,
> > +			.resv		= xnr->resv,
> > +		};
> > +
> > +		error = -libxfs_alloc_vextent(&args);
> > +		if (error)
> > +			return error;
> > +		if (args.fsbno == NULLFSBLOCK)
> > +			return ENOSPC;
> > +
> > +		/* We don't have real EFIs here so skip that. */
> > +
> > +		error = xrep_newbt_add_blocks(xnr, args.fsbno, args.len);
> > +		if (error)
> > +			break;
> > +
> > +		nr_blocks -= args.len;
> > +		alloc_hint = args.fsbno + args.len - 1;
> > +
> > +		if (sc->ip)
> > +			error = -libxfs_trans_roll_inode(&sc->tp, sc->ip);
> > +		else
> > +			error = xrep_roll_ag_trans(sc);
> > +	}
> > +
> > +	return error;
> > +}
> > +
> > +/*
> > + * Release blocks that were reserved for a btree repair.  If the repair
> > + * succeeded then we log deferred frees for unused blocks.  Otherwise, we try
> > + * to free the extents immediately to roll the filesystem back to where it was
> > + * before we started.
> > + */
> > +static inline int
> > +xrep_newbt_destroy_reservation(
> > +	struct xrep_newbt	*xnr,
> > +	struct xrep_newbt_resv	*resv,
> > +	bool			cancel_repair)
> > +{
> > +	struct repair_ctx	*sc = xnr->sc;
> > +
> > +	if (cancel_repair) {
> > +		int		error;
> > +
> > +		/* Free the extent then roll the transaction. */
> > +		error = -libxfs_free_extent(sc->tp, resv->fsbno, resv->len,
> > +				&xnr->oinfo, xnr->resv);
> > +		if (error)
> > +			return error;
> > +
> > +		if (sc->ip)
> > +			return -libxfs_trans_roll_inode(&sc->tp, sc->ip);
> > +		return xrep_roll_ag_trans(sc);
> > +	}
> > +
> > +	/* We don't have EFIs here so skip the EFD. */
> > +
> > +	/*
> > +	 * Use the deferred freeing mechanism to schedule for deletion any
> > +	 * blocks we didn't use to rebuild the tree.  This enables us to log
> > +	 * them all in the same transaction as the root change.
> > +	 */
> > +	resv->fsbno += resv->used;
> > +	resv->len -= resv->used;
> > +	resv->used = 0;
> > +
> > +	if (resv->len == 0)
> > +		return 0;
> > +
> > +	trace_xrep_newbt_free_blocks(sc->mp,
> > +			XFS_FSB_TO_AGNO(sc->mp, resv->fsbno),
> > +			XFS_FSB_TO_AGBNO(sc->mp, resv->fsbno),
> > +			resv->len, xnr->oinfo.oi_owner);
> > +
> > +	__xfs_bmap_add_free(sc->tp, resv->fsbno, resv->len, &xnr->oinfo, true);
> > +
> > +	return 0;
> > +}
> > +
> > +/* Free all the accounting info and disk space we reserved for a new btree. */
> > +void
> > +xrep_newbt_destroy(
> > +	struct xrep_newbt	*xnr,
> > +	int			error)
> > +{
> > +	struct repair_ctx	*sc = xnr->sc;
> > +	struct xrep_newbt_resv	*resv, *n;
> > +	int			err2;
> > +
> > +	list_for_each_entry_safe(resv, n, &xnr->resv_list, list) {
> > +		err2 = xrep_newbt_destroy_reservation(xnr, resv, error != 0);
> > +		if (err2)
> > +			goto junkit;
> > +
> > +		list_del(&resv->list);
> > +		kmem_free(resv);
> > +	}
> > +
> > +junkit:
> > +	/*
> > +	 * If we still have reservations attached to @newbt, cleanup must have
> > +	 * failed and the filesystem is about to go down.  Clean up the incore
> > +	 * reservations.
> > +	 */
> > +	list_for_each_entry_safe(resv, n, &xnr->resv_list, list) {
> > +		list_del(&resv->list);
> > +		kmem_free(resv);
> > +	}
> > +
> > +	if (sc->ip) {
> > +		kmem_cache_free(xfs_ifork_zone, xnr->ifake.if_fork);
> > +		xnr->ifake.if_fork = NULL;
> > +	}
> > +}
> > +
> > +/* Feed one of the reserved btree blocks to the bulk loader. */
> > +int
> > +xrep_newbt_claim_block(
> > +	struct xfs_btree_cur	*cur,
> > +	struct xrep_newbt	*xnr,
> > +	union xfs_btree_ptr	*ptr)
> > +{
> > +	struct xrep_newbt_resv	*resv;
> > +	xfs_fsblock_t		fsb;
> > +
> > +	/*
> > +	 * The first item in the list should always have a free block unless
> > +	 * we're completely out.
> > +	 */
> > +	resv = list_first_entry(&xnr->resv_list, struct xrep_newbt_resv, list);
> > +	if (resv->used == resv->len)
> > +		return ENOSPC;
> > +
> > +	/*
> > +	 * Peel off a block from the start of the reservation.  We allocate
> > +	 * blocks in order to place blocks on disk in increasing record or key
> > +	 * order.  The block reservations tend to end up on the list in
> > +	 * decreasing order, which hopefully results in leaf blocks ending up
> > +	 * together.
> > +	 */
> > +	fsb = resv->fsbno + resv->used;
> > +	resv->used++;
> > +
> > +	/* If we used all the blocks in this reservation, move it to the end. */
> > +	if (resv->used == resv->len)
> > +		list_move_tail(&resv->list, &xnr->resv_list);
> > +
> > +	trace_xrep_newbt_claim_block(cur->bc_mp,
> > +			XFS_FSB_TO_AGNO(cur->bc_mp, fsb),
> > +			XFS_FSB_TO_AGBNO(cur->bc_mp, fsb),
> > +			1, xnr->oinfo.oi_owner);
> > +
> > +	if (cur->bc_flags & XFS_BTREE_LONG_PTRS)
> > +		ptr->l = cpu_to_be64(fsb);
> > +	else
> > +		ptr->s = cpu_to_be32(XFS_FSB_TO_AGBNO(cur->bc_mp, fsb));
> > +	return 0;
> > +}
> > diff --git a/repair/bload.h b/repair/bload.h
> > new file mode 100644
> > index 00000000..020c4834
> > --- /dev/null
> > +++ b/repair/bload.h
> > @@ -0,0 +1,77 @@
> > +// SPDX-License-Identifier: GPL-2.0-or-later
> > +/*
> > + * Copyright (C) 2020 Oracle.  All Rights Reserved.
> > + * Author: Darrick J. Wong <darrick.wong@xxxxxxxxxx>
> > + */
> > +#ifndef __XFS_REPAIR_BLOAD_H__
> > +#define __XFS_REPAIR_BLOAD_H__
> > +
> > +extern int bload_leaf_slack;
> > +extern int bload_node_slack;
> > +
> > +struct repair_ctx {
> > +	struct xfs_mount	*mp;
> > +	struct xfs_inode	*ip;
> > +	struct xfs_trans	*tp;
> > +
> > +	struct xfs_buf		*agi_bp;
> > +	struct xfs_buf		*agf_bp;
> > +	struct xfs_buf		*agfl_bp;
> > +};
> > +
> > +struct xrep_newbt_resv {
> > +	/* Link to list of extents that we've reserved. */
> > +	struct list_head	list;
> > +
> > +	/* FSB of the block we reserved. */
> > +	xfs_fsblock_t		fsbno;
> > +
> > +	/* Length of the reservation. */
> > +	xfs_extlen_t		len;
> > +
> > +	/* How much of this reservation we've used. */
> > +	xfs_extlen_t		used;
> > +};
> > +
> > +struct xrep_newbt {
> > +	struct repair_ctx	*sc;
> > +
> > +	/* List of extents that we've reserved. */
> > +	struct list_head	resv_list;
> > +
> > +	/* Fake root for new btree. */
> > +	union {
> > +		struct xbtree_afakeroot	afake;
> > +		struct xbtree_ifakeroot	ifake;
> > +	};
> > +
> > +	/* rmap owner of these blocks */
> > +	struct xfs_owner_info	oinfo;
> > +
> > +	/* The last reservation we allocated from. */
> > +	struct xrep_newbt_resv	*last_resv;
> > +
> > +	/* Allocation hint */
> > +	xfs_fsblock_t		alloc_hint;
> > +
> > +	/* per-ag reservation type */
> > +	enum xfs_ag_resv_type	resv;
> > +};
> > +
> > +#define for_each_xrep_newbt_reservation(xnr, resv, n)	\
> > +	list_for_each_entry_safe((resv), (n), &(xnr)->resv_list, list)
> > +
> > +void xrep_newbt_init_bare(struct xrep_newbt *xnr, struct repair_ctx *sc);
> > +void xrep_newbt_init_ag(struct xrep_newbt *xnr, struct repair_ctx *sc,
> > +		const struct xfs_owner_info *oinfo, xfs_fsblock_t alloc_hint,
> > +		enum xfs_ag_resv_type resv);
> > +void xrep_newbt_init_inode(struct xrep_newbt *xnr, struct repair_ctx *sc,
> > +		int whichfork, const struct xfs_owner_info *oinfo);
> > +int xrep_newbt_add_blocks(struct xrep_newbt *xnr, xfs_fsblock_t fsbno,
> > +		xfs_extlen_t len);
> > +int xrep_newbt_alloc_blocks(struct xrep_newbt *xnr, uint64_t nr_blocks);
> > +void xrep_newbt_destroy(struct xrep_newbt *xnr, int error);
> > +int xrep_newbt_claim_block(struct xfs_btree_cur *cur, struct xrep_newbt *xnr,
> > +		union xfs_btree_ptr *ptr);
> > +
> > +#endif /* __XFS_REPAIR_BLOAD_H__ */
> > diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c
> > index 9d72fa8e..8fbd3649 100644
> > --- a/repair/xfs_repair.c
> > +++ b/repair/xfs_repair.c
> > @@ -24,6 +24,7 @@
> >  #include "rmap.h"
> >  #include "libfrog/fsgeom.h"
> >  #include "libfrog/platform.h"
> > +#include "bload.h"
> >  
> >  /*
> >   * option tables for getsubopt calls
> > @@ -39,6 +40,8 @@ enum o_opt_nums {
> >  	AG_STRIDE,
> >  	FORCE_GEO,
> >  	PHASE2_THREADS,
> > +	BLOAD_LEAF_SLACK,
> > +	BLOAD_NODE_SLACK,
> >  	O_MAX_OPTS,
> >  };
> >  
> > @@ -49,6 +52,8 @@ static char *o_opts[] = {
> >  	[AG_STRIDE]		= "ag_stride",
> >  	[FORCE_GEO]		= "force_geometry",
> >  	[PHASE2_THREADS]	= "phase2_threads",
> > +	[BLOAD_LEAF_SLACK]	= "debug_bload_leaf_slack",
> > +	[BLOAD_NODE_SLACK]	= "debug_bload_node_slack",
> >  	[O_MAX_OPTS]		= NULL,
> >  };
> >  
> > @@ -260,6 +265,18 @@ process_args(int argc, char **argv)
> >  		_("-o phase2_threads requires a parameter\n"));
> >  					phase2_threads = (int)strtol(val, NULL, 0);
> >  					break;
> > +				case BLOAD_LEAF_SLACK:
> > +					if (!val)
> > +						do_abort(
> > +		_("-o debug_bload_leaf_slack requires a parameter\n"));
> > +					bload_leaf_slack = (int)strtol(val, NULL, 0);
> > +					break;
> > +				case BLOAD_NODE_SLACK:
> > +					if (!val)
> > +						do_abort(
> > +		_("-o debug_bload_node_slack requires a parameter\n"));
> > +					bload_node_slack = (int)strtol(val, NULL, 0);
> > +					break;
> >  				default:
> >  					unknown('o', val);
> >  					break;
> > 
> 
