On Mon, Sep 20, 2021 at 03:23:34PM +0530, Chandan Babu R wrote:
> On 18 Sep 2021 at 06:59, Darrick J. Wong wrote:
> > From: Darrick J. Wong <djwong@xxxxxxxxxx>
> >
> > Convert the on-stack scrub context, btree scrub context, and da btree
> > scrub context into a heap allocation so that we reduce stack usage and
> > gain the ability to handle tall btrees without issue.
> >
> > Specifically, this saves us ~208 bytes for the dabtree scrub, ~464 bytes
> > for the btree scrub, and ~200 bytes for the main scrub context.
> >
>
> Apart from the nits pointed out below, the remaining changes look good to me.
>
> Reviewed-by: Chandan Babu R <chandan.babu@xxxxxxxxxx>
>
> > Signed-off-by: Darrick J. Wong <djwong@xxxxxxxxxx>
> > ---
> >  fs/xfs/scrub/btree.c   |   54 ++++++++++++++++++++++++------------------
> >  fs/xfs/scrub/btree.h   |    1 +
> >  fs/xfs/scrub/dabtree.c |   62 ++++++++++++++++++++++++++----------------------
> >  fs/xfs/scrub/scrub.c   |   60 ++++++++++++++++++++++++++--------------------
> >  4 files changed, 98 insertions(+), 79 deletions(-)
> >
> >
> > diff --git a/fs/xfs/scrub/btree.c b/fs/xfs/scrub/btree.c
> > index eccb855dc904..26dcb4691e31 100644
> > --- a/fs/xfs/scrub/btree.c
> > +++ b/fs/xfs/scrub/btree.c
> > @@ -627,15 +627,8 @@ xchk_btree(
> >  	const struct xfs_owner_info	*oinfo,
> >  	void				*private)
> >  {
> > -	struct xchk_btree		bs = {
> > -		.cur			= cur,
> > -		.scrub_rec		= scrub_fn,
> > -		.oinfo			= oinfo,
> > -		.firstrec		= true,
> > -		.private		= private,
> > -		.sc			= sc,
> > -	};
> >  	union xfs_btree_ptr		ptr;
> > +	struct xchk_btree		*bs;
> >  	union xfs_btree_ptr		*pp;
> >  	union xfs_btree_rec		*recp;
> >  	struct xfs_btree_block		*block;
> > @@ -646,10 +639,24 @@ xchk_btree(
> >  	int				i;
> >  	int				error = 0;
> >  
> > +	/*
> > +	 * Allocate the btree scrub context from the heap, because this
> > +	 * structure can get rather large.
> > +	 */
> > +	bs = kmem_zalloc(sizeof(struct xchk_btree), KM_NOFS | KM_MAYFAIL);
> > +	if (!bs)
> > +		return -ENOMEM;
> > +	bs->cur = cur;
> > +	bs->scrub_rec = scrub_fn;
> > +	bs->oinfo = oinfo;
> > +	bs->firstrec = true;
> > +	bs->private = private;
> > +	bs->sc = sc;
> > +
> >  	/* Initialize scrub state */
> >  	for (i = 0; i < XFS_BTREE_MAXLEVELS; i++)
> > -		bs.firstkey[i] = true;
> > -	INIT_LIST_HEAD(&bs.to_check);
> > +		bs->firstkey[i] = true;
> > +	INIT_LIST_HEAD(&bs->to_check);
> >  
> >  	/* Don't try to check a tree with a height we can't handle. */
> >  	if (cur->bc_nlevels > XFS_BTREE_MAXLEVELS) {
> > @@ -663,9 +670,9 @@
> >  	 */
> >  	level = cur->bc_nlevels - 1;
> >  	cur->bc_ops->init_ptr_from_cur(cur, &ptr);
> > -	if (!xchk_btree_ptr_ok(&bs, cur->bc_nlevels, &ptr))
> > +	if (!xchk_btree_ptr_ok(bs, cur->bc_nlevels, &ptr))
> >  		goto out;
> > -	error = xchk_btree_get_block(&bs, level, &ptr, &block, &bp);
> > +	error = xchk_btree_get_block(bs, level, &ptr, &block, &bp);
> >  	if (error || !block)
> >  		goto out;
> >  
> > @@ -678,7 +685,7 @@
> >  		/* End of leaf, pop back towards the root. */
> >  		if (cur->bc_ptrs[level] >
> >  		    be16_to_cpu(block->bb_numrecs)) {
> > -			xchk_btree_block_keys(&bs, level, block);
> > +			xchk_btree_block_keys(bs, level, block);
> >  			if (level < cur->bc_nlevels - 1)
> >  				cur->bc_ptrs[level + 1]++;
> >  			level++;
> >  			continue;
> >  		}
> >  
> >  		/* Records in order for scrub? */
> > -		xchk_btree_rec(&bs);
> > +		xchk_btree_rec(bs);
> >  
> >  		/* Call out to the record checker. */
> >  		recp = xfs_btree_rec_addr(cur, cur->bc_ptrs[0], block);
> > -		error = bs.scrub_rec(&bs, recp);
> > +		error = bs->scrub_rec(bs, recp);
> >  		if (error)
> >  			break;
> >  		if (xchk_should_terminate(sc, &error) ||
> > @@ -703,7 +710,7 @@
> >  
> >  		/* End of node, pop back towards the root. */
> >  		if (cur->bc_ptrs[level] > be16_to_cpu(block->bb_numrecs)) {
> > -			xchk_btree_block_keys(&bs, level, block);
> > +			xchk_btree_block_keys(bs, level, block);
> >  			if (level < cur->bc_nlevels - 1)
> >  				cur->bc_ptrs[level + 1]++;
> >  			level++;
> > @@ -711,16 +718,16 @@
> >  		}
> >  
> >  		/* Keys in order for scrub? */
> > -		xchk_btree_key(&bs, level);
> > +		xchk_btree_key(bs, level);
> >  
> >  		/* Drill another level deeper. */
> >  		pp = xfs_btree_ptr_addr(cur, cur->bc_ptrs[level], block);
> > -		if (!xchk_btree_ptr_ok(&bs, level, pp)) {
> > +		if (!xchk_btree_ptr_ok(bs, level, pp)) {
> >  			cur->bc_ptrs[level]++;
> >  			continue;
> >  		}
> >  		level--;
> > -		error = xchk_btree_get_block(&bs, level, pp, &block, &bp);
> > +		error = xchk_btree_get_block(bs, level, pp, &block, &bp);
> >  		if (error || !block)
> >  			goto out;
> >  
> > @@ -729,13 +736,14 @@
> >  
> >  out:
> >  	/* Process deferred owner checks on btree blocks. */
> > -	list_for_each_entry_safe(co, n, &bs.to_check, list) {
> > -		if (!error && bs.cur)
> > -			error = xchk_btree_check_block_owner(&bs,
> > -					co->level, co->daddr);
> > +	list_for_each_entry_safe(co, n, &bs->to_check, list) {
> > +		if (!error && bs->cur)
> > +			error = xchk_btree_check_block_owner(bs, co->level,
> > +					co->daddr);
> >  		list_del(&co->list);
> >  		kmem_free(co);
> >  	}
> > +	kmem_free(bs);
> >  
> >  	return error;
> >  }
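(All three conversions in this patch follow the same shape.  A condensed
sketch, with the tree-walk details elided -- not the literal code:)

        struct xchk_btree       *bs;

        /*
         * KM_NOFS keeps memory reclaim from recursing back into the
         * filesystem while we may hold locks; KM_MAYFAIL lets the
         * allocation fail so we can return -ENOMEM instead of the
         * allocator retrying forever.
         */
        bs = kmem_zalloc(sizeof(struct xchk_btree), KM_NOFS | KM_MAYFAIL);
        if (!bs)
                return -ENOMEM;

        /* ...initialize the fields, then walk the tree via bs, not &bs... */

        kmem_free(bs);
        return error;

Note that the out: path above still has to use list_for_each_entry_safe
to free each deferred owner-check record as it walks, and kmem_free(bs)
has to come after that loop because the list head lives inside *bs.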
> > diff --git a/fs/xfs/scrub/btree.h b/fs/xfs/scrub/btree.h
> > index b7d2fc01fbf9..d5c0b0cbc505 100644
> > --- a/fs/xfs/scrub/btree.h
> > +++ b/fs/xfs/scrub/btree.h
> > @@ -44,6 +44,7 @@ struct xchk_btree {
> >  	bool				firstkey[XFS_BTREE_MAXLEVELS];
> >  	struct list_head		to_check;
> >  };
> > +
> >  int xchk_btree(struct xfs_scrub *sc, struct xfs_btree_cur *cur,
> >  		xchk_btree_rec_fn scrub_fn, const struct xfs_owner_info *oinfo,
> >  		void *private);
> > diff --git a/fs/xfs/scrub/dabtree.c b/fs/xfs/scrub/dabtree.c
> > index 8a52514bc1ff..b962cfbbd92b 100644
> > --- a/fs/xfs/scrub/dabtree.c
> > +++ b/fs/xfs/scrub/dabtree.c
> > @@ -473,7 +473,7 @@ xchk_da_btree(
> >  	xchk_da_btree_rec_fn	scrub_fn,
> >  	void			*private)
> >  {
> > -	struct xchk_da_btree	ds = {};
> > +	struct xchk_da_btree	*ds;
> >  	struct xfs_mount	*mp = sc->mp;
> >  	struct xfs_da_state_blk	*blks;
> >  	struct xfs_da_node_entry *key;
> > @@ -486,32 +486,35 @@ xchk_da_btree(
> >  		return 0;
> >  
> >  	/* Set up initial da state. */
> > -	ds.dargs.dp = sc->ip;
> > -	ds.dargs.whichfork = whichfork;
> > -	ds.dargs.trans = sc->tp;
> > -	ds.dargs.op_flags = XFS_DA_OP_OKNOENT;
> > -	ds.state = xfs_da_state_alloc(&ds.dargs);
> > -	ds.sc = sc;
> > -	ds.private = private;
> > +	ds = kmem_zalloc(sizeof(struct xchk_da_btree), KM_NOFS | KM_MAYFAIL);
> > +	if (!ds)
> > +		return -ENOMEM;
> > +	ds->dargs.dp = sc->ip;
> > +	ds->dargs.whichfork = whichfork;
> > +	ds->dargs.trans = sc->tp;
> > +	ds->dargs.op_flags = XFS_DA_OP_OKNOENT;
> > +	ds->state = xfs_da_state_alloc(&ds->dargs);
> > +	ds->sc = sc;
> > +	ds->private = private;
> >  	if (whichfork == XFS_ATTR_FORK) {
> > -		ds.dargs.geo = mp->m_attr_geo;
> > -		ds.lowest = 0;
> > -		ds.highest = 0;
> > +		ds->dargs.geo = mp->m_attr_geo;
> > +		ds->lowest = 0;
> > +		ds->highest = 0;
> >  	} else {
> > -		ds.dargs.geo = mp->m_dir_geo;
> > -		ds.lowest = ds.dargs.geo->leafblk;
> > -		ds.highest = ds.dargs.geo->freeblk;
> > +		ds->dargs.geo = mp->m_dir_geo;
> > +		ds->lowest = ds->dargs.geo->leafblk;
> > +		ds->highest = ds->dargs.geo->freeblk;
> >  	}
> > -	blkno = ds.lowest;
> > +	blkno = ds->lowest;
> >  	level = 0;
> >  
> >  	/* Find the root of the da tree, if present. */
> > -	blks = ds.state->path.blk;
> > -	error = xchk_da_btree_block(&ds, level, blkno);
> > +	blks = ds->state->path.blk;
> > +	error = xchk_da_btree_block(ds, level, blkno);
> >  	if (error)
> >  		goto out_state;
> >  	/*
> > -	 * We didn't find a block at ds.lowest, which means that there's
> > +	 * We didn't find a block at ds->lowest, which means that there's
> >  	 * no LEAF1/LEAFN tree (at least not where it's supposed to be),
> >  	 * so jump out now.
> >  	 */
> > @@ -523,16 +526,16 @@
> >  		/* Handle leaf block. */
> >  		if (blks[level].magic != XFS_DA_NODE_MAGIC) {
> >  			/* End of leaf, pop back towards the root. */
> > -			if (blks[level].index >= ds.maxrecs[level]) {
> > +			if (blks[level].index >= ds->maxrecs[level]) {
> >  				if (level > 0)
> >  					blks[level - 1].index++;
> > -				ds.tree_level++;
> > +				ds->tree_level++;
> >  				level--;
> >  				continue;
> >  			}
> >  
> >  			/* Dispatch record scrubbing. */
> > -			error = scrub_fn(&ds, level);
> > +			error = scrub_fn(ds, level);
> >  			if (error)
> >  				break;
> >  			if (xchk_should_terminate(sc, &error) ||
> > @@ -545,17 +548,17 @@
> >  
> >  
> >  		/* End of node, pop back towards the root. */
> > -		if (blks[level].index >= ds.maxrecs[level]) {
> > +		if (blks[level].index >= ds->maxrecs[level]) {
> >  			if (level > 0)
> >  				blks[level - 1].index++;
> > -			ds.tree_level++;
> > +			ds->tree_level++;
> >  			level--;
> >  			continue;
> >  		}
> >  
> >  		/* Hashes in order for scrub? */
> > -		key = xchk_da_btree_node_entry(&ds, level);
> > -		error = xchk_da_btree_hash(&ds, level, &key->hashval);
> > +		key = xchk_da_btree_node_entry(ds, level);
> > +		error = xchk_da_btree_hash(ds, level, &key->hashval);
> >  		if (error)
> >  			goto out;
> >  
> > @@ -564,11 +567,11 @@
> >  		level++;
> >  		if (level >= XFS_DA_NODE_MAXDEPTH) {
> >  			/* Too deep! */
> > -			xchk_da_set_corrupt(&ds, level - 1);
> > +			xchk_da_set_corrupt(ds, level - 1);
> >  			break;
> >  		}
> > -		ds.tree_level--;
> > -		error = xchk_da_btree_block(&ds, level, blkno);
> > +		ds->tree_level--;
> > +		error = xchk_da_btree_block(ds, level, blkno);
> >  		if (error)
> >  			goto out;
> >  		if (blks[level].bp == NULL)
> > @@ -587,6 +590,7 @@
> >  	}
> >  
> >  out_state:
> > -	xfs_da_state_free(ds.state);
> > +	xfs_da_state_free(ds->state);
> > +	kmem_free(ds);
> >  	return error;
> >  }
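(Same pattern as btree.c; the one extra wrinkle here is that the da
state is a second allocation reached through the context, so the
teardown sketch frees it before the context that points to it:)

out_state:
        /* ds->state is reached through ds, so free it first. */
        xfs_da_state_free(ds->state);
        kmem_free(ds);
        return error;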
> > diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
> > index 51e4c61916d2..0569b15526ea 100644
> > --- a/fs/xfs/scrub/scrub.c
> > +++ b/fs/xfs/scrub/scrub.c
> > @@ -461,15 +461,10 @@ xfs_scrub_metadata(
> >  	struct file			*file,
> >  	struct xfs_scrub_metadata	*sm)
> >  {
> > -	struct xfs_scrub		sc = {
> > -		.file			= file,
> > -		.sm			= sm,
> > -	};
> > +	struct xfs_scrub		*sc;
> >  	struct xfs_mount		*mp = XFS_I(file_inode(file))->i_mount;
> >  	int				error = 0;
> >  
> > -	sc.mp = mp;
> > -
> >  	BUILD_BUG_ON(sizeof(meta_scrub_ops) !=
> >  		(sizeof(struct xchk_meta_ops) * XFS_SCRUB_TYPE_NR));
> >  
> > @@ -489,59 +484,68 @@ xfs_scrub_metadata(
> >  
> >  	xchk_experimental_warning(mp);
> >  
> > -	sc.ops = &meta_scrub_ops[sm->sm_type];
> > -	sc.sick_mask = xchk_health_mask_for_scrub_type(sm->sm_type);
> > +	sc = kmem_zalloc(sizeof(struct xfs_scrub), KM_NOFS | KM_MAYFAIL);
> > +	if (!sc) {
> > +		error = -ENOMEM;
> > +		goto out;
> > +	}
> > +
> > +	sc->mp = mp;
> > +	sc->file = file;
> > +	sc->sm = sm;
> > +	sc->ops = &meta_scrub_ops[sm->sm_type];
> > +	sc->sick_mask = xchk_health_mask_for_scrub_type(sm->sm_type);
> >  retry_op:
> >  	/*
> >  	 * When repairs are allowed, prevent freezing or readonly remount while
> >  	 * scrub is running with a real transaction.
> >  	 */
> >  	if (sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) {
> > -		error = mnt_want_write_file(sc.file);
> > +		error = mnt_want_write_file(sc->file);
> >  		if (error)
> >  			goto out;
>
> The above should be "goto out_sc" ...
>
> >  	}
> >  
> >  	/* Set up for the operation. */
> > -	error = sc.ops->setup(&sc);
> > +	error = sc->ops->setup(sc);
> >  	if (error)
> >  		goto out_teardown;
> >  
> >  	/* Scrub for errors. */
> > -	error = sc.ops->scrub(&sc);
> > -	if (!(sc.flags & XCHK_TRY_HARDER) && error == -EDEADLOCK) {
> > +	error = sc->ops->scrub(sc);
> > +	if (!(sc->flags & XCHK_TRY_HARDER) && error == -EDEADLOCK) {
> >  		/*
> >  		 * Scrubbers return -EDEADLOCK to mean 'try harder'.
> >  		 * Tear down everything we hold, then set up again with
> >  		 * preparation for worst-case scenarios.
> >  		 */
> > -		error = xchk_teardown(&sc, 0);
> > +		error = xchk_teardown(sc, 0);
> >  		if (error)
> >  			goto out;
>
> ... also, the one above.

Ugh, that must have been a porting error.  Fixed.
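With both of those changed to "goto out_sc", the unwind order at the
bottom of xfs_scrub_metadata() works out like this (a sketch using the
labels from this patch):

out_nofix:
        xchk_postmortem(sc);
out_teardown:
        error = xchk_teardown(sc, error);
out_sc:
        /* Every path that got as far as allocating sc lands here. */
        kmem_free(sc);
out:
        trace_xchk_done(XFS_I(file_inode(file)), sm, error);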

--D

> > -		sc.flags |= XCHK_TRY_HARDER;
> > +		sc->flags |= XCHK_TRY_HARDER;
> >  		goto retry_op;
> >  	} else if (error || (sm->sm_flags & XFS_SCRUB_OFLAG_INCOMPLETE))
> >  		goto out_teardown;
> >  
> > -	xchk_update_health(&sc);
> > +	xchk_update_health(sc);
> >  
> > -	if ((sc.sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) &&
> > -	    !(sc.flags & XREP_ALREADY_FIXED)) {
> > +	if ((sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) &&
> > +	    !(sc->flags & XREP_ALREADY_FIXED)) {
> >  		bool needs_fix;
> >  
> >  		/* Let debug users force us into the repair routines. */
> >  		if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_FORCE_SCRUB_REPAIR))
> > -			sc.sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
> > +			sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
> >  
> > -		needs_fix = (sc.sm->sm_flags & (XFS_SCRUB_OFLAG_CORRUPT |
> > -						XFS_SCRUB_OFLAG_XCORRUPT |
> > -						XFS_SCRUB_OFLAG_PREEN));
> > +		needs_fix = (sc->sm->sm_flags & (XFS_SCRUB_OFLAG_CORRUPT |
> > +						 XFS_SCRUB_OFLAG_XCORRUPT |
> > +						 XFS_SCRUB_OFLAG_PREEN));
> >  		/*
> >  		 * If userspace asked for a repair but it wasn't necessary,
> >  		 * report that back to userspace.
> >  		 */
> >  		if (!needs_fix) {
> > -			sc.sm->sm_flags |= XFS_SCRUB_OFLAG_NO_REPAIR_NEEDED;
> > +			sc->sm->sm_flags |= XFS_SCRUB_OFLAG_NO_REPAIR_NEEDED;
> >  			goto out_nofix;
> >  		}
> >  
> > @@ -549,26 +553,28 @@
> >  		 * If it's broken, userspace wants us to fix it, and we haven't
> >  		 * already tried to fix it, then attempt a repair.
> >  		 */
> > -		error = xrep_attempt(&sc);
> > +		error = xrep_attempt(sc);
> >  		if (error == -EAGAIN) {
> >  			/*
> >  			 * Either the repair function succeeded or it couldn't
> >  			 * get all the resources it needs; either way, we go
> >  			 * back to the beginning and call the scrub function.
> >  			 */
> > -			error = xchk_teardown(&sc, 0);
> > +			error = xchk_teardown(sc, 0);
> >  			if (error) {
> >  				xrep_failure(mp);
> > -				goto out;
> > +				goto out_sc;
> >  			}
> >  			goto retry_op;
> >  		}
> >  	}
> >  
> >  out_nofix:
> > -	xchk_postmortem(&sc);
> > +	xchk_postmortem(sc);
> >  out_teardown:
> > -	error = xchk_teardown(&sc, error);
> > +	error = xchk_teardown(sc, error);
> > +out_sc:
> > +	kmem_free(sc);
> >  out:
> >  	trace_xchk_done(XFS_I(file_inode(file)), sm, error);
> >  	if (error == -EFSCORRUPTED || error == -EFSBADCRC) {
> > 
>
> -- 
> chandan