Transactions that perform multiple allocations may inadvertently run out of space after the first allocation selects an AG that appears to have enough available space. The problem occurs when the first allocation in the transaction splits freespace b-trees but the second allocation does not have enough available space to refill the AGFL. This results in an aborted transaction and a filesystem shutdown. In this author's case, it's frequently encountered in the xfs_bmap_extents_to_btree path on a write to an AG that's almost reached its limits. The AGFL reservation allows us to save some blocks to refill the AGFL to its minimum level in an Nth allocation, and to prevent allocations from proceeding when there's not enough reserved space to accommodate the refill. This patch just brings back the reservation and does the plumbing. The policy decisions about which allocations to allow will be in a subsequent patch. This implementation includes space for the bnobt and cnobt in the reserve. This was done largely because the AGFL reserve stubs appeared to already be doing it this way. Signed-off-by: Krister Johansen <kjlx@xxxxxxxxxxxxxxxxxx> --- fs/xfs/libxfs/xfs_ag.h | 2 ++ fs/xfs/libxfs/xfs_ag_resv.c | 54 ++++++++++++++++++++++-------- fs/xfs/libxfs/xfs_ag_resv.h | 4 +++ fs/xfs/libxfs/xfs_alloc.c | 43 +++++++++++++++++++++++- fs/xfs/libxfs/xfs_alloc.h | 3 +- fs/xfs/libxfs/xfs_alloc_btree.c | 59 +++++++++++++++++++++++++++++++++ fs/xfs/libxfs/xfs_alloc_btree.h | 5 +++ fs/xfs/libxfs/xfs_rmap_btree.c | 5 +++ fs/xfs/scrub/fscounters.c | 1 + 9 files changed, 161 insertions(+), 15 deletions(-) diff --git a/fs/xfs/libxfs/xfs_ag.h b/fs/xfs/libxfs/xfs_ag.h index 35de09a2516c..40bff82f2b7e 100644 --- a/fs/xfs/libxfs/xfs_ag.h +++ b/fs/xfs/libxfs/xfs_ag.h @@ -62,6 +62,8 @@ struct xfs_perag { struct xfs_ag_resv pag_meta_resv; /* Blocks reserved for the reverse mapping btree. */ struct xfs_ag_resv pag_rmapbt_resv; + /* Blocks reserved for the AGFL. 
*/ + struct xfs_ag_resv pag_agfl_resv; /* for rcu-safe freeing */ struct rcu_head rcu_head; diff --git a/fs/xfs/libxfs/xfs_ag_resv.c b/fs/xfs/libxfs/xfs_ag_resv.c index 216423df939e..db1d416f6ac8 100644 --- a/fs/xfs/libxfs/xfs_ag_resv.c +++ b/fs/xfs/libxfs/xfs_ag_resv.c @@ -17,6 +17,7 @@ #include "xfs_trans.h" #include "xfs_rmap_btree.h" #include "xfs_btree.h" +#include "xfs_alloc_btree.h" #include "xfs_refcount_btree.h" #include "xfs_ialloc_btree.h" #include "xfs_ag.h" @@ -75,12 +76,14 @@ xfs_ag_resv_critical( switch (type) { case XFS_AG_RESV_METADATA: - avail = pag->pagf_freeblks - pag->pag_rmapbt_resv.ar_reserved; + avail = pag->pagf_freeblks - pag->pag_rmapbt_resv.ar_reserved - + pag->pag_agfl_resv.ar_reserved; orig = pag->pag_meta_resv.ar_asked; break; case XFS_AG_RESV_RMAPBT: avail = pag->pagf_freeblks + pag->pagf_flcount - - pag->pag_meta_resv.ar_reserved; + pag->pag_meta_resv.ar_reserved - + pag->pag_agfl_resv.ar_reserved; orig = pag->pag_rmapbt_resv.ar_asked; break; default: @@ -107,10 +110,14 @@ xfs_ag_resv_needed( { xfs_extlen_t len; - len = pag->pag_meta_resv.ar_reserved + pag->pag_rmapbt_resv.ar_reserved; + len = pag->pag_meta_resv.ar_reserved + + pag->pag_rmapbt_resv.ar_reserved + + pag->pag_agfl_resv.ar_reserved; + switch (type) { case XFS_AG_RESV_METADATA: case XFS_AG_RESV_RMAPBT: + case XFS_AG_RESV_AGFL: len -= xfs_perag_resv(pag, type)->ar_reserved; break; case XFS_AG_RESV_NONE: @@ -144,7 +151,7 @@ __xfs_ag_resv_free( * considered "free", so whatever was reserved at mount time must be * given back at umount. 
*/ - if (type == XFS_AG_RESV_RMAPBT) + if (type == XFS_AG_RESV_RMAPBT || type == XFS_AG_RESV_AGFL) oldresv = resv->ar_orig_reserved; else oldresv = resv->ar_reserved; @@ -161,6 +168,7 @@ xfs_ag_resv_free( { __xfs_ag_resv_free(pag, XFS_AG_RESV_RMAPBT); __xfs_ag_resv_free(pag, XFS_AG_RESV_METADATA); + __xfs_ag_resv_free(pag, XFS_AG_RESV_AGFL); } static int @@ -180,11 +188,13 @@ __xfs_ag_resv_init( switch (type) { case XFS_AG_RESV_RMAPBT: + case XFS_AG_RESV_AGFL: /* - * Space taken by the rmapbt is not subtracted from fdblocks - * because the rmapbt lives in the free space. Here we must - * subtract the entire reservation from fdblocks so that we - * always have blocks available for rmapbt expansion. + * Space taken by the rmapbt and agfl are not subtracted from + * fdblocks because they both live in the free space. Here we + * must subtract the entire reservation from fdblocks so that we + * always have blocks available for rmapbt expansion and agfl + * refilling. */ hidden_space = ask; break; @@ -299,6 +309,25 @@ xfs_ag_resv_init( has_resv = true; } + /* Create the AGFL reservation */ + if (pag->pag_agfl_resv.ar_asked == 0) { + ask = used = 0; + + error = xfs_allocbt_calc_reserves(mp, tp, pag, &ask, &used); + if (error) + goto out; + + error = xfs_alloc_agfl_calc_reserves(mp, tp, pag, &ask, &used); + if (error) + goto out; + + error = __xfs_ag_resv_init(pag, XFS_AG_RESV_AGFL, ask, used); + if (error) + goto out; + if (ask) + has_resv = true; + } + out: /* * Initialize the pagf if we have at least one active reservation on the @@ -324,7 +353,8 @@ xfs_ag_resv_init( */ if (!error && xfs_perag_resv(pag, XFS_AG_RESV_METADATA)->ar_reserved + - xfs_perag_resv(pag, XFS_AG_RESV_RMAPBT)->ar_reserved > + xfs_perag_resv(pag, XFS_AG_RESV_RMAPBT)->ar_reserved + + xfs_perag_resv(pag, XFS_AG_RESV_AGFL)->ar_reserved > pag->pagf_freeblks + pag->pagf_flcount) error = -ENOSPC; } @@ -347,7 +377,6 @@ xfs_ag_resv_alloc_extent( switch (type) { case XFS_AG_RESV_AGFL: - return; case 
XFS_AG_RESV_METADATA: case XFS_AG_RESV_RMAPBT: resv = xfs_perag_resv(pag, type); @@ -364,7 +393,7 @@ xfs_ag_resv_alloc_extent( len = min_t(xfs_extlen_t, args->len, resv->ar_reserved); resv->ar_reserved -= len; - if (type == XFS_AG_RESV_RMAPBT) + if (type == XFS_AG_RESV_RMAPBT || type == XFS_AG_RESV_AGFL) return; /* Allocations of reserved blocks only need on-disk sb updates... */ xfs_trans_mod_sb(args->tp, XFS_TRANS_SB_RES_FDBLOCKS, -(int64_t)len); @@ -389,7 +418,6 @@ xfs_ag_resv_free_extent( switch (type) { case XFS_AG_RESV_AGFL: - return; case XFS_AG_RESV_METADATA: case XFS_AG_RESV_RMAPBT: resv = xfs_perag_resv(pag, type); @@ -406,7 +434,7 @@ xfs_ag_resv_free_extent( leftover = min_t(xfs_extlen_t, len, resv->ar_asked - resv->ar_reserved); resv->ar_reserved += leftover; - if (type == XFS_AG_RESV_RMAPBT) + if (type == XFS_AG_RESV_RMAPBT || type == XFS_AG_RESV_AGFL) return; /* Freeing into the reserved pool only requires on-disk update... */ xfs_trans_mod_sb(tp, XFS_TRANS_SB_RES_FDBLOCKS, len); diff --git a/fs/xfs/libxfs/xfs_ag_resv.h b/fs/xfs/libxfs/xfs_ag_resv.h index ff20ed93de77..ea2c16dfb843 100644 --- a/fs/xfs/libxfs/xfs_ag_resv.h +++ b/fs/xfs/libxfs/xfs_ag_resv.h @@ -28,6 +28,8 @@ xfs_perag_resv( return &pag->pag_meta_resv; case XFS_AG_RESV_RMAPBT: return &pag->pag_rmapbt_resv; + case XFS_AG_RESV_AGFL: + return &pag->pag_agfl_resv; default: return NULL; } @@ -48,6 +50,8 @@ xfs_ag_resv_rmapbt_alloc( args.len = 1; pag = xfs_perag_get(mp, agno); + /* Transfer this reservation from the AGFL to RMAPBT */ + xfs_ag_resv_free_extent(pag, XFS_AG_RESV_AGFL, NULL, 1); xfs_ag_resv_alloc_extent(pag, XFS_AG_RESV_RMAPBT, &args); xfs_perag_put(pag); } diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 6c55a6e88eba..d70d027a8178 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -1176,12 +1176,14 @@ xfs_alloc_ag_vextent_small( /* * If we're feeding an AGFL block to something that doesn't live in the - * free space, we need to clear 
out the OWN_AG rmap. + * free space, we need to clear out the OWN_AG rmap and remove it from + * the AGFL reservation. */ error = xfs_rmap_free(args->tp, args->agbp, args->pag, fbno, 1, &XFS_RMAP_OINFO_AG); if (error) goto error; + xfs_ag_resv_free_extent(args->pag, XFS_AG_RESV_AGFL, args->tp, 1); *stat = 0; return 0; @@ -2778,6 +2780,43 @@ xfs_exact_minlen_extent_available( } #endif +/* + * Work out how many blocks to reserve for the AGFL as well as how many are in + * use currently. + */ +int +xfs_alloc_agfl_calc_reserves( + struct xfs_mount *mp, + struct xfs_trans *tp, + struct xfs_perag *pag, + xfs_extlen_t *ask, + xfs_extlen_t *used) +{ + struct xfs_buf *agbp; + struct xfs_agf *agf; + xfs_extlen_t agfl_blocks; + xfs_extlen_t list_len; + int error; + + error = xfs_alloc_read_agf(pag, tp, 0, &agbp); + if (error) + return error; + + agf = agbp->b_addr; + agfl_blocks = xfs_alloc_min_freelist(mp, NULL); + list_len = be32_to_cpu(agf->agf_flcount); + xfs_trans_brelse(tp, agbp); + + /* + * Reserve enough space to refill AGFL to minimum fullness if btrees are + * at maximum height. + */ + *ask += agfl_blocks; + *used += list_len; + + return error; +} + /* * Decide whether to use this allocation group for this allocation. * If so, fix up the btree freelist's size. @@ -2944,6 +2983,8 @@ xfs_alloc_fix_freelist( if (error) goto out_agflbp_relse; + xfs_ag_resv_alloc_extent(targs.pag, targs.resv, &targs); + /* * Put each allocated block on the list. 
*/ diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h index 0b956f8b9d5a..8cbdfb62ac14 100644 --- a/fs/xfs/libxfs/xfs_alloc.h +++ b/fs/xfs/libxfs/xfs_alloc.h @@ -80,7 +80,8 @@ int xfs_alloc_get_freelist(struct xfs_perag *pag, struct xfs_trans *tp, int xfs_alloc_put_freelist(struct xfs_perag *pag, struct xfs_trans *tp, struct xfs_buf *agfbp, struct xfs_buf *agflbp, xfs_agblock_t bno, int btreeblk); - +int xfs_alloc_agfl_calc_reserves(struct xfs_mount *mp, struct xfs_trans *tp, + struct xfs_perag *pag, xfs_extlen_t *ask, xfs_extlen_t *used); /* * Compute and fill in value of m_alloc_maxlevels. */ diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c index 6ef5ddd89600..9c20f85a459d 100644 --- a/fs/xfs/libxfs/xfs_alloc_btree.c +++ b/fs/xfs/libxfs/xfs_alloc_btree.c @@ -671,6 +671,65 @@ xfs_allocbt_calc_size( return xfs_btree_calc_size(mp->m_alloc_mnr, len); } +/* + * Calculate the maximum alloc btree size. This is for a single allocbt. + * Callers wishing to compute both the size of the bnobt and cnobt must double + * this result. + */ +xfs_extlen_t +xfs_allocbt_max_size( + struct xfs_mount *mp, + xfs_agblock_t agblocks) +{ + + /* Don't proceed if uninitialized. Can happen in mkfs. */ + if (mp->m_alloc_mxr[0] == 0) + return 0; + + return xfs_allocbt_calc_size(mp, agblocks); +} + +/* + * Work out how many blocks to reserve for the bnobt and the cnobt as well as + * how many blocks are in use by these trees. 
+ */ +int +xfs_allocbt_calc_reserves( + struct xfs_mount *mp, + struct xfs_trans *tp, + struct xfs_perag *pag, + xfs_extlen_t *ask, + xfs_extlen_t *used) +{ + struct xfs_buf *agbp; + struct xfs_agf *agf; + xfs_agblock_t agblocks; + xfs_extlen_t tree_len; + int error; + + error = xfs_alloc_read_agf(pag, tp, 0, &agbp); + if (error) + return error; + + agf = agbp->b_addr; + agblocks = be32_to_cpu(agf->agf_length); + tree_len = be32_to_cpu(agf->agf_btreeblks); + xfs_trans_brelse(tp, agbp); + + /* + * The log is permanently allocated. The space it occupies will never be + * available for btree expansion. Pretend the space is not there. + */ + if (xfs_ag_contains_log(mp, pag->pag_agno)) + agblocks -= mp->m_sb.sb_logblocks; + + /* Reserve 1% of the AG or enough for one block per record per tree. */ + *ask += max(agblocks / 100, 2 * xfs_allocbt_max_size(mp, agblocks)); + *used += tree_len; + + return error; +} + int __init xfs_allocbt_init_cur_cache(void) { diff --git a/fs/xfs/libxfs/xfs_alloc_btree.h b/fs/xfs/libxfs/xfs_alloc_btree.h index 155b47f231ab..8334195e2462 100644 --- a/fs/xfs/libxfs/xfs_alloc_btree.h +++ b/fs/xfs/libxfs/xfs_alloc_btree.h @@ -56,6 +56,11 @@ struct xfs_btree_cur *xfs_cntbt_init_cursor(struct xfs_mount *mp, extern int xfs_allocbt_maxrecs(struct xfs_mount *, int, int); extern xfs_extlen_t xfs_allocbt_calc_size(struct xfs_mount *mp, unsigned long long len); +extern xfs_extlen_t xfs_allocbt_max_size(struct xfs_mount *mp, + xfs_agblock_t agblocks); + +extern int xfs_allocbt_calc_reserves(struct xfs_mount *mp, struct xfs_trans *tp, + struct xfs_perag *pag, xfs_extlen_t *ask, xfs_extlen_t *used); void xfs_allocbt_commit_staged_btree(struct xfs_btree_cur *cur, struct xfs_trans *tp, struct xfs_buf *agbp); diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c index 9e759efa81cc..49b1652f715a 100644 --- a/fs/xfs/libxfs/xfs_rmap_btree.c +++ b/fs/xfs/libxfs/xfs_rmap_btree.c @@ -121,6 +121,7 @@ xfs_rmapbt_free_block( struct xfs_buf *agbp 
= cur->bc_ag.agbp; struct xfs_agf *agf = agbp->b_addr; struct xfs_perag *pag = cur->bc_ag.pag; + struct xfs_alloc_arg args = { NULL }; xfs_agblock_t bno; int error; @@ -135,6 +136,10 @@ xfs_rmapbt_free_block( XFS_EXTENT_BUSY_SKIP_DISCARD); xfs_ag_resv_free_extent(pag, XFS_AG_RESV_RMAPBT, NULL, 1); + args.len = 1; + /* Transfer this reservation back to the AGFL. */ + xfs_ag_resv_alloc_extent(pag, XFS_AG_RESV_AGFL, &args); + return 0; } diff --git a/fs/xfs/scrub/fscounters.c b/fs/xfs/scrub/fscounters.c index 1d3e98346933..fec4aa13052a 100644 --- a/fs/xfs/scrub/fscounters.c +++ b/fs/xfs/scrub/fscounters.c @@ -338,6 +338,7 @@ xchk_fscount_aggregate_agcounts( */ fsc->fdblocks -= pag->pag_meta_resv.ar_reserved; fsc->fdblocks -= pag->pag_rmapbt_resv.ar_orig_reserved; + fsc->fdblocks -= pag->pag_agfl_resv.ar_orig_reserved; } if (pag) -- 2.25.1