On Sat, May 12, 2018 at 08:51:06AM +1000, Dave Chinner wrote: > From: Dave Chinner <dchinner@xxxxxxxxxx> > > So it can be shared with userspace (e.g. mkfs) easily. > > Signed-Off-By: Dave Chinner <dchinner@xxxxxxxxxx> Looks ok, Reviewed-by: Darrick J. Wong <darrick.wong@xxxxxxxxxx> --D > --- > fs/xfs/Makefile | 1 + > fs/xfs/libxfs/xfs_ag.c | 376 ++++++++++++++++++++++++++++++++++ > fs/xfs/libxfs/xfs_ag.h | 26 +++ > fs/xfs/libxfs/xfs_sb.c | 73 +++++++ > fs/xfs/libxfs/xfs_sb.h | 9 + > fs/xfs/xfs_fsops.c | 452 +---------------------------------------- > 6 files changed, 489 insertions(+), 448 deletions(-) > create mode 100644 fs/xfs/libxfs/xfs_ag.c > create mode 100644 fs/xfs/libxfs/xfs_ag.h > > diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile > index 7ceb41a9786a..762477208322 100644 > --- a/fs/xfs/Makefile > +++ b/fs/xfs/Makefile > @@ -28,6 +28,7 @@ xfs-y += xfs_trace.o > > # build the libxfs code first > xfs-y += $(addprefix libxfs/, \ > + xfs_ag.o \ > xfs_alloc.o \ > xfs_alloc_btree.o \ > xfs_attr.o \ > diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c > new file mode 100644 > index 000000000000..638bc162afb5 > --- /dev/null > +++ b/fs/xfs/libxfs/xfs_ag.c > @@ -0,0 +1,376 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ > +/* > + * Copyright (c) 2000-2005 Silicon Graphics, Inc. > + * Copyright (c) 2018 Red Hat, Inc. > + * All rights reserved. 
> + */ > + > +#include "xfs.h" > +#include "xfs_fs.h" > +#include "xfs_shared.h" > +#include "xfs_format.h" > +#include "xfs_trans_resv.h" > +#include "xfs_sb.h" > +#include "xfs_mount.h" > +#include "xfs_btree.h" > +#include "xfs_alloc_btree.h" > +#include "xfs_rmap_btree.h" > +#include "xfs_alloc.h" > +#include "xfs_rmap.h" > +#include "xfs_ag.h" > + > +static struct xfs_buf * > +xfs_get_aghdr_buf( > + struct xfs_mount *mp, > + xfs_daddr_t blkno, > + size_t numblks, > + int flags, > + const struct xfs_buf_ops *ops) > +{ > + struct xfs_buf *bp; > + > + bp = xfs_buf_get_uncached(mp->m_ddev_targp, numblks, flags); > + if (!bp) > + return NULL; > + > + xfs_buf_zero(bp, 0, BBTOB(bp->b_length)); > + bp->b_bn = blkno; > + bp->b_maps[0].bm_bn = blkno; > + bp->b_ops = ops; > + > + return bp; > +} > + > +/* > + * Generic btree root block init function > + */ > +static void > +xfs_btroot_init( > + struct xfs_mount *mp, > + struct xfs_buf *bp, > + struct aghdr_init_data *id) > +{ > + xfs_btree_init_block(mp, bp, id->type, 0, id->numrecs, id->agno, 0); > +} > + > +/* > + * Alloc btree root block init functions > + */ > +static void > +xfs_bnoroot_init( > + struct xfs_mount *mp, > + struct xfs_buf *bp, > + struct aghdr_init_data *id) > +{ > + struct xfs_alloc_rec *arec; > + > + xfs_btree_init_block(mp, bp, XFS_BTNUM_BNO, 0, 1, id->agno, 0); > + arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1); > + arec->ar_startblock = cpu_to_be32(mp->m_ag_prealloc_blocks); > + arec->ar_blockcount = cpu_to_be32(id->agsize - > + be32_to_cpu(arec->ar_startblock)); > +} > + > +static void > +xfs_cntroot_init( > + struct xfs_mount *mp, > + struct xfs_buf *bp, > + struct aghdr_init_data *id) > +{ > + struct xfs_alloc_rec *arec; > + > + xfs_btree_init_block(mp, bp, XFS_BTNUM_CNT, 0, 1, id->agno, 0); > + arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1); > + arec->ar_startblock = cpu_to_be32(mp->m_ag_prealloc_blocks); > + arec->ar_blockcount = cpu_to_be32(id->agsize - > + 
be32_to_cpu(arec->ar_startblock)); > +} > + > +/* > + * Reverse map root block init > + */ > +static void > +xfs_rmaproot_init( > + struct xfs_mount *mp, > + struct xfs_buf *bp, > + struct aghdr_init_data *id) > +{ > + struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); > + struct xfs_rmap_rec *rrec; > + > + xfs_btree_init_block(mp, bp, XFS_BTNUM_RMAP, 0, 4, id->agno, 0); > + > + /* > + * mark the AG header regions as static metadata. The BNO > + * btree block is the first block after the headers, so > + * its location defines the size of region the static > + * metadata consumes. > + * > + * Note: unlike mkfs, we never have to account for log > + * space when growing the data regions > + */ > + rrec = XFS_RMAP_REC_ADDR(block, 1); > + rrec->rm_startblock = 0; > + rrec->rm_blockcount = cpu_to_be32(XFS_BNO_BLOCK(mp)); > + rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_FS); > + rrec->rm_offset = 0; > + > + /* account freespace btree root blocks */ > + rrec = XFS_RMAP_REC_ADDR(block, 2); > + rrec->rm_startblock = cpu_to_be32(XFS_BNO_BLOCK(mp)); > + rrec->rm_blockcount = cpu_to_be32(2); > + rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_AG); > + rrec->rm_offset = 0; > + > + /* account inode btree root blocks */ > + rrec = XFS_RMAP_REC_ADDR(block, 3); > + rrec->rm_startblock = cpu_to_be32(XFS_IBT_BLOCK(mp)); > + rrec->rm_blockcount = cpu_to_be32(XFS_RMAP_BLOCK(mp) - > + XFS_IBT_BLOCK(mp)); > + rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_INOBT); > + rrec->rm_offset = 0; > + > + /* account for rmap btree root */ > + rrec = XFS_RMAP_REC_ADDR(block, 4); > + rrec->rm_startblock = cpu_to_be32(XFS_RMAP_BLOCK(mp)); > + rrec->rm_blockcount = cpu_to_be32(1); > + rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_AG); > + rrec->rm_offset = 0; > + > + /* account for refc btree root */ > + if (xfs_sb_version_hasreflink(&mp->m_sb)) { > + rrec = XFS_RMAP_REC_ADDR(block, 5); > + rrec->rm_startblock = cpu_to_be32(xfs_refc_block(mp)); > + rrec->rm_blockcount = cpu_to_be32(1); > + rrec->rm_owner = 
cpu_to_be64(XFS_RMAP_OWN_REFC); > + rrec->rm_offset = 0; > + be16_add_cpu(&block->bb_numrecs, 1); > + } > +} > + > +/* > + * Initialise new secondary superblocks with the pre-grow geometry, but mark > + * them as "in progress" so we know they haven't yet been activated. This will > + * get cleared when the update with the new geometry information is done after > + * changes to the primary are committed. This isn't strictly necessary, but we > + * get it for free with the delayed buffer write lists and it means we can tell > + * if a grow operation didn't complete properly after the fact. > + */ > +static void > +xfs_sbblock_init( > + struct xfs_mount *mp, > + struct xfs_buf *bp, > + struct aghdr_init_data *id) > +{ > + struct xfs_dsb *dsb = XFS_BUF_TO_SBP(bp); > + > + xfs_sb_to_disk(dsb, &mp->m_sb); > + dsb->sb_inprogress = 1; > +} > + > +static void > +xfs_agfblock_init( > + struct xfs_mount *mp, > + struct xfs_buf *bp, > + struct aghdr_init_data *id) > +{ > + struct xfs_agf *agf = XFS_BUF_TO_AGF(bp); > + xfs_extlen_t tmpsize; > + > + agf->agf_magicnum = cpu_to_be32(XFS_AGF_MAGIC); > + agf->agf_versionnum = cpu_to_be32(XFS_AGF_VERSION); > + agf->agf_seqno = cpu_to_be32(id->agno); > + agf->agf_length = cpu_to_be32(id->agsize); > + agf->agf_roots[XFS_BTNUM_BNOi] = cpu_to_be32(XFS_BNO_BLOCK(mp)); > + agf->agf_roots[XFS_BTNUM_CNTi] = cpu_to_be32(XFS_CNT_BLOCK(mp)); > + agf->agf_levels[XFS_BTNUM_BNOi] = cpu_to_be32(1); > + agf->agf_levels[XFS_BTNUM_CNTi] = cpu_to_be32(1); > + if (xfs_sb_version_hasrmapbt(&mp->m_sb)) { > + agf->agf_roots[XFS_BTNUM_RMAPi] = > + cpu_to_be32(XFS_RMAP_BLOCK(mp)); > + agf->agf_levels[XFS_BTNUM_RMAPi] = cpu_to_be32(1); > + agf->agf_rmap_blocks = cpu_to_be32(1); > + } > + > + agf->agf_flfirst = cpu_to_be32(1); > + agf->agf_fllast = 0; > + agf->agf_flcount = 0; > + tmpsize = id->agsize - mp->m_ag_prealloc_blocks; > + agf->agf_freeblks = cpu_to_be32(tmpsize); > + agf->agf_longest = cpu_to_be32(tmpsize); > + if (xfs_sb_version_hascrc(&mp->m_sb)) 
> + uuid_copy(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid); > + if (xfs_sb_version_hasreflink(&mp->m_sb)) { > + agf->agf_refcount_root = cpu_to_be32( > + xfs_refc_block(mp)); > + agf->agf_refcount_level = cpu_to_be32(1); > + agf->agf_refcount_blocks = cpu_to_be32(1); > + } > +} > + > +static void > +xfs_agflblock_init( > + struct xfs_mount *mp, > + struct xfs_buf *bp, > + struct aghdr_init_data *id) > +{ > + struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp); > + __be32 *agfl_bno; > + int bucket; > + > + if (xfs_sb_version_hascrc(&mp->m_sb)) { > + agfl->agfl_magicnum = cpu_to_be32(XFS_AGFL_MAGIC); > + agfl->agfl_seqno = cpu_to_be32(id->agno); > + uuid_copy(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid); > + } > + > + agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, bp); > + for (bucket = 0; bucket < xfs_agfl_size(mp); bucket++) > + agfl_bno[bucket] = cpu_to_be32(NULLAGBLOCK); > +} > + > +static void > +xfs_agiblock_init( > + struct xfs_mount *mp, > + struct xfs_buf *bp, > + struct aghdr_init_data *id) > +{ > + struct xfs_agi *agi = XFS_BUF_TO_AGI(bp); > + int bucket; > + > + agi->agi_magicnum = cpu_to_be32(XFS_AGI_MAGIC); > + agi->agi_versionnum = cpu_to_be32(XFS_AGI_VERSION); > + agi->agi_seqno = cpu_to_be32(id->agno); > + agi->agi_length = cpu_to_be32(id->agsize); > + agi->agi_count = 0; > + agi->agi_root = cpu_to_be32(XFS_IBT_BLOCK(mp)); > + agi->agi_level = cpu_to_be32(1); > + agi->agi_freecount = 0; > + agi->agi_newino = cpu_to_be32(NULLAGINO); > + agi->agi_dirino = cpu_to_be32(NULLAGINO); > + if (xfs_sb_version_hascrc(&mp->m_sb)) > + uuid_copy(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid); > + if (xfs_sb_version_hasfinobt(&mp->m_sb)) { > + agi->agi_free_root = cpu_to_be32(XFS_FIBT_BLOCK(mp)); > + agi->agi_free_level = cpu_to_be32(1); > + } > + for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) > + agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO); > +} > + > +typedef void (*aghdr_init_work_f)(struct xfs_mount *mp, struct xfs_buf *bp, > + struct aghdr_init_data *id); > +static int > 
+xfs_ag_init_hdr( > + struct xfs_mount *mp, > + struct aghdr_init_data *id, > + aghdr_init_work_f work, > + const struct xfs_buf_ops *ops) > + > +{ > + struct xfs_buf *bp; > + > + bp = xfs_get_aghdr_buf(mp, id->daddr, id->numblks, 0, ops); > + if (!bp) > + return -ENOMEM; > + > + (*work)(mp, bp, id); > + > + xfs_buf_delwri_queue(bp, &id->buffer_list); > + xfs_buf_relse(bp); > + return 0; > +} > + > +struct xfs_aghdr_grow_data { > + xfs_daddr_t daddr; > + size_t numblks; > + const struct xfs_buf_ops *ops; > + aghdr_init_work_f work; > + xfs_btnum_t type; > + int numrecs; > + bool need_init; > +}; > + > +/* > + * Prepare new AG headers to be written to disk. We use uncached buffers here, > + * as it is assumed these new AG headers are currently beyond the currently > + * valid filesystem address space. Using cached buffers would trip over EOFS > + * corruption detection algorithms in the buffer cache lookup routines. > + * > + * This is a non-transactional function, but the prepared buffers are added to a > + * delayed write buffer list supplied by the caller so they can submit them to > + * disk and wait on them as required. 
> + */ > +int > +xfs_ag_init_headers( > + struct xfs_mount *mp, > + struct aghdr_init_data *id) > + > +{ > + struct xfs_aghdr_grow_data aghdr_data[] = { > + /* SB */ > + { XFS_AG_DADDR(mp, id->agno, XFS_SB_DADDR), > + XFS_FSS_TO_BB(mp, 1), &xfs_sb_buf_ops, > + &xfs_sbblock_init, 0, 0, true }, > + /* AGF */ > + { XFS_AG_DADDR(mp, id->agno, XFS_AGF_DADDR(mp)), > + XFS_FSS_TO_BB(mp, 1), &xfs_agf_buf_ops, > + &xfs_agfblock_init, 0, 0, true }, > + /* AGFL */ > + { XFS_AG_DADDR(mp, id->agno, XFS_AGFL_DADDR(mp)), > + XFS_FSS_TO_BB(mp, 1), &xfs_agfl_buf_ops, > + &xfs_agflblock_init, 0, 0, true }, > + /* AGI */ > + { XFS_AG_DADDR(mp, id->agno, XFS_AGI_DADDR(mp)), > + XFS_FSS_TO_BB(mp, 1), &xfs_agi_buf_ops, > + &xfs_agiblock_init, 0, 0, true }, > + /* BNO root block */ > + { XFS_AGB_TO_DADDR(mp, id->agno, XFS_BNO_BLOCK(mp)), > + BTOBB(mp->m_sb.sb_blocksize), &xfs_allocbt_buf_ops, > + &xfs_bnoroot_init, 0, 0, true }, > + /* CNT root block */ > + { XFS_AGB_TO_DADDR(mp, id->agno, XFS_CNT_BLOCK(mp)), > + BTOBB(mp->m_sb.sb_blocksize), &xfs_allocbt_buf_ops, > + &xfs_cntroot_init, 0, 0, true }, > + /* INO root block */ > + { XFS_AGB_TO_DADDR(mp, id->agno, XFS_IBT_BLOCK(mp)), > + BTOBB(mp->m_sb.sb_blocksize), &xfs_inobt_buf_ops, > + &xfs_btroot_init, XFS_BTNUM_INO, 0, true }, > + /* FINO root block */ > + { XFS_AGB_TO_DADDR(mp, id->agno, XFS_FIBT_BLOCK(mp)), > + BTOBB(mp->m_sb.sb_blocksize), &xfs_inobt_buf_ops, > + &xfs_btroot_init, XFS_BTNUM_FINO, 0, > + xfs_sb_version_hasfinobt(&mp->m_sb) }, > + /* RMAP root block */ > + { XFS_AGB_TO_DADDR(mp, id->agno, XFS_RMAP_BLOCK(mp)), > + BTOBB(mp->m_sb.sb_blocksize), &xfs_rmapbt_buf_ops, > + &xfs_rmaproot_init, 0, 0, > + xfs_sb_version_hasrmapbt(&mp->m_sb) }, > + /* REFC root block */ > + { XFS_AGB_TO_DADDR(mp, id->agno, xfs_refc_block(mp)), > + BTOBB(mp->m_sb.sb_blocksize), &xfs_refcountbt_buf_ops, > + &xfs_btroot_init, XFS_BTNUM_REFC, 0, > + xfs_sb_version_hasreflink(&mp->m_sb) }, > + /* NULL terminating block */ > + { XFS_BUF_DADDR_NULL, 
0, NULL, NULL, 0, 0, false }, > + }; > + struct xfs_aghdr_grow_data *dp; > + int error = 0; > + > + /* Account for AG free space in new AG */ > + id->nfree += id->agsize - mp->m_ag_prealloc_blocks; > + > + for (dp = &aghdr_data[0]; dp->daddr != XFS_BUF_DADDR_NULL; dp++) { > + if (!dp->need_init) > + continue; > + > + id->daddr = dp->daddr; > + id->numblks = dp->numblks; > + id->numrecs = dp->numrecs; > + id->type = dp->type; > + error = xfs_ag_init_hdr(mp, id, dp->work, dp->ops); > + if (error) > + break; > + } > + return error; > +} > diff --git a/fs/xfs/libxfs/xfs_ag.h b/fs/xfs/libxfs/xfs_ag.h > new file mode 100644 > index 000000000000..788f37df6610 > --- /dev/null > +++ b/fs/xfs/libxfs/xfs_ag.h > @@ -0,0 +1,26 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ > +/* > + * Copyright (c) 2018 Red Hat, Inc. > + * All rights reserved. > + */ > + > +#ifndef __LIBXFS_AG_H > +#define __LIBXFS_AG_H 1 > + > +struct aghdr_init_data { > + /* per ag data */ > + xfs_agblock_t agno; /* ag to init */ > + xfs_extlen_t agsize; /* new AG size */ > + struct list_head buffer_list; /* buffer writeback list */ > + xfs_rfsblock_t nfree; /* cumulative new free space */ > + > + /* per header data */ > + xfs_daddr_t daddr; /* header location */ > + size_t numblks; /* size of header */ > + xfs_btnum_t type; /* type of btree root block */ > + int numrecs; /* recs in btree root block */ > +}; > + > +int xfs_ag_init_headers( struct xfs_mount *mp, struct aghdr_init_data *id); > + > +#endif /* __LIBXFS_AG_H */ > diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c > index d9b94bd5f689..75b2fee6f99a 100644 > --- a/fs/xfs/libxfs/xfs_sb.c > +++ b/fs/xfs/libxfs/xfs_sb.c > @@ -888,6 +888,79 @@ xfs_sync_sb( > return xfs_trans_commit(tp); > } > > +/* > + * Update all the secondary superblocks to match the new state of the primary. > + * Because we are completely overwriting all the existing fields in the > + * secondary superblock buffers, there is no need to read them in from disk. 
> + * Just get a new buffer, stamp it and write it. > + * > + * The sb buffers need to be cached here so that we serialise against other > + * operations that access the secondary superblocks, but we don't want to keep > + * them in memory once it is written so we mark it as a one-shot buffer. > + */ > +int > +xfs_update_secondary_sbs( > + struct xfs_mount *mp) > +{ > + xfs_agnumber_t agno; > + int saved_error = 0; > + int error = 0; > + LIST_HEAD (buffer_list); > + > + /* update secondary superblocks. */ > + for (agno = 1; agno < mp->m_sb.sb_agcount; agno++) { > + struct xfs_buf *bp; > + > + bp = xfs_buf_get(mp->m_ddev_targp, > + XFS_AG_DADDR(mp, agno, XFS_SB_DADDR), > + XFS_FSS_TO_BB(mp, 1), 0); > + /* > + * If we get an error reading or writing alternate superblocks, > + * continue. xfs_repair chooses the "best" superblock based > + * on most matches; if we break early, we'll leave more > + * superblocks un-updated than updated, and xfs_repair may > + * pick them over the properly-updated primary. > + */ > + if (!bp) { > + xfs_warn(mp, > + "error allocating secondary superblock for ag %d", > + agno); > + if (!saved_error) > + saved_error = -ENOMEM; > + continue; > + } > + > + bp->b_ops = &xfs_sb_buf_ops; > + xfs_buf_oneshot(bp); > + xfs_buf_zero(bp, 0, BBTOB(bp->b_length)); > + xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb); > + xfs_buf_delwri_queue(bp, &buffer_list); > + xfs_buf_relse(bp); > + > + /* don't hold too many buffers at once */ > + if (agno % 16) > + continue; > + > + error = xfs_buf_delwri_submit(&buffer_list); > + if (error) { > + xfs_warn(mp, > + "write error %d updating a secondary superblock near ag %d", > + error, agno); > + if (!saved_error) > + saved_error = error; > + continue; > + } > + } > + error = xfs_buf_delwri_submit(&buffer_list); > + if (error) { > + xfs_warn(mp, > + "write error %d updating a secondary superblock near ag %d", > + error, agno); > + } > + > + return saved_error ? 
saved_error : error; > +} > + > int > xfs_fs_geometry( > struct xfs_sb *sbp, > diff --git a/fs/xfs/libxfs/xfs_sb.h b/fs/xfs/libxfs/xfs_sb.h > index 63dcd2a1a657..bb0196b56fa5 100644 > --- a/fs/xfs/libxfs/xfs_sb.h > +++ b/fs/xfs/libxfs/xfs_sb.h > @@ -18,6 +18,13 @@ > #ifndef __XFS_SB_H__ > #define __XFS_SB_H__ > > +struct xfs_mount; > +struct xfs_sb; > +struct xfs_dsb; > +struct xfs_trans; > +struct xfs_fsop_geom; > +struct xfs_perag; > + > /* > * perag get/put wrappers for ref counting > */ > @@ -34,6 +41,8 @@ extern void xfs_sb_from_disk(struct xfs_sb *to, struct xfs_dsb *from); > extern void xfs_sb_to_disk(struct xfs_dsb *to, struct xfs_sb *from); > extern void xfs_sb_quota_from_disk(struct xfs_sb *sbp); > > +extern int xfs_update_secondary_sbs(struct xfs_mount *mp); > + > #define XFS_FS_GEOM_MAX_STRUCT_VER (4) > extern int xfs_fs_geometry(struct xfs_sb *sbp, struct xfs_fsop_geom *geo, > int struct_version); > diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c > index 28692b0e61ce..c5087b79bcea 100644 > --- a/fs/xfs/xfs_fsops.c > +++ b/fs/xfs/xfs_fsops.c > @@ -24,11 +24,7 @@ > #include "xfs_sb.h" > #include "xfs_mount.h" > #include "xfs_defer.h" > -#include "xfs_da_format.h" > -#include "xfs_da_btree.h" > -#include "xfs_inode.h" > #include "xfs_trans.h" > -#include "xfs_inode_item.h" > #include "xfs_error.h" > #include "xfs_btree.h" > #include "xfs_alloc_btree.h" > @@ -36,384 +32,17 @@ > #include "xfs_rmap_btree.h" > #include "xfs_ialloc.h" > #include "xfs_fsops.h" > -#include "xfs_itable.h" > #include "xfs_trans_space.h" > #include "xfs_rtalloc.h" > #include "xfs_trace.h" > #include "xfs_log.h" > -#include "xfs_filestream.h" > #include "xfs_rmap.h" > +#include "xfs_ag.h" > #include "xfs_ag_resv.h" > > /* > - * File system operations > - */ > - > -static struct xfs_buf * > -xfs_growfs_get_hdr_buf( > - struct xfs_mount *mp, > - xfs_daddr_t blkno, > - size_t numblks, > - int flags, > - const struct xfs_buf_ops *ops) > -{ > - struct xfs_buf *bp; > - > - bp = 
xfs_buf_get_uncached(mp->m_ddev_targp, numblks, flags); > - if (!bp) > - return NULL; > - > - xfs_buf_zero(bp, 0, BBTOB(bp->b_length)); > - bp->b_bn = blkno; > - bp->b_maps[0].bm_bn = blkno; > - bp->b_ops = ops; > - > - return bp; > -} > - > -struct aghdr_init_data { > - /* per ag data */ > - xfs_agblock_t agno; > - xfs_extlen_t agsize; > - struct list_head buffer_list; > - xfs_rfsblock_t nfree; > - > - /* per header data */ > - xfs_daddr_t daddr; > - size_t numblks; > - xfs_btnum_t type; > - int numrecs; > -}; > - > -/* > - * Generic btree root block init function > - */ > -static void > -xfs_btroot_init( > - struct xfs_mount *mp, > - struct xfs_buf *bp, > - struct aghdr_init_data *id) > -{ > - xfs_btree_init_block(mp, bp, id->type, 0, id->numrecs, id->agno, 0); > -} > - > -/* > - * Alloc btree root block init functions > - */ > -static void > -xfs_bnoroot_init( > - struct xfs_mount *mp, > - struct xfs_buf *bp, > - struct aghdr_init_data *id) > -{ > - struct xfs_alloc_rec *arec; > - > - xfs_btree_init_block(mp, bp, XFS_BTNUM_BNO, 0, 1, id->agno, 0); > - arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1); > - arec->ar_startblock = cpu_to_be32(mp->m_ag_prealloc_blocks); > - arec->ar_blockcount = cpu_to_be32(id->agsize - > - be32_to_cpu(arec->ar_startblock)); > -} > - > -static void > -xfs_cntroot_init( > - struct xfs_mount *mp, > - struct xfs_buf *bp, > - struct aghdr_init_data *id) > -{ > - struct xfs_alloc_rec *arec; > - > - xfs_btree_init_block(mp, bp, XFS_BTNUM_CNT, 0, 1, id->agno, 0); > - arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1); > - arec->ar_startblock = cpu_to_be32(mp->m_ag_prealloc_blocks); > - arec->ar_blockcount = cpu_to_be32(id->agsize - > - be32_to_cpu(arec->ar_startblock)); > -} > - > -/* > - * Reverse map root block init > - */ > -static void > -xfs_rmaproot_init( > - struct xfs_mount *mp, > - struct xfs_buf *bp, > - struct aghdr_init_data *id) > -{ > - struct xfs_btree_block *block = XFS_BUF_TO_BLOCK(bp); > - struct xfs_rmap_rec 
*rrec; > - > - xfs_btree_init_block(mp, bp, XFS_BTNUM_RMAP, 0, 4, id->agno, 0); > - > - /* > - * mark the AG header regions as static metadata The BNO > - * btree block is the first block after the headers, so > - * it's location defines the size of region the static > - * metadata consumes. > - * > - * Note: unlike mkfs, we never have to account for log > - * space when growing the data regions > - */ > - rrec = XFS_RMAP_REC_ADDR(block, 1); > - rrec->rm_startblock = 0; > - rrec->rm_blockcount = cpu_to_be32(XFS_BNO_BLOCK(mp)); > - rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_FS); > - rrec->rm_offset = 0; > - > - /* account freespace btree root blocks */ > - rrec = XFS_RMAP_REC_ADDR(block, 2); > - rrec->rm_startblock = cpu_to_be32(XFS_BNO_BLOCK(mp)); > - rrec->rm_blockcount = cpu_to_be32(2); > - rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_AG); > - rrec->rm_offset = 0; > - > - /* account inode btree root blocks */ > - rrec = XFS_RMAP_REC_ADDR(block, 3); > - rrec->rm_startblock = cpu_to_be32(XFS_IBT_BLOCK(mp)); > - rrec->rm_blockcount = cpu_to_be32(XFS_RMAP_BLOCK(mp) - > - XFS_IBT_BLOCK(mp)); > - rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_INOBT); > - rrec->rm_offset = 0; > - > - /* account for rmap btree root */ > - rrec = XFS_RMAP_REC_ADDR(block, 4); > - rrec->rm_startblock = cpu_to_be32(XFS_RMAP_BLOCK(mp)); > - rrec->rm_blockcount = cpu_to_be32(1); > - rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_AG); > - rrec->rm_offset = 0; > - > - /* account for refc btree root */ > - if (xfs_sb_version_hasreflink(&mp->m_sb)) { > - rrec = XFS_RMAP_REC_ADDR(block, 5); > - rrec->rm_startblock = cpu_to_be32(xfs_refc_block(mp)); > - rrec->rm_blockcount = cpu_to_be32(1); > - rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_REFC); > - rrec->rm_offset = 0; > - be16_add_cpu(&block->bb_numrecs, 1); > - } > -} > - > -/* > - * Initialise new secondary superblocks with the pre-grow geometry, but mark > - * them as "in progress" so we know they haven't yet been activated. 
This will > - * get cleared when the update with the new geometry information is done after > - * changes to the primary are committed. This isn't strictly necessary, but we > - * get it for free with the delayed buffer write lists and it means we can tell > - * if a grow operation didn't complete properly after the fact. > - */ > -static void > -xfs_sbblock_init( > - struct xfs_mount *mp, > - struct xfs_buf *bp, > - struct aghdr_init_data *id) > -{ > - struct xfs_dsb *dsb = XFS_BUF_TO_SBP(bp); > - > - xfs_sb_to_disk(dsb, &mp->m_sb); > - dsb->sb_inprogress = 1; > -} > - > -static void > -xfs_agfblock_init( > - struct xfs_mount *mp, > - struct xfs_buf *bp, > - struct aghdr_init_data *id) > -{ > - struct xfs_agf *agf = XFS_BUF_TO_AGF(bp); > - xfs_extlen_t tmpsize; > - > - agf->agf_magicnum = cpu_to_be32(XFS_AGF_MAGIC); > - agf->agf_versionnum = cpu_to_be32(XFS_AGF_VERSION); > - agf->agf_seqno = cpu_to_be32(id->agno); > - agf->agf_length = cpu_to_be32(id->agsize); > - agf->agf_roots[XFS_BTNUM_BNOi] = cpu_to_be32(XFS_BNO_BLOCK(mp)); > - agf->agf_roots[XFS_BTNUM_CNTi] = cpu_to_be32(XFS_CNT_BLOCK(mp)); > - agf->agf_levels[XFS_BTNUM_BNOi] = cpu_to_be32(1); > - agf->agf_levels[XFS_BTNUM_CNTi] = cpu_to_be32(1); > - if (xfs_sb_version_hasrmapbt(&mp->m_sb)) { > - agf->agf_roots[XFS_BTNUM_RMAPi] = > - cpu_to_be32(XFS_RMAP_BLOCK(mp)); > - agf->agf_levels[XFS_BTNUM_RMAPi] = cpu_to_be32(1); > - agf->agf_rmap_blocks = cpu_to_be32(1); > - } > - > - agf->agf_flfirst = cpu_to_be32(1); > - agf->agf_fllast = 0; > - agf->agf_flcount = 0; > - tmpsize = id->agsize - mp->m_ag_prealloc_blocks; > - agf->agf_freeblks = cpu_to_be32(tmpsize); > - agf->agf_longest = cpu_to_be32(tmpsize); > - if (xfs_sb_version_hascrc(&mp->m_sb)) > - uuid_copy(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid); > - if (xfs_sb_version_hasreflink(&mp->m_sb)) { > - agf->agf_refcount_root = cpu_to_be32( > - xfs_refc_block(mp)); > - agf->agf_refcount_level = cpu_to_be32(1); > - agf->agf_refcount_blocks = cpu_to_be32(1); > - } > 
-} > - > -static void > -xfs_agflblock_init( > - struct xfs_mount *mp, > - struct xfs_buf *bp, > - struct aghdr_init_data *id) > -{ > - struct xfs_agfl *agfl = XFS_BUF_TO_AGFL(bp); > - __be32 *agfl_bno; > - int bucket; > - > - if (xfs_sb_version_hascrc(&mp->m_sb)) { > - agfl->agfl_magicnum = cpu_to_be32(XFS_AGFL_MAGIC); > - agfl->agfl_seqno = cpu_to_be32(id->agno); > - uuid_copy(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid); > - } > - > - agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, bp); > - for (bucket = 0; bucket < xfs_agfl_size(mp); bucket++) > - agfl_bno[bucket] = cpu_to_be32(NULLAGBLOCK); > -} > - > -static void > -xfs_agiblock_init( > - struct xfs_mount *mp, > - struct xfs_buf *bp, > - struct aghdr_init_data *id) > -{ > - struct xfs_agi *agi = XFS_BUF_TO_AGI(bp); > - int bucket; > - > - agi->agi_magicnum = cpu_to_be32(XFS_AGI_MAGIC); > - agi->agi_versionnum = cpu_to_be32(XFS_AGI_VERSION); > - agi->agi_seqno = cpu_to_be32(id->agno); > - agi->agi_length = cpu_to_be32(id->agsize); > - agi->agi_count = 0; > - agi->agi_root = cpu_to_be32(XFS_IBT_BLOCK(mp)); > - agi->agi_level = cpu_to_be32(1); > - agi->agi_freecount = 0; > - agi->agi_newino = cpu_to_be32(NULLAGINO); > - agi->agi_dirino = cpu_to_be32(NULLAGINO); > - if (xfs_sb_version_hascrc(&mp->m_sb)) > - uuid_copy(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid); > - if (xfs_sb_version_hasfinobt(&mp->m_sb)) { > - agi->agi_free_root = cpu_to_be32(XFS_FIBT_BLOCK(mp)); > - agi->agi_free_level = cpu_to_be32(1); > - } > - for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++) > - agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO); > -} > - > -typedef void (*aghdr_init_work_f)(struct xfs_mount *mp, struct xfs_buf *bp, > - struct aghdr_init_data *id); > -static int > -xfs_growfs_init_aghdr( > - struct xfs_mount *mp, > - struct aghdr_init_data *id, > - aghdr_init_work_f work, > - const struct xfs_buf_ops *ops) > - > -{ > - struct xfs_buf *bp; > - > - bp = xfs_growfs_get_hdr_buf(mp, id->daddr, id->numblks, 0, ops); > - if (!bp) > - 
return -ENOMEM; > - > - (*work)(mp, bp, id); > - > - xfs_buf_delwri_queue(bp, &id->buffer_list); > - xfs_buf_relse(bp); > - return 0; > -} > - > -struct xfs_aghdr_grow_data { > - xfs_daddr_t daddr; > - size_t numblks; > - const struct xfs_buf_ops *ops; > - aghdr_init_work_f work; > - xfs_btnum_t type; > - int numrecs; > - bool need_init; > -}; > - > -/* > - * Write new AG headers to disk. Non-transactional, but written > - * synchronously so they are completed prior to the growfs transaction > - * being logged. > + * growfs operations > */ > -static int > -xfs_grow_ag_headers( > - struct xfs_mount *mp, > - struct aghdr_init_data *id) > - > -{ > - struct xfs_aghdr_grow_data aghdr_data[] = { > - /* SB */ > - { XFS_AG_DADDR(mp, id->agno, XFS_SB_DADDR), > - XFS_FSS_TO_BB(mp, 1), &xfs_sb_buf_ops, > - &xfs_sbblock_init, 0, 0, true }, > - /* AGF */ > - { XFS_AG_DADDR(mp, id->agno, XFS_AGF_DADDR(mp)), > - XFS_FSS_TO_BB(mp, 1), &xfs_agf_buf_ops, > - &xfs_agfblock_init, 0, 0, true }, > - /* AGFL */ > - { XFS_AG_DADDR(mp, id->agno, XFS_AGFL_DADDR(mp)), > - XFS_FSS_TO_BB(mp, 1), &xfs_agfl_buf_ops, > - &xfs_agflblock_init, 0, 0, true }, > - /* AGI */ > - { XFS_AG_DADDR(mp, id->agno, XFS_AGI_DADDR(mp)), > - XFS_FSS_TO_BB(mp, 1), &xfs_agi_buf_ops, > - &xfs_agiblock_init, 0, 0, true }, > - /* BNO root block */ > - { XFS_AGB_TO_DADDR(mp, id->agno, XFS_BNO_BLOCK(mp)), > - BTOBB(mp->m_sb.sb_blocksize), &xfs_allocbt_buf_ops, > - &xfs_bnoroot_init, 0, 0, true }, > - /* CNT root block */ > - { XFS_AGB_TO_DADDR(mp, id->agno, XFS_CNT_BLOCK(mp)), > - BTOBB(mp->m_sb.sb_blocksize), &xfs_allocbt_buf_ops, > - &xfs_cntroot_init, 0, 0, true }, > - /* INO root block */ > - { XFS_AGB_TO_DADDR(mp, id->agno, XFS_IBT_BLOCK(mp)), > - BTOBB(mp->m_sb.sb_blocksize), &xfs_inobt_buf_ops, > - &xfs_btroot_init, XFS_BTNUM_INO, 0, true }, > - /* FINO root block */ > - { XFS_AGB_TO_DADDR(mp, id->agno, XFS_FIBT_BLOCK(mp)), > - BTOBB(mp->m_sb.sb_blocksize), &xfs_inobt_buf_ops, > - &xfs_btroot_init, 
XFS_BTNUM_FINO, 0, > - xfs_sb_version_hasfinobt(&mp->m_sb) }, > - /* RMAP root block */ > - { XFS_AGB_TO_DADDR(mp, id->agno, XFS_RMAP_BLOCK(mp)), > - BTOBB(mp->m_sb.sb_blocksize), &xfs_rmapbt_buf_ops, > - &xfs_rmaproot_init, 0, 0, > - xfs_sb_version_hasrmapbt(&mp->m_sb) }, > - /* REFC root block */ > - { XFS_AGB_TO_DADDR(mp, id->agno, xfs_refc_block(mp)), > - BTOBB(mp->m_sb.sb_blocksize), &xfs_refcountbt_buf_ops, > - &xfs_btroot_init, XFS_BTNUM_REFC, 0, > - xfs_sb_version_hasreflink(&mp->m_sb) }, > - /* NULL terminating block */ > - { XFS_BUF_DADDR_NULL, 0, NULL, NULL, 0, 0, false }, > - }; > - struct xfs_aghdr_grow_data *dp; > - int error = 0; > - > - /* Account for AG free space in new AG */ > - id->nfree += id->agsize - mp->m_ag_prealloc_blocks; > - > - for (dp = &aghdr_data[0]; dp->daddr != XFS_BUF_DADDR_NULL; dp++) { > - if (!dp->need_init) > - continue; > - > - id->daddr = dp->daddr; > - id->numblks = dp->numblks; > - id->numrecs = dp->numrecs; > - id->type = dp->type; > - error = xfs_growfs_init_aghdr(mp, id, dp->work, dp->ops); > - if (error) > - break; > - } > - return error; > -} > - > static int > xfs_growfs_data_private( > xfs_mount_t *mp, /* mount point for filesystem */ > @@ -491,7 +120,7 @@ xfs_growfs_data_private( > else > id.agsize = mp->m_sb.sb_agblocks; > > - error = xfs_grow_ag_headers(mp, &id); > + error = xfs_ag_init_headers(mp, &id); > if (error) { > xfs_buf_delwri_cancel(&id.buffer_list); > goto out_trans_cancel; > @@ -655,79 +284,6 @@ xfs_growfs_imaxpct( > return xfs_trans_commit(tp); > } > > -/* > - * After a grow operation, we need to update all the secondary superblocks > - * to match the new state of the primary. Because we are completely overwriting > - * all the existing fields in the secondary superblock buffers, there is no need > - * to read them in from disk. Just get a new buffer, stamp it and write it. 
> - * > - * The sb buffers need to be cached here so that we serialise against scrub > - * scanning secondary superblocks, but we don't want to keep it in memory once > - * it is written so we mark it as a one-shot buffer. > - */ > -static int > -xfs_growfs_update_superblocks( > - struct xfs_mount *mp) > -{ > - xfs_agnumber_t agno; > - int saved_error = 0; > - int error = 0; > - LIST_HEAD (buffer_list); > - > - /* update secondary superblocks. */ > - for (agno = 1; agno < mp->m_sb.sb_agcount; agno++) { > - struct xfs_buf *bp; > - > - bp = xfs_buf_get(mp->m_ddev_targp, > - XFS_AG_DADDR(mp, agno, XFS_SB_DADDR), > - XFS_FSS_TO_BB(mp, 1), 0); > - /* > - * If we get an error reading or writing alternate superblocks, > - * continue. xfs_repair chooses the "best" superblock based > - * on most matches; if we break early, we'll leave more > - * superblocks un-updated than updated, and xfs_repair may > - * pick them over the properly-updated primary. > - */ > - if (!bp) { > - xfs_warn(mp, > - "error allocating secondary superblock for ag %d", > - agno); > - if (!saved_error) > - saved_error = -ENOMEM; > - continue; > - } > - > - bp->b_ops = &xfs_sb_buf_ops; > - xfs_buf_oneshot(bp); > - xfs_buf_zero(bp, 0, BBTOB(bp->b_length)); > - xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb); > - xfs_buf_delwri_queue(bp, &buffer_list); > - xfs_buf_relse(bp); > - > - /* don't hold too many buffers at once */ > - if (agno % 16) > - continue; > - > - error = xfs_buf_delwri_submit(&buffer_list); > - if (error) { > - xfs_warn(mp, > - "write error %d updating a secondary superblock near ag %d", > - error, agno); > - if (!saved_error) > - saved_error = error; > - continue; > - } > - } > - error = xfs_buf_delwri_submit(&buffer_list); > - if (error) { > - xfs_warn(mp, > - "write error %d updating a secondary superblock near ag %d", > - error, agno); > - } > - > - return saved_error ? 
saved_error : error; > -} > - > /* > * protected versions of growfs function acquire and release locks on the mount > * point - exported through ioctls: XFS_IOC_FSGROWFSDATA, XFS_IOC_FSGROWFSLOG, > @@ -767,7 +323,7 @@ xfs_growfs_data( > mp->m_maxicount = 0; > > /* Update secondary superblocks now the physical grow has completed */ > - error = xfs_growfs_update_superblocks(mp); > + error = xfs_update_secondary_sbs(mp); > > out_error: > /* > -- > 2.17.0 > > -- > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in > the body of a message to majordomo@xxxxxxxxxxxxxxx > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-xfs" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html