From: Dave Chinner <dchinner@xxxxxxxxxx> Create v5 filesystems with rmapbt turned on. Document the rmapbt options to mkfs, and initialize the extra field we added for reflink support. v2: Turn on the rmapbt feature when calculating the minimum log size. Signed-off-by: Dave Chinner <dchinner@xxxxxxxxxx> [darrick.wong@xxxxxxxxxx: split patch, add commit message and extra fields] Signed-off-by: Darrick J. Wong <darrick.wong@xxxxxxxxxx> --- include/xfs_multidisk.h | 2 - man/man8/mkfs.xfs.8 | 20 +++++++ mkfs/maxtrres.c | 5 +- mkfs/xfs_mkfs.c | 138 +++++++++++++++++++++++++++++++++++++++++------ 4 files changed, 145 insertions(+), 20 deletions(-) diff --git a/include/xfs_multidisk.h b/include/xfs_multidisk.h index 4429dab..8dc3027 100644 --- a/include/xfs_multidisk.h +++ b/include/xfs_multidisk.h @@ -68,6 +68,6 @@ extern void res_failed (int err); /* maxtrres.c */ extern int max_trans_res(unsigned long agsize, int crcs_enabled, int dirversion, int sectorlog, int blocklog, int inodelog, int dirblocklog, - int logversion, int log_sunit, int finobt); + int logversion, int log_sunit, int finobt, int rmapbt); #endif /* __XFS_MULTIDISK_H__ */ diff --git a/man/man8/mkfs.xfs.8 b/man/man8/mkfs.xfs.8 index 980b0e1..d88d314 100644 --- a/man/man8/mkfs.xfs.8 +++ b/man/man8/mkfs.xfs.8 @@ -193,6 +193,26 @@ is used, the free inode btree feature is not supported and is disabled. .BI uuid= value Use the given value as the filesystem UUID for the newly created filesystem. The default is to generate a random UUID. +.TP +.BI rmapbt= value +This option enables the creation of a reverse-mapping btree index in each +allocation group. The value is either 0 to disable the feature, or 1 to +create the btree. +.IP +The reverse mapping btree maps filesystem blocks to the owner of the +filesystem block. Most of the mappings will be to an inode number and an +offset, though there will also be mappings to filesystem metadata. 
This +secondary metadata can be used to validate the primary metadata or to +pinpoint exactly which data has been lost when a disk error occurs. +.IP +By default, +.B mkfs.xfs +will not create reverse mapping btrees. This feature is only available +for filesystems created with the (default) +.B \-m crc=1 +option set. When the option +.B \-m crc=0 +is used, the reverse mapping btree feature is not supported and is disabled. .RE .TP .BI \-d " data_section_options" diff --git a/mkfs/maxtrres.c b/mkfs/maxtrres.c index c0b1b5d..fc24eac 100644 --- a/mkfs/maxtrres.c +++ b/mkfs/maxtrres.c @@ -38,7 +38,8 @@ max_trans_res( int dirblocklog, int logversion, int log_sunit, - int finobt) + int finobt, + int rmapbt) { xfs_sb_t *sbp; xfs_mount_t mount; @@ -72,6 +73,8 @@ max_trans_res( XFS_DFL_SB_VERSION_BITS; if (finobt) sbp->sb_features_ro_compat |= XFS_SB_FEAT_RO_COMPAT_FINOBT; + if (rmapbt) + sbp->sb_features_ro_compat |= XFS_SB_FEAT_RO_COMPAT_RMAPBT; libxfs_mount(&mount, sbp, 0,0,0,0); maxfsb = xfs_log_calc_minimum_size(&mount); diff --git a/mkfs/xfs_mkfs.c b/mkfs/xfs_mkfs.c index 8b3cad8..634dcfd 100644 --- a/mkfs/xfs_mkfs.c +++ b/mkfs/xfs_mkfs.c @@ -680,6 +680,8 @@ struct opt_params mopts = { "finobt", #define M_UUID 2 "uuid", +#define M_RMAPBT 3 + "rmapbt", NULL }, .subopt_params = { @@ -699,6 +701,12 @@ struct opt_params mopts = { .conflicts = { LAST_CONFLICT }, .defaultval = SUBOPT_NEEDS_VAL, }, + { .index = M_RMAPBT, + .conflicts = { LAST_CONFLICT }, + .minval = 0, + .maxval = 1, + .defaultval = 0, + }, }, }; @@ -1454,6 +1462,7 @@ struct sb_feat_args { bool crcs_enabled; bool dirftype; bool parent_pointers; + bool rmapbt; }; static void @@ -1524,6 +1533,8 @@ sb_set_features( if (fp->finobt) sbp->sb_features_ro_compat = XFS_SB_FEAT_RO_COMPAT_FINOBT; + if (fp->rmapbt) + sbp->sb_features_ro_compat |= XFS_SB_FEAT_RO_COMPAT_RMAPBT; /* * Sparse inode chunk support has two main inode alignment requirements. 
@@ -1784,6 +1795,7 @@ main( .crcs_enabled = true, .dirftype = true, .parent_pointers = false, + .rmapbt = false, }; platform_uuid_generate(&uuid); @@ -2073,6 +2085,10 @@ main( if (platform_uuid_parse(value, &uuid)) illegal(optarg, "m uuid"); break; + case M_RMAPBT: + sb_feat.rmapbt = getnum( + value, &mopts, M_RMAPBT); + break; default: unknown('m', value); } @@ -2409,6 +2425,20 @@ _("sparse inodes not supported without CRC support\n")); } sb_feat.spinodes = 0; + if (sb_feat.rmapbt) { + fprintf(stderr, +_("rmapbt not supported without CRC support\n")); + usage(); + } + sb_feat.rmapbt = false; + } + + + if (sb_feat.rmapbt && xi.rtname) { + fprintf(stderr, +_("rmapbt not supported with realtime devices\n")); + usage(); + sb_feat.rmapbt = false; } if (nsflag || nlflag) { @@ -2890,7 +2920,8 @@ an AG size that is one stripe unit smaller, for example %llu.\n"), min_logblocks = max_trans_res(agsize, sb_feat.crcs_enabled, sb_feat.dir_version, sectorlog, blocklog, inodelog, dirblocklog, - sb_feat.log_version, lsunit, sb_feat.finobt); + sb_feat.log_version, lsunit, sb_feat.finobt, + sb_feat.rmapbt); ASSERT(min_logblocks); min_logblocks = MAX(XFS_MIN_LOG_BLOCKS, min_logblocks); if (!logsize && dblocks >= (1024*1024*1024) >> blocklog) @@ -2965,7 +2996,7 @@ _("size %s specified for log subvolume is too large, maximum is %lld blocks\n"), mp->m_sectbb_log = sbp->sb_sectlog - BBSHIFT; /* - * sb_versionnum and finobt flags must be set before we use + * sb_versionnum, finobt and rmapbt flags must be set before we use * xfs_prealloc_blocks(). 
*/ sb_set_features(&mp->m_sb, &sb_feat, sectorsize, lsectorsize, dsunit); @@ -3025,7 +3056,7 @@ _("size %s specified for log subvolume is too large, maximum is %lld blocks\n"), printf(_( "meta-data=%-22s isize=%-6d agcount=%lld, agsize=%lld blks\n" " =%-22s sectsz=%-5u attr=%u, projid32bit=%u\n" - " =%-22s crc=%-8u finobt=%u, sparse=%u\n" + " =%-22s crc=%-8u finobt=%u, sparse=%u, rmapbt=%u\n" "data =%-22s bsize=%-6u blocks=%llu, imaxpct=%u\n" " =%-22s sunit=%-6u swidth=%u blks\n" "naming =version %-14u bsize=%-6u ascii-ci=%d ftype=%d\n" @@ -3036,6 +3067,7 @@ _("size %s specified for log subvolume is too large, maximum is %lld blocks\n"), "", sectorsize, sb_feat.attr_version, !sb_feat.projid16bit, "", sb_feat.crcs_enabled, sb_feat.finobt, sb_feat.spinodes, + sb_feat.rmapbt, "", blocksize, (long long)dblocks, imaxpct, "", dsunit, dswidth, sb_feat.dir_version, dirblocksize, sb_feat.nci, @@ -3217,6 +3249,12 @@ _("size %s specified for log subvolume is too large, maximum is %lld blocks\n"), agf->agf_levels[XFS_BTNUM_CNTi] = cpu_to_be32(1); pag->pagf_levels[XFS_BTNUM_BNOi] = 1; pag->pagf_levels[XFS_BTNUM_CNTi] = 1; + if (xfs_sb_version_hasrmapbt(&mp->m_sb)) { + agf->agf_roots[XFS_BTNUM_RMAPi] = + cpu_to_be32(XFS_RMAP_BLOCK(mp)); + agf->agf_levels[XFS_BTNUM_RMAPi] = cpu_to_be32(1); + } + agf->agf_flfirst = 0; agf->agf_fllast = cpu_to_be32(XFS_AGFL_SIZE(mp) - 1); agf->agf_flcount = 0; @@ -3404,24 +3442,88 @@ _("size %s specified for log subvolume is too large, maximum is %lld blocks\n"), /* * Free INO btree root block */ - if (!sb_feat.finobt) { - xfs_perag_put(pag); - continue; + if (sb_feat.finobt) { + buf = libxfs_getbuf(mp->m_ddev_targp, + XFS_AGB_TO_DADDR(mp, agno, XFS_FIBT_BLOCK(mp)), + bsize); + buf->b_ops = &xfs_inobt_buf_ops; + block = XFS_BUF_TO_BLOCK(buf); + memset(block, 0, blocksize); + if (xfs_sb_version_hascrc(&mp->m_sb)) + xfs_btree_init_block(mp, buf, XFS_FIBT_CRC_MAGIC, 0, 0, + agno, XFS_BTREE_CRC_BLOCKS); + else + xfs_btree_init_block(mp, buf, 
XFS_FIBT_MAGIC, 0, 0, + agno, 0); + libxfs_writebuf(buf, LIBXFS_EXIT_ON_FAILURE); } - buf = libxfs_getbuf(mp->m_ddev_targp, - XFS_AGB_TO_DADDR(mp, agno, XFS_FIBT_BLOCK(mp)), + /* RMAP btree root block */ + if (xfs_sb_version_hasrmapbt(&mp->m_sb)) { + struct xfs_rmap_rec *rrec; + + buf = libxfs_getbuf(mp->m_ddev_targp, + XFS_AGB_TO_DADDR(mp, agno, XFS_RMAP_BLOCK(mp)), bsize); - buf->b_ops = &xfs_inobt_buf_ops; - block = XFS_BUF_TO_BLOCK(buf); - memset(block, 0, blocksize); - if (xfs_sb_version_hascrc(&mp->m_sb)) - xfs_btree_init_block(mp, buf, XFS_FIBT_CRC_MAGIC, 0, 0, + buf->b_ops = &xfs_rmapbt_buf_ops; + block = XFS_BUF_TO_BLOCK(buf); + memset(block, 0, blocksize); + + xfs_btree_init_block(mp, buf, XFS_RMAP_CRC_MAGIC, 0, 0, agno, XFS_BTREE_CRC_BLOCKS); - else - xfs_btree_init_block(mp, buf, XFS_FIBT_MAGIC, 0, 0, - agno, 0); - libxfs_writebuf(buf, LIBXFS_EXIT_ON_FAILURE); + + /* + * mark the AG header regions as static metadata + * The BNO btree block is the first block after the + * headers, so its location defines the size of the region + * the static metadata consumes. 
+ */ + rrec = XFS_RMAP_REC_ADDR(block, 1); + rrec->rm_startblock = 0; + rrec->rm_blockcount = cpu_to_be32(XFS_BNO_BLOCK(mp)); + rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_FS); + rrec->rm_offset = 0; + be16_add_cpu(&block->bb_numrecs, 1); + + /* account freespace btree root blocks */ + rrec = XFS_RMAP_REC_ADDR(block, 2); + rrec->rm_startblock = cpu_to_be32(XFS_BNO_BLOCK(mp)); + rrec->rm_blockcount = cpu_to_be32(2); + rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_AG); + rrec->rm_offset = 0; + be16_add_cpu(&block->bb_numrecs, 1); + + /* account inode btree root blocks */ + rrec = XFS_RMAP_REC_ADDR(block, 3); + rrec->rm_startblock = cpu_to_be32(XFS_IBT_BLOCK(mp)); + rrec->rm_blockcount = cpu_to_be32(XFS_RMAP_BLOCK(mp) - + XFS_IBT_BLOCK(mp)); + rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_INOBT); + rrec->rm_offset = 0; + be16_add_cpu(&block->bb_numrecs, 1); + + /* account for rmap btree root */ + rrec = XFS_RMAP_REC_ADDR(block, 4); + rrec->rm_startblock = cpu_to_be32(XFS_RMAP_BLOCK(mp)); + rrec->rm_blockcount = cpu_to_be32(1); + rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_AG); + rrec->rm_offset = 0; + be16_add_cpu(&block->bb_numrecs, 1); + + /* account for the log space */ + if (loginternal && agno == logagno) { + rrec = XFS_RMAP_REC_ADDR(block, 5); + rrec->rm_startblock = cpu_to_be32( + XFS_FSB_TO_AGBNO(mp, logstart)); + rrec->rm_blockcount = cpu_to_be32(logblocks); + rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_LOG); + rrec->rm_offset = 0; + be16_add_cpu(&block->bb_numrecs, 1); + } + + libxfs_writebuf(buf, LIBXFS_EXIT_ON_FAILURE); + } + xfs_perag_put(pag); } @@ -3646,7 +3748,7 @@ usage( void ) { fprintf(stderr, _("Usage: %s\n\ /* blocksize */ [-b log=n|size=num]\n\ -/* metadata */ [-m crc=0|1,finobt=0|1,uuid=xxx]\n\ +/* metadata */ [-m crc=0|1,finobt=0|1,uuid=xxx,rmapbt=0|1]\n\ /* data subvol */ [-d agcount=n,agsize=n,file,name=xxx,size=num,\n\ (sunit=value,swidth=value|su=num,sw=num|noalign),\n\ sectlog=n|sectsize=num\n\ _______________________________________________ xfs mailing 
list xfs@xxxxxxxxxxx http://oss.sgi.com/mailman/listinfo/xfs