From: "Darrick J. Wong" <djwong@xxxxxxxxxx> Add to xfs_repair the ability to add certain existing features (finobt, reflink, rmapbt, and metadir) to an existing filesystem if it's eligible. Signed-off-by: Darrick J. Wong <djwong@xxxxxxxxxx> Signed-off-by: Chandan Babu R <chandan.babu@xxxxxxxxxx> --- include/libxfs.h | 1 + libxfs/libxfs_api_defs.h | 3 + man/man8/xfs_admin.8 | 30 +++++ repair/dino_chunks.c | 6 +- repair/dinode.c | 5 +- repair/globals.c | 4 + repair/globals.h | 4 + repair/phase2.c | 285 +++++++++++++++++++++++++++++++++++++-- repair/phase4.c | 5 +- repair/protos.h | 1 + repair/rmap.c | 4 +- repair/xfs_repair.c | 44 ++++++ 12 files changed, 377 insertions(+), 15 deletions(-) diff --git a/include/libxfs.h b/include/libxfs.h index 82618a56..7100155f 100644 --- a/include/libxfs.h +++ b/include/libxfs.h @@ -79,6 +79,7 @@ struct iomap; #include "xfs_refcount.h" #include "xfs_btree_staging.h" #include "xfs_imeta.h" +#include "xfs_ag_resv.h" #ifndef ARRAY_SIZE #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) diff --git a/libxfs/libxfs_api_defs.h b/libxfs/libxfs_api_defs.h index 55dcedeb..d4d0c281 100644 --- a/libxfs/libxfs_api_defs.h +++ b/libxfs/libxfs_api_defs.h @@ -21,6 +21,8 @@ #define xfs_ag_init_headers libxfs_ag_init_headers #define xfs_ag_block_count libxfs_ag_block_count +#define xfs_ag_resv_free libxfs_ag_resv_free +#define xfs_ag_resv_init libxfs_ag_resv_init #define xfs_alloc_ag_max_usable libxfs_alloc_ag_max_usable #define xfs_allocbt_maxrecs libxfs_allocbt_maxrecs @@ -121,6 +123,7 @@ #define xfs_highbit32 libxfs_highbit32 #define xfs_highbit64 libxfs_highbit64 #define xfs_ialloc_calc_rootino libxfs_ialloc_calc_rootino +#define xfs_ialloc_read_agi libxfs_ialloc_read_agi #define xfs_icreate libxfs_icreate #define xfs_icreate_args_rootfile libxfs_icreate_args_rootfile #define xfs_idata_realloc libxfs_idata_realloc diff --git a/man/man8/xfs_admin.8 b/man/man8/xfs_admin.8 index ad28e0f6..4f3c882a 100644 --- a/man/man8/xfs_admin.8 +++ b/man/man8/xfs_admin.8 @@ 
-149,6 +149,36 @@ Upgrade a filesystem to support larger timestamps up to the year 2486. The filesystem cannot be downgraded after this feature is enabled. Once enabled, the filesystem will not be mountable by older kernels. This feature was added to Linux 5.10. +.TP 0.4i +.B finobt +Track free inodes through a separate free inode btree index to speed up inode +allocation on old filesystems. +This upgrade can fail if any AG has less than 1% free space remaining. +The filesystem cannot be downgraded after this feature is enabled. +This feature was added to Linux 3.16. +.TP 0.4i +.B reflink +Enable sharing of file data blocks. +This upgrade can fail if any AG has less than 2% free space remaining. +The filesystem cannot be downgraded after this feature is enabled. +This feature was added to Linux 4.9. +.TP 0.4i +.B rmapbt +Store an index of the owners of on-disk blocks. +This enables much stronger cross-referencing of various metadata structures +and online repairs to space usage metadata. +The filesystem cannot be downgraded after this feature is enabled. +This upgrade can fail if any AG has less than 5% free space remaining. +This feature was added to Linux 4.8. +.TP 0.4i +.B metadir +Create a directory tree of metadata inodes instead of storing them all in the +superblock. +This is required for reverse mapping btrees and reflink support on the realtime +device. +The filesystem cannot be downgraded after this feature is enabled. +This upgrade can fail if any AG has less than 5% free space remaining. +This feature is not upstream yet. 
.RE .TP .BI \-U " uuid" diff --git a/repair/dino_chunks.c b/repair/dino_chunks.c index bdefef40..160dd4cc 100644 --- a/repair/dino_chunks.c +++ b/repair/dino_chunks.c @@ -963,7 +963,11 @@ next_readbuf: } if (status) { - if (mp->m_sb.sb_rootino == ino) { + if (wipe_pre_metadir_file(ino)) { + if (!ino_discovery) + do_warn( + _("wiping pre-metadir metadata inode %"PRIu64".\n"), ino); + } else if (mp->m_sb.sb_rootino == ino) { need_root_inode = 1; if (!no_modify) { diff --git a/repair/dinode.c b/repair/dinode.c index 758b1a15..0ffb3e6e 100644 --- a/repair/dinode.c +++ b/repair/dinode.c @@ -2386,6 +2386,9 @@ process_dinode_int( ASSERT(uncertain == 0 || verify_mode != 0); ASSERT(ino_bpp != NULL || verify_mode != 0); + if (wipe_pre_metadir_file(lino)) + goto clear_bad_out; + /* * This is the only valid point to check the CRC; after this we may have * made changes which invalidate it, and the CRC is only updated again @@ -2593,7 +2596,7 @@ _("bad (negative) size %" PRId64 " on inode %" PRIu64 "\n"), if (flags & XFS_DIFLAG_NEWRTBM) { /* must be a rt bitmap inode */ if (lino != mp->m_sb.sb_rbmino) { - if (!uncertain) { + if (!uncertain && !add_metadir) { do_warn( _("inode %" PRIu64 " not rt bitmap\n"), lino); diff --git a/repair/globals.c b/repair/globals.c index 7f7bafe3..6e52bac9 100644 --- a/repair/globals.c +++ b/repair/globals.c @@ -50,6 +50,10 @@ int convert_lazy_count; /* Convert lazy-count mode on/off */ int lazy_count; /* What to set if to if converting */ bool add_inobtcount; /* add inode btree counts to AGI */ bool add_bigtime; /* add support for timestamps up to 2486 */ +bool add_finobt; /* add free inode btrees */ +bool add_reflink; /* add reference count btrees */ +bool add_rmapbt; /* add reverse mapping btrees */ +bool add_metadir; /* add metadata directory tree */ /* misc status variables */ diff --git a/repair/globals.h b/repair/globals.h index 1964c18c..6c69413f 100644 --- a/repair/globals.h +++ b/repair/globals.h @@ -91,6 +91,10 @@ extern int 
convert_lazy_count; /* Convert lazy-count mode on/off */ extern int lazy_count; /* What to set if to if converting */ extern bool add_inobtcount; /* add inode btree counts to AGI */ extern bool add_bigtime; /* add support for timestamps up to 2486 */ +extern bool add_finobt; /* add free inode btrees */ +extern bool add_reflink; /* add reference count btrees */ +extern bool add_rmapbt; /* add reverse mapping btrees */ +extern bool add_metadir; /* add metadata directory tree */ /* misc status variables */ diff --git a/repair/phase2.c b/repair/phase2.c index 51234ee9..cca154d3 100644 --- a/repair/phase2.c +++ b/repair/phase2.c @@ -133,7 +133,8 @@ zero_log( static bool set_inobtcount( - struct xfs_mount *mp) + struct xfs_mount *mp, + struct xfs_sb *new_sb) { if (!xfs_has_crc(mp)) { printf( @@ -153,14 +154,15 @@ set_inobtcount( } printf(_("Adding inode btree counts to filesystem.\n")); - mp->m_sb.sb_features_ro_compat |= XFS_SB_FEAT_RO_COMPAT_INOBTCNT; - mp->m_sb.sb_features_incompat |= XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR; + new_sb->sb_features_ro_compat |= XFS_SB_FEAT_RO_COMPAT_INOBTCNT; + new_sb->sb_features_incompat |= XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR; return true; } static bool set_bigtime( - struct xfs_mount *mp) + struct xfs_mount *mp, + struct xfs_sb *new_sb) { if (!xfs_has_crc(mp)) { printf( @@ -174,8 +176,256 @@ set_bigtime( } printf(_("Adding large timestamp support to filesystem.\n")); - mp->m_sb.sb_features_incompat |= (XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR | - XFS_SB_FEAT_INCOMPAT_BIGTIME); + new_sb->sb_features_incompat |= (XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR | + XFS_SB_FEAT_INCOMPAT_BIGTIME); + return true; +} + +/* Make sure we can actually upgrade this (v5) filesystem. 
*/ +static void +check_new_v5_geometry( + struct xfs_mount *mp, + struct xfs_sb *new_sb) +{ + struct xfs_sb old_sb; + struct xfs_perag *pag; + xfs_agnumber_t agno; + xfs_ino_t rootino; + int min_logblocks; + int error; + + /* + * Save the current superblock, then copy in the new one to do log size + * and root inode checks. + */ + memcpy(&old_sb, &mp->m_sb, sizeof(struct xfs_sb)); + memcpy(&mp->m_sb, new_sb, sizeof(struct xfs_sb)); + + /* Do we have a big enough log? */ + min_logblocks = libxfs_log_calc_minimum_size(mp); + if (old_sb.sb_logblocks < min_logblocks) { + printf( + _("Filesystem log too small to upgrade filesystem; need %u blocks, have %u.\n"), + min_logblocks, old_sb.sb_logblocks); + exit(0); + } + + rootino = libxfs_ialloc_calc_rootino(mp, new_sb->sb_unit); + if (old_sb.sb_rootino != rootino) { + printf( + _("Cannot upgrade filesystem, root inode (%llu) cannot be moved to %llu.\n"), + (unsigned long long)old_sb.sb_rootino, + (unsigned long long)rootino); + exit(0); + } + + /* Make sure we have enough space for per-AG reservations. */ + for_each_perag(mp, agno, pag) { + struct xfs_trans *tp; + struct xfs_agf *agf; + struct xfs_buf *agi_bp, *agf_bp; + unsigned int avail, agblocks; + + /* + * Create a dummy transaction so that we can load the AGI and + * AGF buffers in memory with the old fs geometry and pin them + * there while we try to make a per-AG reservation with the new + * geometry. 
+ */ + error = -libxfs_trans_alloc_empty(mp, &tp); + if (error) + do_error( + _("Cannot reserve resources for upgrade check, err=%d.\n"), + error); + + error = -libxfs_ialloc_read_agi(mp, tp, agno, &agi_bp); + if (error) + do_error( + _("Cannot read AGI %u for upgrade check, err=%d.\n"), + agno, error); + + error = -libxfs_alloc_read_agf(mp, tp, agno, 0, &agf_bp); + if (error) + do_error( + _("Cannot read AGF %u for upgrade check, err=%d.\n"), + agno, error); + agf = agf_bp->b_addr; + agblocks = be32_to_cpu(agf->agf_length); + + error = -libxfs_ag_resv_init(pag, tp); + if (error == ENOSPC) { + printf( + _("Not enough free space would remain in AG %u for metadata.\n"), + agno); + exit(0); + } + if (error) + do_error( + _("Error %d while checking AG %u space reservation.\n"), + error, agno); + + /* + * Would we have at least 10% free space in this AG after + * making per-AG reservations? + */ + avail = pag->pagf_freeblks + pag->pagf_flcount; + avail -= pag->pag_meta_resv.ar_reserved; + avail -= pag->pag_rmapbt_resv.ar_asked; + if (avail < agblocks / 10) + printf( + _("AG %u will be low on space after upgrade.\n"), + agno); + + libxfs_ag_resv_free(pag); + + /* + * Mark the per-AG structure as uninitialized so that we don't + * trip over stale cached counters after the upgrade, and + * release all the resources. + */ + libxfs_trans_cancel(tp); + pag->pagf_init = 0; + pag->pagi_init = 0; + } + + /* + * Put back the old superblock. 
+ */ + memcpy(&mp->m_sb, &old_sb, sizeof(struct xfs_sb)); +} + +static bool +set_finobt( + struct xfs_mount *mp, + struct xfs_sb *new_sb) +{ + if (!xfs_has_crc(mp)) { + printf( + _("Free inode btree feature only supported on V5 filesystems.\n")); + exit(0); + } + + if (xfs_has_finobt(mp)) { + printf(_("Filesystem already supports free inode btrees.\n")); + exit(0); + } + + printf(_("Adding free inode btrees to filesystem.\n")); + new_sb->sb_features_ro_compat |= XFS_SB_FEAT_RO_COMPAT_FINOBT; + new_sb->sb_features_incompat |= XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR; + return true; +} + +static bool +set_reflink( + struct xfs_mount *mp, + struct xfs_sb *new_sb) +{ + if (!xfs_has_crc(mp)) { + printf( + _("Reflink feature only supported on V5 filesystems.\n")); + exit(0); + } + + if (xfs_has_reflink(mp)) { + printf(_("Filesystem already supports reflink.\n")); + exit(0); + } + + printf(_("Adding reflink support to filesystem.\n")); + new_sb->sb_features_ro_compat |= XFS_SB_FEAT_RO_COMPAT_REFLINK; + new_sb->sb_features_incompat |= XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR; + return true; +} + +static bool +set_rmapbt( + struct xfs_mount *mp, + struct xfs_sb *new_sb) +{ + if (!xfs_has_crc(mp)) { + printf( + _("Reverse mapping btree feature only supported on V5 filesystems.\n")); + exit(0); + } + + if (xfs_has_reflink(mp)) { + printf( + _("Reverse mapping btrees cannot be added when reflink is enabled.\n")); + exit(0); + } + + if (xfs_has_rmapbt(mp)) { + printf(_("Filesystem already supports reverse mapping btrees.\n")); + exit(0); + } + + printf(_("Adding reverse mapping btrees to filesystem.\n")); + new_sb->sb_features_ro_compat |= XFS_SB_FEAT_RO_COMPAT_RMAPBT; + new_sb->sb_features_incompat |= XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR; + return true; +} + +static xfs_ino_t doomed_rbmino = NULLFSINO; +static xfs_ino_t doomed_rsumino = NULLFSINO; +static xfs_ino_t doomed_uquotino = NULLFSINO; +static xfs_ino_t doomed_gquotino = NULLFSINO; +static xfs_ino_t doomed_pquotino = NULLFSINO; + +bool 
+wipe_pre_metadir_file( + xfs_ino_t ino) +{ + if (ino == doomed_rbmino || + ino == doomed_rsumino || + ino == doomed_uquotino || + ino == doomed_gquotino || + ino == doomed_pquotino) + return true; + return false; +} + +static bool +set_metadir( + struct xfs_mount *mp, + struct xfs_sb *new_sb) +{ + if (!xfs_has_crc(mp)) { + printf( + _("Metadata directory trees only supported on V5 filesystems.\n")); + exit(0); + } + + if (xfs_has_metadir(mp)) { + printf(_("Filesystem already supports metadata directory trees.\n")); + exit(0); + } + + printf(_("Adding metadata directory trees to filesystem.\n")); + new_sb->sb_features_incompat |= XFS_SB_FEAT_INCOMPAT_METADIR; + new_sb->sb_features_incompat |= XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR; + + /* Blow out all the old metadata inodes; we'll rebuild in phase6. */ + new_sb->sb_metadirino = new_sb->sb_rootino + 1; + doomed_rbmino = mp->m_sb.sb_rbmino; + doomed_rsumino = mp->m_sb.sb_rsumino; + doomed_uquotino = mp->m_sb.sb_uquotino; + doomed_gquotino = mp->m_sb.sb_gquotino; + doomed_pquotino = mp->m_sb.sb_pquotino; + + new_sb->sb_rbmino = NULLFSINO; + new_sb->sb_rsumino = NULLFSINO; + new_sb->sb_uquotino = NULLFSINO; + new_sb->sb_gquotino = NULLFSINO; + new_sb->sb_pquotino = NULLFSINO; + + /* Indicate that we need a rebuild. 
*/ + need_metadir_inode = 1; + need_rbmino = 1; + need_rsumino = 1; + have_uquotino = 0; + have_gquotino = 0; + have_pquotino = 0; return true; } @@ -184,16 +434,31 @@ static void upgrade_filesystem( struct xfs_mount *mp) { + struct xfs_sb new_sb; struct xfs_buf *bp; bool dirty = false; int error; + memcpy(&new_sb, &mp->m_sb, sizeof(struct xfs_sb)); + if (add_inobtcount) - dirty |= set_inobtcount(mp); + dirty |= set_inobtcount(mp, &new_sb); if (add_bigtime) - dirty |= set_bigtime(mp); - - if (no_modify || !dirty) + dirty |= set_bigtime(mp, &new_sb); + if (add_finobt) + dirty |= set_finobt(mp, &new_sb); + if (add_reflink) + dirty |= set_reflink(mp, &new_sb); + if (add_rmapbt) + dirty |= set_rmapbt(mp, &new_sb); + if (add_metadir) + dirty |= set_metadir(mp, &new_sb); + if (!dirty) + return; + + check_new_v5_geometry(mp, &new_sb); + memcpy(&mp->m_sb, &new_sb, sizeof(struct xfs_sb)); + if (no_modify) return; bp = libxfs_getsb(mp); diff --git a/repair/phase4.c b/repair/phase4.c index 7f23d564..b752b07c 100644 --- a/repair/phase4.c +++ b/repair/phase4.c @@ -303,7 +303,10 @@ phase4(xfs_mount_t *mp) if (xfs_has_metadir(mp) && (is_inode_free(irec, 1) || !inode_isadir(irec, 1))) { need_metadir_inode = true; - if (no_modify) + if (add_metadir) + do_warn( + _("metadata directory root inode needs to be initialized\n")); + else if (no_modify) do_warn( _("metadata directory root inode would be lost\n")); else diff --git a/repair/protos.h b/repair/protos.h index 83734e85..51432703 100644 --- a/repair/protos.h +++ b/repair/protos.h @@ -43,3 +43,4 @@ void phase7(struct xfs_mount *, int); int verify_set_agheader(struct xfs_mount *, struct xfs_buf *, struct xfs_sb *, struct xfs_agf *, struct xfs_agi *, xfs_agnumber_t); +bool wipe_pre_metadir_file(xfs_ino_t ino); diff --git a/repair/rmap.c b/repair/rmap.c index 6a497c30..a72c3b27 100644 --- a/repair/rmap.c +++ b/repair/rmap.c @@ -49,8 +49,8 @@ bool rmap_needs_work( struct xfs_mount *mp) { - return xfs_has_reflink(mp) || - 
xfs_has_rmapbt(mp); + return xfs_has_reflink(mp) || add_reflink || + xfs_has_rmapbt(mp) || add_rmapbt; } /* diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c index 9fc81a83..95360776 100644 --- a/repair/xfs_repair.c +++ b/repair/xfs_repair.c @@ -67,6 +67,10 @@ enum c_opt_nums { CONVERT_LAZY_COUNT = 0, CONVERT_INOBTCOUNT, CONVERT_BIGTIME, + CONVERT_FINOBT, + CONVERT_REFLINK, + CONVERT_RMAPBT, + CONVERT_METADIR, C_MAX_OPTS, }; @@ -74,6 +78,10 @@ static char *c_opts[] = { [CONVERT_LAZY_COUNT] = "lazycount", [CONVERT_INOBTCOUNT] = "inobtcount", [CONVERT_BIGTIME] = "bigtime", + [CONVERT_FINOBT] = "finobt", + [CONVERT_REFLINK] = "reflink", + [CONVERT_RMAPBT] = "rmapbt", + [CONVERT_METADIR] = "metadir", [C_MAX_OPTS] = NULL, }; @@ -324,6 +332,42 @@ process_args(int argc, char **argv) _("-c bigtime only supports upgrades\n")); add_bigtime = true; break; + case CONVERT_FINOBT: + if (!val) + do_abort( + _("-c finobt requires a parameter\n")); + if (strtol(val, NULL, 0) != 1) + do_abort( + _("-c finobt only supports upgrades\n")); + add_finobt = true; + break; + case CONVERT_REFLINK: + if (!val) + do_abort( + _("-c reflink requires a parameter\n")); + if (strtol(val, NULL, 0) != 1) + do_abort( + _("-c reflink only supports upgrades\n")); + add_reflink = true; + break; + case CONVERT_RMAPBT: + if (!val) + do_abort( + _("-c rmapbt requires a parameter\n")); + if (strtol(val, NULL, 0) != 1) + do_abort( + _("-c rmapbt only supports upgrades\n")); + add_rmapbt = true; + break; + case CONVERT_METADIR: + if (!val) + do_abort( + _("-c metadir requires a parameter\n")); + if (strtol(val, NULL, 0) != 1) + do_abort( + _("-c metadir only supports upgrades\n")); + add_metadir = true; + break; default: unknown('c', val); break; -- 2.30.2