From: Darrick J. Wong <darrick.wong@xxxxxxxxxx> Introduce helper functions to scan and fix the AGFLs at mount time. Signed-off-by: Darrick J. Wong <darrick.wong@xxxxxxxxxx> --- fs/xfs/libxfs/xfs_alloc.c | 169 ++++++++++++++++++++++++++++++++++++++++++++ fs/xfs/libxfs/xfs_alloc.h | 3 + fs/xfs/libxfs/xfs_format.h | 5 + fs/xfs/xfs_mount.c | 10 +++ fs/xfs/xfs_super.c | 10 +++ 5 files changed, 197 insertions(+) diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 36101e5..8e4e1ae 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -70,6 +70,175 @@ xfs_agfl_size( return size / sizeof(xfs_agblock_t); } +/* + * Detect and fixup a screwup in the struct xfs_agfl definition that results in + * different free list sizes depending on the compiler padding added to the + * xfs_agfl. This will only matter on v5 filesystems that have the freelist + * wrapping around the end of the AGFL. If we fix the problem, we log the new + * AGF immediately; if the caller fixes any AGFLs, then it should set the + * superblock flag to say we've fixed everything. + * + * This function returns 1 if something was fixed, 0 if nothing needed fixing, + * or the usual negative error code. + */ +int +xfs_agf_fixup_freelist_count( + struct xfs_trans *tp, + struct xfs_buf *agfbp, + struct xfs_perag *pag) +{ + struct xfs_mount *mp = tp->t_mountp; + struct xfs_agf *agf; + struct xfs_buf *agflbp; + __be32 *agfl_bno; + xfs_agnumber_t agno; + uint32_t agfl_size; + uint32_t flfirst; + uint32_t fllast; + int32_t active; + int offset; + int error; + + /* v4 or fixed-agfl fs doesn't need fixing */ + if (!xfs_sb_version_hascrc(&mp->m_sb) || + xfs_sb_version_hasfixedagfl(&mp->m_sb)) + return 0; + + agfl_size = xfs_agfl_size(mp); + + agf = XFS_BUF_TO_AGF(agfbp); + agno = be32_to_cpu(agf->agf_seqno); + flfirst = be32_to_cpu(agf->agf_flfirst); + fllast = be32_to_cpu(agf->agf_fllast); + + /* + * In the original v5 code, we had a structure padding error in struct + * xfs_agfl such that XFS_AGFL_SIZE thought the structure size was 36 + * on 32-bit and 40 on 64-bit. On a 512b sector fs this lead to + * ((512 - 36) / 4) = 119 agfl entries on 32bit, whereas on 64-bit + * ((512 - 40) / 4) = 118 agfl entries. This could lead to problems + * if you migrated filesystems between 32bit and 64bit machines. + * + * In Linux 4.5 we fixed the padding problem such that the structure + * size is always 36 bytes (119 entries in the above example), but + * this still leads to verifier problems because flfirst/fllast have + * to match flcount even if they wrap around the end. This was also + * problematic because there were v5 filesystems already deployed, and + * some of them have the 40-byte padding (i.e. 118 entries). + * + * Now we're introducing a FIXED_AGFL feature that means that we've + * fixed any wrapping problems and we'll never do that again. Better + * yet, old kernels won't be touching the AGFL because it's a rocompat + * feature. We retain the 36-byte padding. + */ + + /* Empty AGFL? Make sure we aren't pointing at the end. */ + if (pag->pagf_flcount == 0) { + if (flfirst >= agfl_size || fllast >= agfl_size) { + agf->agf_flfirst = cpu_to_be32(1); + agf->agf_fllast = 0; + xfs_alloc_log_agf(tp, agfbp, + XFS_AGF_FLFIRST | XFS_AGF_FLLAST); + return 1; + } + return 0; + } + + /* Make sure we're either spot on or off by 1. */ + active = fllast - flfirst + 1; + if (active <= 0) + active += agfl_size; + if (active == pag->pagf_flcount) + return 0; + else if (active != pag->pagf_flcount + 1) + return -EFSCORRUPTED; + + /* Would this have even passed muster on an old system? */ + if (flfirst >= agfl_size - 1 || fllast >= agfl_size - 1 || + pag->pagf_flcount > agfl_size - 1) + return -EFSCORRUPTED; + + /* + * Convert a 40-byte-padded agfl into a 36-byte-padded AGFL. + * Therefore, we need to move the AGFL blocks + * bno[flfirst..agfl_size - 2] to bno[flfirst + 1...agfl_size - 1]. + * + * Reusing the example above, if we had flfirst == 116, we need + * to move bno[116] and bno[117] into bno[117] and bno[118], + * respectively, and then increment flfirst. + */ + error = xfs_alloc_read_agfl(mp, tp, agno, &agflbp); + if (error) + return error; + agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp); + memmove(&agfl_bno[flfirst + 1], &agfl_bno[flfirst], + (agfl_size - flfirst - 1) * sizeof(xfs_agblock_t)); + agfl_bno[flfirst] = cpu_to_be32(NULLAGBLOCK); + xfs_trans_buf_set_type(tp, agflbp, XFS_BLFT_AGFL_BUF); + offset = (char *)&agfl_bno[flfirst] - (char *)agflbp->b_addr; + xfs_trans_log_buf(tp, agflbp, offset, BBTOB(agflbp->b_length) - 1); + xfs_trans_brelse(tp, agflbp); + agflbp = NULL; + + be32_add_cpu(&agf->agf_flfirst, 1); + xfs_alloc_log_agf(tp, agfbp, XFS_AGF_FLFIRST); + + return 1; +} + +/* Fix all the AGFL counters. */ +int +xfs_agf_fixup_freelist_counts( + struct xfs_mount *mp) +{ + struct xfs_trans *tp; + struct xfs_perag *pag; + struct xfs_buf *agfbp; + xfs_agnumber_t agno; + bool needed_fixing = false; + int error; + + if (!xfs_sb_version_hascrc(&mp->m_sb) || + xfs_sb_version_hasfixedagfl(&mp->m_sb)) + return 0; + + /* Need to be able to log all AGFs + primary super */ + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_growdata, 0, 0, 0, &tp); + if (error) + return error; + + /* Make sure all the AGFLs do not go off the end of the array. */ + for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) { + error = xfs_alloc_read_agf(mp, tp, agno, 0, &agfbp); + if (error) + goto cancel; + if (!agfbp) { + error = -ENOMEM; + goto cancel; + } + pag = xfs_perag_get(mp, agno); + error = xfs_agf_fixup_freelist_count(tp, agfbp, pag); + xfs_perag_put(pag); + xfs_trans_brelse(tp, agfbp); + if (error == 1) + needed_fixing = true; + else if (error < 0) + goto cancel; + } + + if (!needed_fixing) + goto cancel; + + /* Add the feature flag so we don't have to do this again. */ + xfs_sb_version_addfixedagfl(&mp->m_sb); + xfs_log_sb(tp); + return xfs_trans_commit(tp); + +cancel: + xfs_trans_cancel(tp); + return error; +} + unsigned int xfs_refc_block( struct xfs_mount *mp) diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h index a311a24..2c79ea4e 100644 --- a/fs/xfs/libxfs/xfs_alloc.h +++ b/fs/xfs/libxfs/xfs_alloc.h @@ -27,6 +27,9 @@ struct xfs_trans; extern struct workqueue_struct *xfs_alloc_wq; unsigned int xfs_agfl_size(struct xfs_mount *mp); +int xfs_agf_fixup_freelist_count(struct xfs_trans *tp, struct xfs_buf *agfbp, + struct xfs_perag *pag); +int xfs_agf_fixup_freelist_counts(struct xfs_mount *mp); /* * Freespace allocation types. Argument to xfs_alloc_[v]extent. diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index 4f6f41b..6a7d65f 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -566,6 +566,11 @@ static inline bool xfs_sb_version_hasfixedagfl(struct xfs_sb *sbp) (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_FIXED_AGFL); } +static inline void xfs_sb_version_addfixedagfl(struct xfs_sb *sbp) +{ + sbp->sb_features_ro_compat |= XFS_SB_FEAT_RO_COMPAT_FIXED_AGFL; +} + /* * end of superblock version macros */ diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 98fd41c..62865aa 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -875,6 +875,16 @@ xfs_mountfs( } /* + * Make sure our AGFL counters do not wrap the end of the block in a + * troublesome manner. Don't bother if we're readonly. + */ + if (!(mp->m_flags & XFS_MOUNT_RDONLY)) { + error = xfs_agf_fixup_freelist_counts(mp); + if (error) + goto out_log_dealloc; + } + + /* * Get and sanity-check the root inode. * Save the pointer to it in the mount structure. */ diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 7aba628..5fa0501 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1353,6 +1353,16 @@ xfs_fs_remount( } /* + * Make sure our AGFL counters do not wrap the end of the block + * in a troublesome manner. + */ + error = xfs_agf_fixup_freelist_counts(mp); + if (error) { + xfs_warn(mp, "failed to set FIXED_AGFL feature"); + return error; + } + + /* * Fill out the reserve pool if it is empty. Use the stashed * value if it is non-zero, otherwise go with the default. */ -- To unsubscribe from this list: send the line "unsubscribe linux-xfs" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html