On Thu, Feb 22, 2018 at 06:00:15PM -0800, Darrick J. Wong wrote: > From: Darrick J. Wong <darrick.wong@xxxxxxxxxx> > > Signed-off-by: Darrick J. Wong <darrick.wong@xxxxxxxxxx> > --- > fs/xfs/Makefile | 1 > fs/xfs/xfs_fixups.c | 310 +++++++++++++++++++++++++++++++++++++++++++++++++++ > fs/xfs/xfs_fixups.h | 26 ++++ > fs/xfs/xfs_mount.c | 21 +++ > fs/xfs/xfs_super.c | 10 ++ > 5 files changed, 367 insertions(+), 1 deletion(-) > create mode 100644 fs/xfs/xfs_fixups.c > create mode 100644 fs/xfs/xfs_fixups.h > > > diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile > index b03c77e..f88368a 100644 > --- a/fs/xfs/Makefile > +++ b/fs/xfs/Makefile > @@ -86,6 +86,7 @@ xfs-y += xfs_aops.o \ > xfs_extent_busy.o \ > xfs_file.o \ > xfs_filestream.o \ > + xfs_fixups.o \ > xfs_fsmap.o \ > xfs_fsops.o \ > xfs_globals.o \ > diff --git a/fs/xfs/xfs_fixups.c b/fs/xfs/xfs_fixups.c > new file mode 100644 > index 0000000..0cad7bb > --- /dev/null > +++ b/fs/xfs/xfs_fixups.c > @@ -0,0 +1,310 @@ > +/* > + * Copyright (C) 2018 Oracle. All Rights Reserved. > + * > + * Author: Darrick J. Wong <darrick.wong@xxxxxxxxxx> > + * > + * This program is free software; you can redistribute it and/or > + * modify it under the terms of the GNU General Public License > + * as published by the Free Software Foundation; either version 2 > + * of the License, or (at your option) any later version. > + * > + * This program is distributed in the hope that it would be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + * GNU General Public License for more details. > + * > + * You should have received a copy of the GNU General Public License > + * along with this program; if not, write the Free Software Foundation, > + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. 
> + */ > +#include "xfs.h" > +#include "xfs_fs.h" > +#include "xfs_shared.h" > +#include "xfs_format.h" > +#include "xfs_log_format.h" > +#include "xfs_trans_resv.h" > +#include "xfs_sb.h" > +#include "xfs_mount.h" > +#include "xfs_alloc.h" > +#include "xfs_trans.h" > +#include "xfs_fixups.h" > + > +/* > + * v5 AGFL padding defects > + * > + * When the v5 format was first introduced, there was a defect in the struct > + * xfs_agfl definition that resulted in XFS_AGFL_SIZE returning different > + * values depending on the compiler padding. On a fs with 512-byte sectors, > + * this meant that XFS_AGFL_SIZE was 119 on i386, but 118 on x64. Commit > + * 96f859d52bcb1 ("libxfs: pack the agfl header structure so XFS_AGFL_SIZE is > + * correct") changed the definition to disable padding the end of the > + * structure, and was accepted into Linux 4.5. Since then, the AGFL has > + * always used the larger size (e.g. 119 entries on a 512b sector fs). > + * > + * Unfortunately, pre-4.5 kernels can produce filesystems with AGFLs that wrap > + * at the smaller size, and those kernels are not prepared to handle the > + * longer size. This typically manifests itself as an AGF verifier corruption > + * error followed by a filesystem shutdown. While we encourage admins to stay > + * current with software, we would like to avoid this intermittent breakage. > + * > + * Any v5 filesystem which has a feature bit set for a feature that was > + * introduced after Linux 4.5 will not have this problem, as such filesystems > + * cannot be mounted on older kernels. v4 filesystems are also unaffected. > + * > + * Therefore, we add two fixup functions -- the first runs at mount time to > + * detect a short-wrapped AGFL and fix it; the second runs at unmount, freeze, > + * or remount-ro time to move a wrapped AGFL to the beginning of the list. 
> + * This reduces the likelihood of a screwup to the scenario where you have (a) > + * a filesystem with no post-4.5 features (reflink, rmap), (b) the AGFL wraps, > + * (c) the filesystem goes down leaving a dirty log, and (d) the dirty > + * filesystem is mounted on an old kernel. > + */ > + > +/* > + * Decide if we need to have the agfl wrapping fixes applied. This only > + * affects v5 filesystems that do not have any features enabled that did not > + * exist when the agfl padding fix went in. > + * > + * Features already present when the fix went in were finobt, ftype, spinodes. > + * If we see something new (e.g. reflink) then don't bother. > + */ > +#define XFS_SB_FEAT_RO_COMPAT_AGFL_WRAP_ALREADY_FIXED \ > + (~(XFS_SB_FEAT_RO_COMPAT_FINOBT)) > +#define XFS_SB_FEAT_INCOMPAT_AGFL_WRAP_ALREADY_FIXED \ > + (~(XFS_SB_FEAT_INCOMPAT_FTYPE | \ > + XFS_SB_FEAT_INCOMPAT_SPINODES)) > +#define XFS_SB_FEAT_INCOMPAT_LOG_AGFL_WRAP_ALREADY_FIXED \ > + (~0) > +static inline bool xfs_sb_version_needs_agfl_wrap_fixes(struct xfs_sb *sbp) > +{ > + return xfs_sb_version_hascrc(sbp) && > + !xfs_sb_has_incompat_feature(sbp, > + XFS_SB_FEAT_INCOMPAT_AGFL_WRAP_ALREADY_FIXED) && > + !xfs_sb_has_ro_compat_feature(sbp, > + XFS_SB_FEAT_RO_COMPAT_AGFL_WRAP_ALREADY_FIXED) && > + !xfs_sb_has_incompat_log_feature(sbp, > + XFS_SB_FEAT_INCOMPAT_LOG_AGFL_WRAP_ALREADY_FIXED); > +} > + > +/* > + * Fix an AGFL wrapping that falls short of the end of the block by filling the > + * gap at the end of the block. 
> + */ > +STATIC int > +xfs_fixup_freelist_wrap_mount( > + struct xfs_trans *tp, > + struct xfs_buf *agfbp, > + struct xfs_perag *pag) > +{ > + struct xfs_mount *mp = tp->t_mountp; > + struct xfs_agf *agf; > + struct xfs_buf *agflbp; > + __be32 *agfl_bno; > + xfs_agnumber_t agno; > + uint32_t agfl_size; > + uint32_t flfirst; > + uint32_t fllast; > + int32_t active; > + int offset; > + int len; > + int error; > + > + if (pag->pagf_flcount == 0) > + return 0; > + > + agfl_size = xfs_agfl_size(mp); > + agf = XFS_BUF_TO_AGF(agfbp); > + agno = be32_to_cpu(agf->agf_seqno); > + flfirst = be32_to_cpu(agf->agf_flfirst); > + fllast = be32_to_cpu(agf->agf_fllast); > + > + /* Make sure we're either spot on or off by 1. */ > + active = fllast - flfirst + 1; > + if (active <= 0) > + active += agfl_size; > + if (active == pag->pagf_flcount) > + return 0; > + else if (active != pag->pagf_flcount + 1) > + return -EFSCORRUPTED; > + > + /* Would this have even passed muster on an old system? */ > + if (flfirst >= agfl_size - 1 || fllast >= agfl_size - 1 || > + pag->pagf_flcount > agfl_size - 1) > + return -EFSCORRUPTED; > + > + /* > + * Convert a 40-byte-padded agfl into a 36-byte-padded AGFL. > + * Therefore, we need to move the AGFL blocks > + * bno[flfirst..agfl_size - 2] to bno[flfirst + 1...agfl_size - 1]. > + * > + * Reusing the example above, if we had flfirst == 116, we need > + * to move bno[116] and bno[117] into bno[117] and bno[118], > + * respectively, and then increment flfirst. 
> + */ > + error = xfs_alloc_read_agfl(mp, tp, agno, &agflbp); > + if (error) > + return error; > + agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp); > + > + len = (agfl_size - flfirst - 1) * sizeof(xfs_agblock_t); > + memmove(&agfl_bno[flfirst + 1], &agfl_bno[flfirst], len); > + offset = (char *)&agfl_bno[flfirst + 1] - (char *)agflbp->b_addr; > + be32_add_cpu(&agf->agf_flfirst, 1); > + > + xfs_trans_buf_set_type(tp, agflbp, XFS_BLFT_AGFL_BUF); > + xfs_trans_log_buf(tp, agflbp, offset, offset + len - 1); > + xfs_trans_brelse(tp, agflbp); > + agflbp = NULL; > + xfs_alloc_log_agf(tp, agfbp, XFS_AGF_FLFIRST); > + > + return 0; > +} > + > +/* > + * Fix an AGFL that touches the end of the block by moving the first or last > + * part of the list elsewhere in the AGFL so that old kernels don't trip over > + * wrapping issues. > + */ > +STATIC int > +xfs_fixup_freelist_wrap_unmount( > + struct xfs_trans *tp, > + struct xfs_buf *agfbp, > + struct xfs_perag *pag) > +{ > + struct xfs_mount *mp = tp->t_mountp; > + struct xfs_agf *agf; > + struct xfs_buf *agflbp; > + __be32 *agfl_bno; > + xfs_agnumber_t agno; > + uint32_t agfl_size; > + uint32_t flfirst; > + uint32_t fllast; > + int offset; > + int len; > + int error; > + > + agfl_size = xfs_agfl_size(mp); > + agf = XFS_BUF_TO_AGF(agfbp); > + agno = be32_to_cpu(agf->agf_seqno); > + flfirst = be32_to_cpu(agf->agf_flfirst); > + fllast = be32_to_cpu(agf->agf_fllast); > + > + /* Empty AGFL? Make sure we aren't pointing at the end. */ > + if (pag->pagf_flcount == 0) { > + if (flfirst >= agfl_size || fllast >= agfl_size) { > + agf->agf_flfirst = cpu_to_be32(1); > + agf->agf_fllast = 0; > + xfs_alloc_log_agf(tp, agfbp, > + XFS_AGF_FLFIRST | XFS_AGF_FLLAST); > + } > + return 0; > + } > + > + /* If we don't hit the end, we're done. */ > + if (flfirst < fllast && fllast != agfl_size - 1) > + return 0; > + > + /* > + * Move a start of a wrapped list towards the start of the agfl block. 
> + * Therefore, we need to move the AGFL blocks > + * bno[flfirst..agfl_size - 1] to bno[fllast + 1...fllast + agfl_size - flfirst]. > + * Then we reset flfirst and fllast appropriately. > + * > + * Reusing the example above, if we had flfirst == 117 and fllast == 4, > + * we need to move bno[117] and bno[118] into bno[5] and bno[6], > + * respectively, and then reset flfirst and fllast. > + * > + * If it's just the last block that touches the end, only move that. > + */ > + error = xfs_alloc_read_agfl(mp, tp, agno, &agflbp); > + if (error) > + return error; > + agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, agflbp); > + > + if (fllast == agfl_size - 1) { > + /* Back the AGFL off from the end of the block. */ > + len = sizeof(xfs_agblock_t); > + agfl_bno[flfirst - 1] = agfl_bno[agfl_size - 1]; > + offset = (char *)&agfl_bno[flfirst - 1] - (char *)agflbp->b_addr; > + be32_add_cpu(&agf->agf_fllast, -1); > + be32_add_cpu(&agf->agf_flfirst, -1); > + } else { > + /* Move the first part of the AGFL towards the front. */ > + len = (agfl_size - flfirst) * sizeof(xfs_agblock_t); > + memcpy(&agfl_bno[fllast + 1], &agfl_bno[flfirst], len); > + offset = (char *)&agfl_bno[fllast + 1] - (char *)agflbp->b_addr; > + agf->agf_flfirst = 0; > + agf->agf_fllast = cpu_to_be32(pag->pagf_flcount - 1); > + } > + > + xfs_trans_buf_set_type(tp, agflbp, XFS_BLFT_AGFL_BUF); > + xfs_trans_log_buf(tp, agflbp, offset, offset + len - 1); > + xfs_trans_brelse(tp, agflbp); > + agflbp = NULL; > + xfs_alloc_log_agf(tp, agfbp, XFS_AGF_FLFIRST | XFS_AGF_FLLAST); > + > + return 0; > +} > + > +typedef int (*xfs_agf_apply_fn_t)(struct xfs_trans *tp, struct xfs_buf *agfbp, > + struct xfs_perag *pag); > + > +/* Apply something to every AGF. 
*/ > +STATIC int > +xfs_fixup_agf_apply( > + struct xfs_mount *mp, > + xfs_agf_apply_fn_t fn) > +{ > + struct xfs_trans *tp; > + struct xfs_perag *pag; > + struct xfs_buf *agfbp; > + xfs_agnumber_t agno; > + int error; > + > + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_growdata, 0, 0, 0, &tp); This can get called when we're in freeze context, so I think this needs to be: error = xfs_trans_alloc(mp, &M_RES(mp)->tr_growdata, 0, 0, XFS_TRANS_NO_WRITECOUNT, &tp); I saw xfs/119 cough up an error about locking problems and deadlock. --D > + if (error) > + return error; > + > + for (agno = 0; agno < mp->m_sb.sb_agcount; agno++) { > + error = xfs_alloc_read_agf(mp, tp, agno, 0, &agfbp); > + if (error) > + goto cancel; > + if (!agfbp) { > + error = -ENOMEM; > + goto cancel; > + } > + pag = xfs_perag_get(mp, agno); > + error = fn(tp, agfbp, pag); > + xfs_perag_put(pag); > + xfs_trans_brelse(tp, agfbp); > + if (error) > + goto cancel; > + } > + > + return xfs_trans_commit(tp); > +cancel: > + xfs_trans_cancel(tp); > + return error; > +} > + > +/* Fix AGFL wrapping so we can use the filesystem. */ > +int > +xfs_fixup_agfl_wrap_mount( > + struct xfs_mount *mp) > +{ > + if (!xfs_sb_version_needs_agfl_wrap_fixes(&mp->m_sb)) > + return 0; > + > + return xfs_fixup_agf_apply(mp, xfs_fixup_freelist_wrap_mount); > +} > + > +/* Fix AGFL wrapping so old kernels can use this filesystem. */ > +int > +xfs_fixup_agfl_wrap_unmount( > + struct xfs_mount *mp) > +{ > + if (!xfs_sb_version_needs_agfl_wrap_fixes(&mp->m_sb)) > + return 0; > + > + return xfs_fixup_agf_apply(mp, xfs_fixup_freelist_wrap_unmount); > +} > diff --git a/fs/xfs/xfs_fixups.h b/fs/xfs/xfs_fixups.h > new file mode 100644 > index 0000000..fb52a96 > --- /dev/null > +++ b/fs/xfs/xfs_fixups.h > @@ -0,0 +1,26 @@ > +/* > + * Copyright (C) 2018 Oracle. All Rights Reserved. > + * > + * Author: Darrick J. 
Wong <darrick.wong@xxxxxxxxxx> > + * > + * This program is free software; you can redistribute it and/or > + * modify it under the terms of the GNU General Public License > + * as published by the Free Software Foundation; either version 2 > + * of the License, or (at your option) any later version. > + * > + * This program is distributed in the hope that it would be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + * GNU General Public License for more details. > + * > + * You should have received a copy of the GNU General Public License > + * along with this program; if not, write the Free Software Foundation, > + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. > + */ > +#ifndef __XFS_FIXUPS_H__ > +#define __XFS_FIXUPS_H__ > + > +int xfs_fixup_agfl_wrap_mount(struct xfs_mount *mp); > +int xfs_fixup_agfl_wrap_unmount(struct xfs_mount *mp); > + > +#endif /* __XFS_FIXUPS_H__ */ > diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c > index 98fd41c..eb284aa 100644 > --- a/fs/xfs/xfs_mount.c > +++ b/fs/xfs/xfs_mount.c > @@ -46,7 +46,7 @@ > #include "xfs_refcount_btree.h" > #include "xfs_reflink.h" > #include "xfs_extent_busy.h" > - > +#include "xfs_fixups.h" > > static DEFINE_MUTEX(xfs_uuid_table_mutex); > static int xfs_uuid_table_size; > @@ -875,6 +875,16 @@ xfs_mountfs( > } > > /* > + * Make sure our AGFL counters do not wrap the end of the block > + * in a troublesome manner. > + */ > + error = xfs_fixup_agfl_wrap_mount(mp); > + if (error) { > + xfs_warn(mp, "Failed to fix agfl wrapping. Run xfs_repair."); > + goto out_log_dealloc; > + } > + > + /* > * Get and sanity-check the root inode. > * Save the pointer to it in the mount structure. > */ > @@ -1128,6 +1138,15 @@ xfs_unmountfs( > xfs_qm_unmount(mp); > > /* > + * Make sure our AGFL counters do not wrap the end of the block > + * in a troublesome manner for old kernels. 
> + */ > + error = xfs_fixup_agfl_wrap_unmount(mp); > + if (error) > + xfs_warn(mp, "Unable to fix agfl wrapping. " > + "This may cause problems on next mount."); > + > + /* > * Unreserve any blocks we have so that when we unmount we don't account > * the reserved free space as used. This is really only necessary for > * lazy superblock counting because it trusts the incore superblock > diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c > index 624a802..d9aa39a 100644 > --- a/fs/xfs/xfs_super.c > +++ b/fs/xfs/xfs_super.c > @@ -50,6 +50,7 @@ > #include "xfs_refcount_item.h" > #include "xfs_bmap_item.h" > #include "xfs_reflink.h" > +#include "xfs_fixups.h" > > #include <linux/namei.h> > #include <linux/dax.h> > @@ -1206,6 +1207,15 @@ xfs_quiesce_attr( > xfs_reclaim_inodes(mp, 0); > xfs_reclaim_inodes(mp, SYNC_WAIT); > > + /* > + * Make sure our AGFL counters do not wrap the end of the block > + * in a troublesome manner for old kernels. > + */ > + error = xfs_fixup_agfl_wrap_unmount(mp); > + if (error) > + xfs_warn(mp, "Unable to fix agfl wrapping. " > + "This may cause problems on next mount."); > + > /* Push the superblock and write an unmount record */ > error = xfs_log_sbcount(mp); > if (error) > > -- > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in > the body of a message to majordomo@xxxxxxxxxxxxxxx > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-xfs" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html