On Tue, Jun 05, 2018 at 04:40:43PM +1000, Dave Chinner wrote: > From: Dave Chinner <dchinner@xxxxxxxxxx> > > So we don't check the validity of records as we walk the btree. When > there are corrupt records in the free space btree (e.g. zero > startblock/length or beyond EOAG) we just blindly use it and things > go bad from there. That leads to assert failures on debug kernels > like this: > > XFS: Assertion failed: fs_is_ok, file: fs/xfs/libxfs/xfs_alloc.c, line: 450 > .... > Call Trace: > xfs_alloc_fixup_trees+0x368/0x5c0 > xfs_alloc_ag_vextent_near+0x79a/0xe20 > xfs_alloc_ag_vextent+0x1d3/0x330 > xfs_alloc_vextent+0x5e9/0x870 > > Or crashes like this: > > XFS (loop0): xfs_buf_find: daddr 0x7fb28 out of range, EOFS 0x8000 > ..... > BUG: unable to handle kernel NULL pointer dereference at 00000000000000c8 > .... > Call Trace: > xfs_bmap_add_extent_hole_real+0x67d/0x930 > xfs_bmapi_write+0x934/0xc90 > xfs_da_grow_inode_int+0x27e/0x2f0 > xfs_dir2_grow_inode+0x55/0x130 > xfs_dir2_sf_to_block+0x94/0x5d0 > xfs_dir2_sf_addname+0xd0/0x590 > xfs_dir_createname+0x168/0x1a0 > xfs_rename+0x658/0x9b0 > > By checking that free space records pulled from the trees are > within the valid range, we catch many of these corruptions before > they can do damage. > > This is a generic btree record checking deficiency. We need to > validate the records we fetch from all the different btrees before > we use them to catch corruptions like this. > > This patch results in a corrupt record emitting an error message and > returning -EFSCORRUPTED, and the higher layers catch that and abort: > > XFS (loop0): Size Freespace BTree record corruption in AG 0 detected! > XFS (loop0): start block 0x0 block count 0x0 > XFS (loop0): Internal error xfs_trans_cancel at line 1012 of file fs/xfs/xfs_trans.c. Caller xfs_create+0x42a/0x670 > ..... > Call Trace: > dump_stack+0x85/0xcb > xfs_trans_cancel+0x19f/0x1c0 > xfs_create+0x42a/0x670 > xfs_generic_create+0x1f6/0x2c0 > vfs_create+0xf9/0x180 > do_mknodat+0x1f9/0x210 > do_syscall_64+0x5a/0x180 > entry_SYSCALL_64_after_hwframe+0x49/0xbe > ..... > XFS (loop0): xfs_do_force_shutdown(0x8) called from line 1013 of file fs/xfs/xfs_trans.c. Return address = ffffffff81500868 > XFS (loop0): Corruption of in-memory data detected. Shutting down filesystem > > Signed-off-by: Dave Chinner <dchinner@xxxxxxxxxx> > > --- > V2: > - now with overflow checks > - slightly more robust rmap checks that validate AG header region > properly. > > fs/xfs/libxfs/xfs_alloc.c | 22 ++++++++++++++++++++-- > fs/xfs/libxfs/xfs_ialloc.c | 31 +++++++++++++++++++++++++++++- > fs/xfs/libxfs/xfs_refcount.c | 45 +++++++++++++++++++++++++++++++++++++++----- > fs/xfs/libxfs/xfs_rmap.c | 40 ++++++++++++++++++++++++++++++++++++++- > 4 files changed, 129 insertions(+), 9 deletions(-) > > diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c > index a52ffb835b16..1db50cfc0212 100644 > --- a/fs/xfs/libxfs/xfs_alloc.c > +++ b/fs/xfs/libxfs/xfs_alloc.c > @@ -215,6 +215,8 @@ xfs_alloc_get_rec( > xfs_extlen_t *len, /* output: length of extent */ > int *stat) /* output: success/failure */ > { > + struct xfs_mount *mp = cur->bc_mp; > + xfs_agnumber_t agno = cur->bc_private.a.agno; > union xfs_btree_rec *rec; > int error; > > @@ -222,12 +224,28 @@ xfs_alloc_get_rec( > if (error || !(*stat)) > return error; > if (rec->alloc.ar_blockcount == 0) > - return -EFSCORRUPTED; > + goto out_bad_rec; > > *bno = be32_to_cpu(rec->alloc.ar_startblock); > *len = be32_to_cpu(rec->alloc.ar_blockcount); > > - return error; > + /* check for valid extent range, including overflow */ > + if (!xfs_verify_agbno(mp, agno, *bno)) > + goto out_bad_rec; > + if (*bno > *bno + *len) > + goto out_bad_rec; > + if (!xfs_verify_agbno(mp, agno, *bno + *len - 1)) > + goto out_bad_rec; > + > + return 0; > + > +out_bad_rec: > + xfs_warn(mp, > + "%s Freespace BTree record corruption in AG %d detected!", > + cur->bc_btnum == XFS_BTNUM_BNO ? "Block" : "Size", agno); > + xfs_warn(mp, > + "start block 0x%x block count 0x%x", *bno, *len); > + return -EFSCORRUPTED; > } > > /* > diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c > index ec5ea02b5553..3f551eb29157 100644 > --- a/fs/xfs/libxfs/xfs_ialloc.c > +++ b/fs/xfs/libxfs/xfs_ialloc.c > @@ -121,16 +121,45 @@ xfs_inobt_get_rec( > struct xfs_inobt_rec_incore *irec, > int *stat) > { > + struct xfs_mount *mp = cur->bc_mp; > + xfs_agnumber_t agno = cur->bc_private.a.agno; > union xfs_btree_rec *rec; > int error; > + uint64_t realfree; > > error = xfs_btree_get_rec(cur, &rec, stat); > if (error || *stat == 0) > return error; > > - xfs_inobt_btrec_to_irec(cur->bc_mp, rec, irec); > + xfs_inobt_btrec_to_irec(mp, rec, irec); > + > + if (!xfs_verify_agino(mp, agno, irec->ir_startino)) > + goto out_bad_rec; > + if (irec->ir_count < XFS_INODES_PER_HOLEMASK_BIT || > + irec->ir_count > XFS_INODES_PER_CHUNK) > + goto out_bad_rec; > + if (irec->ir_freecount > XFS_INODES_PER_CHUNK) > + goto out_bad_rec; > + > + /* if there are no holes, return the first available offset */ > + if (!xfs_inobt_issparse(irec->ir_holemask)) > + realfree = irec->ir_free; > + else > + realfree = irec->ir_free & xfs_inobt_irec_to_allocmask(irec); > + if (hweight64(realfree) != irec->ir_freecount) > + goto out_bad_rec; > > return 0; > + > +out_bad_rec: > + xfs_warn(mp, > + "%s Inode BTree record corruption in AG %d detected!", > + cur->bc_btnum == XFS_BTNUM_INO ? "Used" : "Free", agno); > + xfs_warn(mp, > +"start inode 0x%x, count 0x%x, free 0x%x freemask 0x%llx, holemask 0x%x", > + irec->ir_startino, irec->ir_count, irec->ir_freecount, > + irec->ir_free, irec->ir_holemask); > + return -EFSCORRUPTED; > } > > /* > diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c > index ed5704c7dcf5..9a2a2004af24 100644 > --- a/fs/xfs/libxfs/xfs_refcount.c > +++ b/fs/xfs/libxfs/xfs_refcount.c > @@ -111,16 +111,51 @@ xfs_refcount_get_rec( > struct xfs_refcount_irec *irec, > int *stat) > { > + struct xfs_mount *mp = cur->bc_mp; > + xfs_agnumber_t agno = cur->bc_private.a.agno; > union xfs_btree_rec *rec; > int error; > + xfs_agblock_t realstart; > > error = xfs_btree_get_rec(cur, &rec, stat); > - if (!error && *stat == 1) { > - xfs_refcount_btrec_to_irec(rec, irec); > - trace_xfs_refcount_get(cur->bc_mp, cur->bc_private.a.agno, > - irec); > + if (error || !*stat) > + return error; > + > + xfs_refcount_btrec_to_irec(rec, irec); > + > + agno = cur->bc_private.a.agno; > + if (irec->rc_blockcount == 0 || irec->rc_blockcount > MAXREFCEXTLEN) > + goto out_bad_rec; > + > + /* handle special COW-staging state */ > + realstart = irec->rc_startblock; > + if (realstart & XFS_REFC_COW_START) { > + if (irec->rc_refcount != 1) > + goto out_bad_rec; > + realstart &= ~XFS_REFC_COW_START; > } If the record does not have the cow "flag" set then the refcount cannot be 1, so this needs an else clause: } else { if (irec->rc_refcount == 1) goto out_bad_rec; } > - return error; > + > + /* check for valid extent range, including overflow */ > + if (!xfs_verify_agbno(mp, agno, realstart)) > + goto out_bad_rec; > + if (realstart > realstart + irec->rc_blockcount) > + goto out_bad_rec; > + if (!xfs_verify_agbno(mp, agno, realstart + irec->rc_blockcount - 1)) > + goto out_bad_rec; > + > + if (irec->rc_refcount == 0 || irec->rc_refcount > MAXREFCOUNT) > + goto out_bad_rec; > + > + trace_xfs_refcount_get(cur->bc_mp, cur->bc_private.a.agno, irec); > + return 0; > + > +out_bad_rec: > + xfs_warn(mp, > + "Refcount BTree record corruption in AG %d detected!", agno); > + xfs_warn(mp, > + "Start block 0x%x, block count 0x%x, references 0x%x", > + irec->rc_startblock, irec->rc_blockcount, irec->rc_refcount); > + return -EFSCORRUPTED; > } > > /* > diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c > index 87711f9af625..e794bcd6591a 100644 > --- a/fs/xfs/libxfs/xfs_rmap.c > +++ b/fs/xfs/libxfs/xfs_rmap.c > @@ -191,6 +191,8 @@ xfs_rmap_get_rec( > struct xfs_rmap_irec *irec, > int *stat) > { > + struct xfs_mount *mp = cur->bc_mp; > + xfs_agnumber_t agno = cur->bc_private.a.agno; > union xfs_btree_rec *rec; > int error; > > @@ -198,7 +200,43 @@ xfs_rmap_get_rec( > if (error || !*stat) > return error; > > - return xfs_rmap_btrec_to_irec(rec, irec); > + if (xfs_rmap_btrec_to_irec(rec, irec)) > + goto out_bad_rec; > + > + if (irec->rm_blockcount == 0) > + goto out_bad_rec; > + if (irec->rm_startblock <= XFS_AGFL_BLOCK(mp)) { > + if (irec->rm_owner != XFS_RMAP_OWN_FS) > + goto out_bad_rec; > + if (irec->rm_blockcount != XFS_AGFL_BLOCK(mp) + 1) > + goto out_bad_rec; > + } else { > + /* check for valid extent range, including overflow */ > + if (!xfs_verify_agbno(mp, agno, irec->rm_startblock)) > + goto out_bad_rec; > + if (irec->rm_startblock > > + irec->rm_startblock + irec->rm_blockcount) > + goto out_bad_rec; > + if (!xfs_verify_agbno(mp, agno, > + irec->rm_startblock + irec->rm_blockcount - 1)) > + goto out_bad_rec; > + } > + > + if (irec->rm_owner == 0) > + goto out_bad_rec; > + if (irec->rm_owner > XFS_MAXINUMBER && > + irec->rm_owner <= XFS_RMAP_OWN_MIN) rm_owner should be (between XFS_RMAP_OWN_FS and XFS_RMAP_OWN_MIN) or (pass xfs_verify_fsino()) since we cannot have inodes between EOFS and MAXINUMBER. > + goto out_bad_rec; > + Should we check the rmap flags here? > + return 0; > +out_bad_rec: > + xfs_warn(mp, > + "RMAP BTree record corruption in AG %d detected!", agno); "RMap" ? Or "Reverse Mapping"? (Consistent capitalization blah blah...) --D > + xfs_warn(mp, > + "Owner 0x%llx, flags 0x%x, start block 0x%x block count 0x%x", > + irec->rm_owner, irec->rm_flags, irec->rm_startblock, > + irec->rm_blockcount); > + return -EFSCORRUPTED; > } > > struct xfs_find_left_neighbor_info { > -- > To unsubscribe from this list: send the line "unsubscribe linux-xfs" in > the body of a message to majordomo@xxxxxxxxxxxxxxx > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-xfs" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html