From: Darrick J. Wong <darrick.wong@xxxxxxxxxx> While running generic/103, I observed what looks like memory corruption and (with slub debugging turned on) a slub redzone warning on i386 when inactivating an inode with a 64k remote attr value. On a v5 filesystem, maximally sized remote attr values require one block more than 64k worth of space to hold both the remote attribute value header (64 bytes). On a 4k block filesystem this results in a 68k buffer; on a 64k block filesystem, this would be a 128k buffer. Note that even though we'll never use more than 65,600 bytes of this buffer, XFS_MAX_BLOCKSIZE is 64k. This is a problem because the definition of struct xfs_buf_log_format allows for XFS_MAX_BLOCKSIZE worth of dirty bitmap (64k). On i386 when we invalidate a remote attribute, xfs_trans_binval zeroes all 68k worth of the dirty map, writing right off the end of the log item and corrupting memory. We've gotten away with this on x86_64 for years because the compiler inserts a u32 padding on the end of struct xfs_buf_log_format. Fortunately for us, remote attribute values are written to disk with xfs_bwrite(), which is to say that they are not logged. Fix the problem by removing all places where we could end up creating a buffer log item for a remote attribute value and leave a note explaining why. Signed-off-by: Darrick J. Wong <darrick.wong@xxxxxxxxxx> --- fs/xfs/libxfs/xfs_attr_remote.c | 66 ++++++++++++++++++++++++++++----------- fs/xfs/libxfs/xfs_attr_remote.h | 1 + fs/xfs/xfs_attr_inactive.c | 29 +---------------- 3 files changed, 51 insertions(+), 45 deletions(-) diff --git a/fs/xfs/libxfs/xfs_attr_remote.c b/fs/xfs/libxfs/xfs_attr_remote.c index a6ef5df42669..6ffe0eb8b422 100644 --- a/fs/xfs/libxfs/xfs_attr_remote.c +++ b/fs/xfs/libxfs/xfs_attr_remote.c @@ -25,6 +25,23 @@ #define ATTR_RMTVALUE_MAPSIZE 1 /* # of map entries at once */ +/* + * Remote Attribute Values + * ======================= + * + * Remote extended attribute values are conceptually simple -- they're written + * to data blocks mapped by an inode's attribute fork, and they have an upper + * size limit of 64k. Setting a value does not involve the XFS log. + * + * However, on a v5 filesystem, maximally sized remote attr values require one + * block more than 64k worth of space to hold both the remote attribute value + * header (64 bytes). On a 4k block filesystem this results in a 68k buffer; + * on a 64k block filesystem, this would be a 128k buffer. Note that the log + * format can only handle a dirty buffer of XFS_MAX_BLOCKSIZE length (64k). + * Therefore, we /must/ ensure that remote attribute value buffers never touch + * the logging system and therefore never have a log item. + */ + /* * Each contiguous block has a header, so it is not just a simple attribute * length to FSB conversion. @@ -401,7 +418,7 @@ xfs_attr_rmtval_get( (map[i].br_startblock != HOLESTARTBLOCK)); dblkno = XFS_FSB_TO_DADDR(mp, map[i].br_startblock); dblkcnt = XFS_FSB_TO_BB(mp, map[i].br_blockcount); - error = xfs_trans_read_buf(mp, args->trans, + error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dblkno, dblkcnt, 0, &bp, &xfs_attr3_rmt_buf_ops); @@ -411,7 +428,7 @@ xfs_attr_rmtval_get( error = xfs_attr_rmtval_copyout(mp, bp, args->dp->i_ino, &offset, &valuelen, &dst); - xfs_trans_brelse(args->trans, bp); + xfs_buf_relse(bp); if (error) return error; @@ -552,6 +569,34 @@ xfs_attr_rmtval_set( return 0; } +/* Mark stale any buffers for the remote value. */ +void +xfs_attr_rmtval_stale( + struct xfs_inode *ip, + struct xfs_bmbt_irec *map) +{ + struct xfs_mount *mp = ip->i_mount; + struct xfs_buf *bp; + xfs_daddr_t dblkno; + int dblkcnt; + + ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); + if (map->br_startblock == HOLESTARTBLOCK) + return; + + dblkno = XFS_FSB_TO_DADDR(mp, map->br_startblock), + dblkcnt = XFS_FSB_TO_BB(mp, map->br_blockcount); + + /* + * If the "remote" value is in the cache, remove it. + */ + bp = xfs_buf_incore(mp->m_ddev_targp, dblkno, dblkcnt, XBF_TRYLOCK); + if (bp) { + xfs_buf_stale(bp); + xfs_buf_relse(bp); + } +} + /* * Remove the value associated with an attribute by deleting the * out-of-line buffer that it is stored on. @@ -560,7 +605,6 @@ int xfs_attr_rmtval_remove( struct xfs_da_args *args) { - struct xfs_mount *mp = args->dp->i_mount; xfs_dablk_t lblkno; int blkcnt; int error; @@ -575,9 +619,6 @@ xfs_attr_rmtval_remove( blkcnt = args->rmtblkcnt; while (blkcnt > 0) { struct xfs_bmbt_irec map; - struct xfs_buf *bp; - xfs_daddr_t dblkno; - int dblkcnt; int nmap; /* @@ -592,18 +633,7 @@ xfs_attr_rmtval_remove( ASSERT((map.br_startblock != DELAYSTARTBLOCK) && (map.br_startblock != HOLESTARTBLOCK)); - dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock), - dblkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount); - - /* - * If the "remote" value is in the cache, remove it. - */ - bp = xfs_buf_incore(mp->m_ddev_targp, dblkno, dblkcnt, XBF_TRYLOCK); - if (bp) { - xfs_buf_stale(bp); - xfs_buf_relse(bp); - bp = NULL; - } + xfs_attr_rmtval_stale(args->dp, &map); lblkno += map.br_blockcount; blkcnt -= map.br_blockcount; diff --git a/fs/xfs/libxfs/xfs_attr_remote.h b/fs/xfs/libxfs/xfs_attr_remote.h index 9d20b66ad379..ce7287ac5b1f 100644 --- a/fs/xfs/libxfs/xfs_attr_remote.h +++ b/fs/xfs/libxfs/xfs_attr_remote.h @@ -11,5 +11,6 @@ int xfs_attr3_rmt_blocks(struct xfs_mount *mp, int attrlen); int xfs_attr_rmtval_get(struct xfs_da_args *args); int xfs_attr_rmtval_set(struct xfs_da_args *args); int xfs_attr_rmtval_remove(struct xfs_da_args *args); +void xfs_attr_rmtval_stale(struct xfs_inode *ip, struct xfs_bmbt_irec *map); #endif /* __XFS_ATTR_REMOTE_H__ */ diff --git a/fs/xfs/xfs_attr_inactive.c b/fs/xfs/xfs_attr_inactive.c index 5ff49523d8ea..20453ce69a4b 100644 --- a/fs/xfs/xfs_attr_inactive.c +++ b/fs/xfs/xfs_attr_inactive.c @@ -36,11 +36,8 @@ xfs_attr3_leaf_freextent( int blkcnt) { struct xfs_bmbt_irec map; - struct xfs_buf *bp; xfs_dablk_t tblkno; - xfs_daddr_t dblkno; int tblkcnt; - int dblkcnt; int nmap; int error; @@ -63,35 +60,13 @@ xfs_attr3_leaf_freextent( ASSERT(nmap == 1); ASSERT(map.br_startblock != DELAYSTARTBLOCK); - /* - * If it's a hole, these are already unmapped - * so there's nothing to invalidate. - */ - if (map.br_startblock != HOLESTARTBLOCK) { - - dblkno = XFS_FSB_TO_DADDR(dp->i_mount, - map.br_startblock); - dblkcnt = XFS_FSB_TO_BB(dp->i_mount, - map.br_blockcount); - bp = xfs_trans_get_buf(*trans, - dp->i_mount->m_ddev_targp, - dblkno, dblkcnt, 0); - if (!bp) - return -ENOMEM; - xfs_trans_binval(*trans, bp); - /* - * Roll to next transaction. - */ - error = xfs_trans_roll_inode(trans, dp); - if (error) - return error; - } + xfs_attr_rmtval_stale(dp, &map); tblkno += map.br_blockcount; tblkcnt -= map.br_blockcount; } - return 0; + return xfs_trans_roll_inode(trans, dp); } /*