[RFC PATCH] xfs: prevent spoofing of rtbitmap blocks when recovering buffers

"Darrick J. Wong" <djwong@xxxxxxxxxx> · Tue, 27 Jul 2021 12:15:02 -0700

From: Darrick J. Wong <djwong@xxxxxxxxxx>

While reviewing the buffer item recovery code, the thought occurred to
me: in V5 filesystems we use log sequence number (LSN) tracking to avoid
replaying older metadata updates against newer log items.  However, we
use the magic number of the ondisk buffer to find the LSN of the ondisk
metadata, which means that if an attacker can control the layout of the
realtime device precisely enough that the start of an rt bitmap block
matches the magic and UUID of some other kind of block, they can control
the purported LSN of that spoofed block and thereby break log replay.

Since realtime bitmap and summary blocks don't have headers at all, we
have no way to tell if a block really should be replayed.  The best we
can do is replay unconditionally and hope for the best.

XXX: Won't this leave us with a corrupt rtbitmap if recovery also fails?
In other words, the usual problems that happen when you /don't/ track
buffer age with LSNs?  I've noticed that the recoveryloop tests get hung
up on incorrect frextents after a few iterations, but have not had time
to figure out if the rtbitmap recovery is wrong, or if there's something
broken with the old-style summary updates for rt counters.

XXXX: Maybe someone should fix the ondisk format to track the (magic,
blkno, lsn, uuid) like we do everything else in V5?  That's gonna suck
for 64-bit divisions...

Signed-off-by: Darrick J. Wong <djwong@xxxxxxxxxx>
---
 fs/xfs/xfs_buf_item_recover.c |   32 +++++++++++++++++++++++++-------
 1 file changed, 25 insertions(+), 7 deletions(-)

diff --git a/fs/xfs/xfs_buf_item_recover.c b/fs/xfs/xfs_buf_item_recover.c
index 05fd816edf59..a776bcfdf0c1 100644
--- a/fs/xfs/xfs_buf_item_recover.c
+++ b/fs/xfs/xfs_buf_item_recover.c
@@ -698,19 +698,29 @@ xlog_recover_do_inode_buffer(
 static xfs_lsn_t
 xlog_recover_get_buf_lsn(
 	struct xfs_mount	*mp,
-	struct xfs_buf		*bp)
+	struct xfs_buf		*bp,
+	struct xfs_buf_log_format *buf_f)
 {
 	uint32_t		magic32;
 	uint16_t		magic16;
 	uint16_t		magicda;
 	void			*blk = bp->b_addr;
 	uuid_t			*uuid;
-	xfs_lsn_t		lsn = -1;
+	uint16_t		blft;
+	xfs_lsn_t		lsn = NULLCOMMITLSN;
 
 	/* v4 filesystems always recover immediately */
 	if (!xfs_sb_version_hascrc(&mp->m_sb))
 		goto recover_immediately;
 
+	/*
+	 * realtime bitmap and summary file blocks do not have magic numbers or
+	 * UUIDs, so we must recover them immediately.
+	 */
+	blft = xfs_blft_from_flags(buf_f);
+	if (blft == XFS_BLFT_RTBITMAP_BUF || blft == XFS_BLFT_RTSUMMARY_BUF)
+		goto recover_immediately;
+
 	magic32 = be32_to_cpu(*(__be32 *)blk);
 	switch (magic32) {
 	case XFS_ABTB_CRC_MAGIC:
@@ -786,7 +796,13 @@ xlog_recover_get_buf_lsn(
 		break;
 	}
 
-	if (lsn != (xfs_lsn_t)-1) {
+	/*
+	 * ondisk buffers should never have a zero LSN, so recover those
+	 * buffers immediately.
+	 */
+	if (!lsn)
+		lsn = NULLCOMMITLSN;
+	if (lsn != NULLCOMMITLSN) {
 		if (!uuid_equal(&mp->m_sb.sb_meta_uuid, uuid))
 			goto recover_immediately;
 		return lsn;
@@ -805,7 +821,9 @@ xlog_recover_get_buf_lsn(
 		break;
 	}
 
-	if (lsn != (xfs_lsn_t)-1) {
+	if (!lsn)
+		lsn = NULLCOMMITLSN;
+	if (lsn != NULLCOMMITLSN) {
 		if (!uuid_equal(&mp->m_sb.sb_uuid, uuid))
 			goto recover_immediately;
 		return lsn;
@@ -834,7 +852,7 @@ xlog_recover_get_buf_lsn(
 	/* unknown buffer contents, recover immediately */
 
 recover_immediately:
-	return (xfs_lsn_t)-1;
+	return NULLCOMMITLSN;
 
 }
 
@@ -920,8 +938,8 @@ xlog_recover_buf_commit_pass2(
 	 * the verifier will be reset to match whatever recover turns that
 	 * buffer into.
 	 */
-	lsn = xlog_recover_get_buf_lsn(mp, bp);
-	if (lsn && lsn != -1 && XFS_LSN_CMP(lsn, current_lsn) >= 0) {
+	lsn = xlog_recover_get_buf_lsn(mp, bp, buf_f);
+	if (lsn != NULLCOMMITLSN && XFS_LSN_CMP(lsn, current_lsn) > 0) {
 		trace_xfs_log_recover_buf_skip(log, buf_f);
 		xlog_recover_validate_buf_type(mp, bp, buf_f, NULLCOMMITLSN);
 		goto out_release;