[PATCH 28/36] xfs_db: use inode cluster buffers for inode IO

Dave Chinner <david@xxxxxxxxxxxxx> · Wed, 13 Nov 2013 17:40:52 +1100

From: Dave Chinner <dchinner@xxxxxxxxxx>

When we mount the filesystem inside xfs_db, libxfs is tasked with
reading some information from disk, such as root inodes. Because
libxfs does this inode reading, it uses inode cluster buffers to
read the inodes. xfs_db, OTOH, just uses FSB sized buffers to read
inodes, and hence xfs_db throws a warning when reading the root
inode block like so:

$ sudo xfs_db -c "sb 0" -c "p rootino" -c "inode 32" /dev/vda
Version 5 superblock detected. xfsprogs has EXPERIMENTAL support enabled!
Use of these features is at your own risk!
rootino = 32
7f59f20e6740: Badness in key lookup (length)
bp=(bno 0x20, len 8192 bytes) key=(bno 0x20, len 1024 bytes)
$

There is another way this can happen, and that is dumping raw data
from disk using either the "fsb NNN" or "daddr MMM" commands to dump
untyped information. This is always read in sector or filesystem
block units, and so will cause similar badness warnings.

To avoid this problem when reading inodes, teach xfs_db to read
inode clusters rather than individual filesystem blocks when asked
to read an inode.

Signed-off-by: Dave Chinner <dchinner@xxxxxxxxxx>
Reviewed-by: Christoph Hellwig <hch@xxxxxx>
---
 db/inode.c | 33 +++++++++++++++++++++++++++++++--
 1 file changed, 31 insertions(+), 2 deletions(-)

diff --git a/db/inode.c b/db/inode.c
index 4090855..24170ba 100644
--- a/db/inode.c
+++ b/db/inode.c
@@ -623,6 +623,14 @@ inode_u_symlink_count(
 		(int)be64_to_cpu(dip->di_size) : 0;
 }
 
+/*
+ * We are now using libxfs for our IO backend, so we should always try to use
+ * inode cluster buffers rather than filesystem block sized buffers for reading
+ * inodes. This means that we always use the same buffers as libxfs operations
+ * do, and that avoids buffer cache issues caused by overlapping buffers. This
+ * can be seen clearly when trying to read the root inode. Much of this logic is
+ * similar to libxfs_imap().
+ */
 void
 set_cur_inode(
 	xfs_ino_t	ino)
@@ -632,6 +640,9 @@ set_cur_inode(
 	xfs_agnumber_t	agno;
 	xfs_dinode_t	*dip;
 	int		offset;
+	int		numblks = blkbb;
+	xfs_agblock_t	cluster_agbno;
+
 
 	agno = XFS_INO_TO_AGNO(mp, ino);
 	agino = XFS_INO_TO_AGINO(mp, ino);
@@ -644,6 +655,24 @@ set_cur_inode(
 		return;
 	}
 	cur_agno = agno;
+
+	if (mp->m_inode_cluster_size > mp->m_sb.sb_blocksize &&
+	    mp->m_inoalign_mask) {
+		xfs_agblock_t	chunk_agbno;
+		xfs_agblock_t	offset_agbno;
+		int		blks_per_cluster;
+
+		blks_per_cluster = mp->m_inode_cluster_size >>
+							mp->m_sb.sb_blocklog;
+		offset_agbno = agbno & mp->m_inoalign_mask;
+		chunk_agbno = agbno - offset_agbno;
+		cluster_agbno = chunk_agbno +
+			((offset_agbno / blks_per_cluster) * blks_per_cluster);
+		offset += ((agbno - cluster_agbno) * mp->m_sb.sb_inopblock);
+		numblks = XFS_FSB_TO_BB(mp, blks_per_cluster);
+	} else
+		cluster_agbno = agbno;
+
 	/*
 	 * First set_cur to the block with the inode
 	 * then use off_cur to get the right part of the buffer.
@@ -651,8 +680,8 @@ set_cur_inode(
 	ASSERT(typtab[TYP_INODE].typnm == TYP_INODE);
 
 	/* ingore ring update here, do it explicitly below */
-	set_cur(&typtab[TYP_INODE], XFS_AGB_TO_DADDR(mp, agno, agbno),
-		blkbb, DB_RING_IGN, NULL);
+	set_cur(&typtab[TYP_INODE], XFS_AGB_TO_DADDR(mp, agno, cluster_agbno),
+		numblks, DB_RING_IGN, NULL);
 	off_cur(offset << mp->m_sb.sb_inodelog, mp->m_sb.sb_inodesize);
 	dip = iocur_top->data;
 	iocur_top->ino_crc_ok = libxfs_dinode_verify(mp, ino, dip);
-- 
1.8.4.rc3

_______________________________________________
xfs mailing list
xfs@xxxxxxxxxxx
http://oss.sgi.com/mailman/listinfo/xfs