[PATCH 28/32] xfs_db: avoid libxfs buffer lookup warnings

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Dave Chinner <dchinner@xxxxxxxxxx>

When we mount the filesystem inside xfs_db, libxfs is tasked with
reading some information from disk, such as root inodes. Because
libxfs does this inode reading, it uses inode cluster buffers to
read the inodes. xfs_db, OTOH, just uses FSB sized buffers to read
inodes, and hence xfs_db throws a warning when reading the root
inode block like so:

$ sudo xfs_db -c "sb 0" -c "p rootino" -c "inode 32" /dev/vda
Version 5 superblock detected. xfsprogs has EXPERIMENTAL support enabled!
Use of these features is at your own risk!
rootino = 32
7f59f20e6740: Badness in key lookup (length)
bp=(bno 0x20, len 8192 bytes) key=(bno 0x20, len 1024 bytes)
$

There is another way this can happen, and that is dumping raw data
from disk using either the "fsb NNN" or "daddr MMM" commands to dump
untyped information. This is always read in sector or filesystem
block units, and so will cause similar badness warnings.

xfs_db is unique in the way it can read the same blocks with
different lengths, so we really need a way to avoid having duplicate
buffers in the cache. To handle this in a generic way, introduce a
"purge on compare failure" feature to libxfs.

What this feature does is instead of throwing a warning when a
buffer miscompare occurs (e.g. due to a length mismatch), it purges
the buffer that is in cache from the cache. We can do this safely in
the context of xfs_db because it always writes back changes made to
buffers before it releases the reference to the buffer. Hence we can
purge buffers directly from the lookup code without having to worry
about whether they are dirty or not.

Doing this purge on miscompare operation avoids the
problem that libxfs is currently warning about, and hence if the
feature flag is set then we don't need to warn about miscompares any
more. Hence the whole problem goes away entirely for xfs_db, without
affecting any of the other users of libxfs based IO.

Signed-off-by: Dave Chinner <dchinner@xxxxxxxxxx>
---
 db/init.c           |  1 +
 db/inode.c          | 33 +++++++++++++++++++--
 db/io.c             |  4 ++-
 include/cache.h     | 22 +++++++++++++-
 include/libxfs.h    |  2 ++
 libxfs/cache.c      | 83 +++++++++++++++++++++++++++++++++++++++--------------
 libxfs/init.c       |  6 ++--
 libxfs/rdwr.c       | 30 ++++++++++---------
 repair/xfs_repair.c |  4 +--
 9 files changed, 143 insertions(+), 42 deletions(-)

diff --git a/db/init.c b/db/init.c
index a9b357b..2b643f9 100644
--- a/db/init.c
+++ b/db/init.c
@@ -109,6 +109,7 @@ init(
 	else
 		x.dname = fsdevice;
 
+	x.bcache_flags = CACHE_MISCOMPARE_PURGE;
 	if (!libxfs_init(&x)) {
 		fputs(_("\nfatal error -- couldn't initialize XFS library\n"),
 			stderr);
diff --git a/db/inode.c b/db/inode.c
index 4090855..24170ba 100644
--- a/db/inode.c
+++ b/db/inode.c
@@ -623,6 +623,14 @@ inode_u_symlink_count(
 		(int)be64_to_cpu(dip->di_size) : 0;
 }
 
+/*
+ * We are now using libxfs for our IO backend, so we should always try to use
+ * inode cluster buffers rather than filesystem block sized buffers for reading
+ * inodes. This means that we always use the same buffers as libxfs operations
+ * does, and that avoids buffer cache issues caused by overlapping buffers. This
+ * can be seen clearly when trying to read the root inode. Much of this logic is
+ * similar to libxfs_imap().
+ */
 void
 set_cur_inode(
 	xfs_ino_t	ino)
@@ -632,6 +640,9 @@ set_cur_inode(
 	xfs_agnumber_t	agno;
 	xfs_dinode_t	*dip;
 	int		offset;
+	int		numblks = blkbb;
+	xfs_agblock_t	cluster_agbno;
+
 
 	agno = XFS_INO_TO_AGNO(mp, ino);
 	agino = XFS_INO_TO_AGINO(mp, ino);
@@ -644,6 +655,24 @@ set_cur_inode(
 		return;
 	}
 	cur_agno = agno;
+
+	if (mp->m_inode_cluster_size > mp->m_sb.sb_blocksize &&
+	    mp->m_inoalign_mask) {
+		xfs_agblock_t	chunk_agbno;
+		xfs_agblock_t	offset_agbno;
+		int		blks_per_cluster;
+
+		blks_per_cluster = mp->m_inode_cluster_size >>
+							mp->m_sb.sb_blocklog;
+		offset_agbno = agbno & mp->m_inoalign_mask;
+		chunk_agbno = agbno - offset_agbno;
+		cluster_agbno = chunk_agbno +
+			((offset_agbno / blks_per_cluster) * blks_per_cluster);
+		offset += ((agbno - cluster_agbno) * mp->m_sb.sb_inopblock);
+		numblks = XFS_FSB_TO_BB(mp, blks_per_cluster);
+	} else
+		cluster_agbno = agbno;
+
 	/*
 	 * First set_cur to the block with the inode
 	 * then use off_cur to get the right part of the buffer.
@@ -651,8 +680,8 @@ set_cur_inode(
 	ASSERT(typtab[TYP_INODE].typnm == TYP_INODE);
 
 	/* ingore ring update here, do it explicitly below */
-	set_cur(&typtab[TYP_INODE], XFS_AGB_TO_DADDR(mp, agno, agbno),
-		blkbb, DB_RING_IGN, NULL);
+	set_cur(&typtab[TYP_INODE], XFS_AGB_TO_DADDR(mp, agno, cluster_agbno),
+		numblks, DB_RING_IGN, NULL);
 	off_cur(offset << mp->m_sb.sb_inodelog, mp->m_sb.sb_inodesize);
 	dip = iocur_top->data;
 	iocur_top->ino_crc_ok = libxfs_dinode_verify(mp, ino, dip);
diff --git a/db/io.c b/db/io.c
index 7281148..123214d 100644
--- a/db/io.c
+++ b/db/io.c
@@ -104,8 +104,10 @@ pop_cur(void)
 		dbprintf(_("can't pop anything from I/O stack\n"));
 		return;
 	}
-	if (iocur_top->bp)
+	if (iocur_top->bp) {
 		libxfs_putbuf(iocur_top->bp);
+		iocur_top->bp = NULL;
+	}
 	if (iocur_top->bbmap) {
 		free(iocur_top->bbmap);
 		iocur_top->bbmap = NULL;
diff --git a/include/cache.h b/include/cache.h
index 0c0a1c5..c5757d0 100644
--- a/include/cache.h
+++ b/include/cache.h
@@ -18,6 +18,25 @@
 #ifndef __CACHE_H__
 #define __CACHE_H__
 
+/*
+ * initialisation flags
+ */
+/*
+ * xfs_db always writes changes immediately, and so we need to purge buffers
+ * when we get a buffer lookup mismatch due to readin the same block with a
+ * different buffer configuration.
+ */
+#define CACHE_MISCOMPARE_PURGE	(1 << 0)
+
+/*
+ * cache object campare return values
+ */
+enum {
+	CACHE_HIT,
+	CACHE_MISS,
+	CACHE_PURGE,
+};
+
 #define	HASH_CACHE_RATIO	8
 
 /*
@@ -82,6 +101,7 @@ struct cache_node {
 };
 
 struct cache {
+	int			c_flags;	/* behavioural flags */
 	unsigned int		c_maxcount;	/* max cache nodes */
 	unsigned int		c_count;	/* count of nodes */
 	pthread_mutex_t		c_mutex;	/* node count mutex */
@@ -99,7 +119,7 @@ struct cache {
 	unsigned int 		c_max;		/* max nodes ever used */
 };
 
-struct cache *cache_init(unsigned int, struct cache_operations *);
+struct cache *cache_init(int, unsigned int, struct cache_operations *);
 void cache_destroy(struct cache *);
 void cache_walk(struct cache *, cache_walk_t);
 void cache_purge(struct cache *);
diff --git a/include/libxfs.h b/include/libxfs.h
index d28ac48..049b217 100644
--- a/include/libxfs.h
+++ b/include/libxfs.h
@@ -110,6 +110,8 @@ typedef struct {
 	int             dfd;            /* data subvolume file descriptor */
 	int             logfd;          /* log subvolume file descriptor */
 	int             rtfd;           /* realtime subvolume file descriptor */
+	int		icache_flags;	/* cache init flags */
+	int		bcache_flags;	/* cache init flags */
 } libxfs_init_t;
 
 #define LIBXFS_EXIT_ON_FAILURE	0x0001	/* exit the program if a call fails */
diff --git a/libxfs/cache.c b/libxfs/cache.c
index 56b24e7..84d2860 100644
--- a/libxfs/cache.c
+++ b/libxfs/cache.c
@@ -38,6 +38,7 @@ static unsigned int cache_generic_bulkrelse(struct cache *, struct list_head *);
 
 struct cache *
 cache_init(
+	int			flags,
 	unsigned int		hashsize,
 	struct cache_operations	*cache_operations)
 {
@@ -53,6 +54,7 @@ cache_init(
 		return NULL;
 	}
 
+	cache->c_flags = flags;
 	cache->c_count = 0;
 	cache->c_max = 0;
 	cache->c_hits = 0;
@@ -289,6 +291,34 @@ cache_overflowed(
 	return (cache->c_maxcount == cache->c_max);
 }
 
+
+static int
+__cache_node_purge(
+	struct cache *		cache,
+	struct cache_node *	node)
+{
+	int			count;
+	struct cache_mru *	mru;
+
+	pthread_mutex_lock(&node->cn_mutex);
+	count = node->cn_count;
+	if (count != 0) {
+		pthread_mutex_unlock(&node->cn_mutex);
+		return count;
+	}
+	mru = &cache->c_mrus[node->cn_priority];
+	pthread_mutex_lock(&mru->cm_mutex);
+	list_del_init(&node->cn_mru);
+	mru->cm_count--;
+	pthread_mutex_unlock(&mru->cm_mutex);
+
+	pthread_mutex_unlock(&node->cn_mutex);
+	pthread_mutex_destroy(&node->cn_mutex);
+	list_del_init(&node->cn_hash);
+	cache->relse(node);
+	return count;
+}
+
 /*
  * Lookup in the cache hash table.  With any luck we'll get a cache
  * hit, in which case this will all be over quickly and painlessly.
@@ -308,8 +338,10 @@ cache_node_get(
 	struct cache_mru *	mru;
 	struct list_head *	head;
 	struct list_head *	pos;
+	struct list_head *	n;
 	unsigned int		hashidx;
 	int			priority = 0;
+	int			purged = 0;
 
 	hashidx = cache->hash(key, cache->c_hashsize);
 	hash = cache->c_hash + hashidx;
@@ -317,10 +349,26 @@ cache_node_get(
 
 	for (;;) {
 		pthread_mutex_lock(&hash->ch_mutex);
-		for (pos = head->next; pos != head; pos = pos->next) {
+		for (pos = head->next, n = pos->next; pos != head;
+						pos = n, n = pos->next) {
+			int result;
+
 			node = list_entry(pos, struct cache_node, cn_hash);
-			if (!cache->compare(node, key))
-				continue;
+			result = cache->compare(node, key);
+			switch (result) {
+			case CACHE_HIT:
+				break;
+			case CACHE_PURGE:
+				if ((cache->c_flags & CACHE_MISCOMPARE_PURGE) &&
+				    !__cache_node_purge(cache, node)) {
+					purged++;
+					hash->ch_count--;
+				}
+				/* FALL THROUGH */
+			case CACHE_MISS:
+				goto next_object;
+			}
+
 			/*
 			 * node found, bump node's reference count, remove it
 			 * from its MRU list, and update stats.
@@ -347,6 +395,8 @@ cache_node_get(
 
 			*nodep = node;
 			return 0;
+next_object:
+			continue;	/* what the hell, gcc? */
 		}
 		pthread_mutex_unlock(&hash->ch_mutex);
 		/*
@@ -375,6 +425,12 @@ cache_node_get(
 	list_add(&node->cn_hash, &hash->ch_list);
 	pthread_mutex_unlock(&hash->ch_mutex);
 
+	if (purged) {
+		pthread_mutex_lock(&cache->c_mutex);
+		cache->c_count -= purged;
+		pthread_mutex_unlock(&cache->c_mutex);
+	}
+
 	*nodep = node;
 	return 1;
 }
@@ -457,7 +513,6 @@ cache_node_purge(
 	struct list_head *	pos;
 	struct list_head *	n;
 	struct cache_hash *	hash;
-	struct cache_mru *	mru;
 	int			count = -1;
 
 	hash = cache->c_hash + cache->hash(key, cache->c_hashsize);
@@ -468,23 +523,9 @@ cache_node_purge(
 		if ((struct cache_node *)pos != node)
 			continue;
 
-		pthread_mutex_lock(&node->cn_mutex);
-		count = node->cn_count;
-		if (count != 0) {
-			pthread_mutex_unlock(&node->cn_mutex);
-			break;
-		}
-		mru = &cache->c_mrus[node->cn_priority];
-		pthread_mutex_lock(&mru->cm_mutex);
-		list_del_init(&node->cn_mru);
-		mru->cm_count--;
-		pthread_mutex_unlock(&mru->cm_mutex);
-
-		pthread_mutex_unlock(&node->cn_mutex);
-		pthread_mutex_destroy(&node->cn_mutex);
-		list_del_init(&node->cn_hash);
-		hash->ch_count--;
-		cache->relse(node);
+		count = __cache_node_purge(cache, node);
+		if (!count)
+			hash->ch_count--;
 		break;
 	}
 	pthread_mutex_unlock(&hash->ch_mutex);
diff --git a/libxfs/init.c b/libxfs/init.c
index 229aa50..637f29e 100644
--- a/libxfs/init.c
+++ b/libxfs/init.c
@@ -337,10 +337,12 @@ libxfs_init(libxfs_init_t *a)
 		chdir(curdir);
 	if (!libxfs_ihash_size)
 		libxfs_ihash_size = LIBXFS_IHASHSIZE(sbp);
-	libxfs_icache = cache_init(libxfs_ihash_size, &libxfs_icache_operations);
+	libxfs_icache = cache_init(a->icache_flags, libxfs_ihash_size,
+				   &libxfs_icache_operations);
 	if (!libxfs_bhash_size)
 		libxfs_bhash_size = LIBXFS_BHASHSIZE(sbp);
-	libxfs_bcache = cache_init(libxfs_bhash_size, &libxfs_bcache_operations);
+	libxfs_bcache = cache_init(a->bcache_flags, libxfs_bhash_size,
+				   &libxfs_bcache_operations);
 	use_xfs_buf_lock = a->usebuflock;
 	manage_zones(0);
 	rval = 1;
diff --git a/libxfs/rdwr.c b/libxfs/rdwr.c
index 8d8bcfc..6d6a915 100644
--- a/libxfs/rdwr.c
+++ b/libxfs/rdwr.c
@@ -323,20 +323,24 @@ libxfs_bcompare(struct cache_node *node, cache_key_t key)
 	struct xfs_buf	*bp = (struct xfs_buf *)node;
 	struct xfs_bufkey *bkey = (struct xfs_bufkey *)key;
 
-#ifdef IO_BCOMPARE_CHECK
 	if (bp->b_target->dev == bkey->buftarg->dev &&
-	    bp->b_bn == bkey->blkno &&
-	    bp->b_bcount != BBTOB(bkey->bblen))
-		fprintf(stderr, "%lx: Badness in key lookup (length)\n"
-			"bp=(bno 0x%llx, len %u bytes) key=(bno 0x%llx, len %u bytes)\n",
-			pthread_self(),
-			(unsigned long long)bp->b_bn, (int)bp->b_bcount,
-			(unsigned long long)bkey->blkno, BBTOB(bkey->bblen));
+	    bp->b_bn == bkey->blkno) {
+		if (bp->b_bcount == BBTOB(bkey->bblen))
+			return CACHE_HIT;
+#ifdef IO_BCOMPARE_CHECK
+		if (!(libxfs_bcache->c_flags & CACHE_MISCOMPARE_PURGE)) {
+			fprintf(stderr,
+	"%lx: Badness in key lookup (length)\n"
+	"bp=(bno 0x%llx, len %u bytes) key=(bno 0x%llx, len %u bytes)\n",
+				pthread_self(),
+				(unsigned long long)bp->b_bn, (int)bp->b_bcount,
+				(unsigned long long)bkey->blkno,
+				BBTOB(bkey->bblen));
+		}
 #endif
-
-	return (bp->b_target->dev == bkey->buftarg->dev &&
-		bp->b_bn == bkey->blkno &&
-		bp->b_bcount == BBTOB(bkey->bblen));
+		return CACHE_PURGE;
+	}
+	return CACHE_MISS;
 }
 
 void
@@ -1029,7 +1033,7 @@ libxfs_icompare(struct cache_node *node, cache_key_t key)
 {
 	xfs_inode_t	*ip = (xfs_inode_t *)node;
 
-	return (ip->i_ino == *(xfs_ino_t *)key);
+	return (ip->i_ino == *(xfs_ino_t *)key) ? CACHE_HIT : CACHE_MISS;
 }
 
 int
diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c
index 820e7a2..55a451b 100644
--- a/repair/xfs_repair.c
+++ b/repair/xfs_repair.c
@@ -712,9 +712,9 @@ main(int argc, char **argv)
 		if (!ihash_option_used)
 			libxfs_ihash_size = libxfs_bhash_size;
 
-		libxfs_icache = cache_init(libxfs_ihash_size,
+		libxfs_icache = cache_init(0, libxfs_ihash_size,
 						&libxfs_icache_operations);
-		libxfs_bcache = cache_init(libxfs_bhash_size,
+		libxfs_bcache = cache_init(0, libxfs_bhash_size,
 						&libxfs_bcache_operations);
 	}
 
-- 
1.8.3.2

_______________________________________________
xfs mailing list
xfs@xxxxxxxxxxx
http://oss.sgi.com/mailman/listinfo/xfs




[Index of Archives]     [Linux XFS Devel]     [Linux Filesystem Development]     [Filesystem Testing]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux