[PATCH 12/15] xfs_repair: collect reverse-mapping data for refcount/rmap tree rebuilding

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Collect reverse-mapping data for the entire filesystem so that we can
later check and rebuild the reference count tree and the reverse mapping
tree.

Signed-off-by: Darrick J. Wong <darrick.wong@xxxxxxxxxx>
---
 repair/Makefile     |    4 -
 repair/dinode.c     |   42 ++++++++++
 repair/incore.h     |   38 +++++++++
 repair/incore_ino.c |    2 
 repair/phase4.c     |    2 
 repair/rmap.c       |  206 +++++++++++++++++++++++++++++++++++++++++++++++++++
 repair/rmap.h       |   34 ++++++++
 repair/xfs_repair.c |    4 +
 8 files changed, 330 insertions(+), 2 deletions(-)
 create mode 100644 repair/rmap.c
 create mode 100644 repair/rmap.h


diff --git a/repair/Makefile b/repair/Makefile
index 82cba8e..7239a9e 100644
--- a/repair/Makefile
+++ b/repair/Makefile
@@ -11,14 +11,14 @@ LTCOMMAND = xfs_repair
 
 HFILES = agheader.h attr_repair.h avl.h avl64.h bmap.h btree.h \
 	dinode.h dir2.h err_protos.h globals.h incore.h protos.h rt.h \
-	progress.h scan.h versions.h prefetch.h threads.h slab.h
+	progress.h scan.h versions.h prefetch.h threads.h slab.h rmap.h
 
 CFILES = agheader.c attr_repair.c avl.c avl64.c bmap.c btree.c \
 	dino_chunks.c dinode.c dir2.c globals.c incore.c \
 	incore_bmc.c init.c incore_ext.c incore_ino.c phase1.c \
 	phase2.c phase3.c phase4.c phase5.c phase6.c phase7.c \
 	progress.c prefetch.c rt.c sb.c scan.c threads.c \
-	versions.c xfs_repair.c slab.c
+	versions.c xfs_repair.c slab.c rmap.c
 
 LLDLIBS = $(LIBXFS) $(LIBXLOG) $(LIBUUID) $(LIBRT) $(LIBPTHREAD)
 LTDEPENDENCIES = $(LIBXFS) $(LIBXLOG)
diff --git a/repair/dinode.c b/repair/dinode.c
index fc8bc12..e706998 100644
--- a/repair/dinode.c
+++ b/repair/dinode.c
@@ -30,6 +30,8 @@
 #include "attr_repair.h"
 #include "bmap.h"
 #include "threads.h"
+#include "slab.h"
+#include "rmap.h"
 
 /*
  * gettext lookups for translations of strings use mutexes internally to
@@ -720,6 +722,9 @@ _("Fatal error: inode %" PRIu64 " - blkmap_set_ext(): %s\n"
 			 * checking each entry without setting the
 			 * block bitmap
 			 */
+			if (type == XR_INO_DATA &&
+			    xfs_sb_version_hasreflink(&mp->m_sb))
+				goto skip_dup;
 			if (search_dup_extent(agno, agbno, ebno)) {
 				do_warn(
 _("%s fork in ino %" PRIu64 " claims dup extent, "
@@ -729,6 +734,7 @@ _("%s fork in ino %" PRIu64 " claims dup extent, "
 					irec.br_blockcount);
 				goto done;
 			}
+skip_dup:
 			*tot += irec.br_blockcount;
 			continue;
 		}
@@ -768,6 +774,9 @@ _("%s fork in inode %" PRIu64 " claims metadata block %" PRIu64 "\n"),
 			case XR_E_INUSE:
 			case XR_E_MULT:
 				set_bmap_ext(agno, agbno, blen, XR_E_MULT);
+				if (type == XR_INO_DATA &&
+				    xfs_sb_version_hasreflink(&mp->m_sb))
+					break;
 				do_warn(
 _("%s fork in %s inode %" PRIu64 " claims used block %" PRIu64 "\n"),
 					forkname, ftype, ino, b);
@@ -779,6 +788,13 @@ _("illegal state %d in block map %" PRIu64 "\n"),
 					state, b);
 			}
 		}
+		if (collect_rmaps) { /* && !check_dups */
+			error = add_rmap(mp, ino, whichfork, &irec);
+			if (error)
+				do_error(
+_("couldn't add reverse mapping\n")
+					);
+		}
 		*tot += irec.br_blockcount;
 	}
 	error = 0;
@@ -2387,6 +2403,26 @@ _("bad (negative) size %" PRId64 " on inode %" PRIu64 "\n"),
 			flags &= XFS_DIFLAG_ANY;
 		}
 
+		if ((flags & XFS_DIFLAG_REFLINK) &&
+		    !xfs_sb_version_hasreflink(&mp->m_sb)) {
+			if (!uncertain) {
+				do_warn(
+	_("inode %" PRIu64 " is marked reflinked but file system does not support reflink\n"),
+					lino);
+			}
+			goto clear_bad_out;
+		}
+
+		if ((flags & XFS_DIFLAG_REFLINK) &&
+		    (flags & (XFS_DIFLAG_REALTIME | XFS_DIFLAG_RTINHERIT))) {
+			if (!uncertain) {
+				do_warn(
+	_("Cannot have a reflinked realtime inode %" PRIu64 "\n"),
+					lino);
+			}
+			goto clear_bad_out;
+		}
+
 		if (flags & (XFS_DIFLAG_REALTIME | XFS_DIFLAG_RTINHERIT)) {
 			/* need an rt-dev! */
 			if (!rt_name) {
@@ -2544,6 +2580,12 @@ _("bad non-zero extent size %u for non-realtime/extsize inode %" PRIu64 ", "),
 		goto clear_bad_out;
 
 	/*
+	 * record the state of the reflink flag
+	 */
+	if (collect_rmaps)
+		reflink_record_inode_flag(mp, dino, agno, ino, lino);
+
+	/*
 	 * check data fork -- if it's bad, clear the inode
 	 */
 	if (process_inode_data_fork(mp, agno, ino, dino, type, dirty,
diff --git a/repair/incore.h b/repair/incore.h
index ad19daa..e6c5310 100644
--- a/repair/incore.h
+++ b/repair/incore.h
@@ -282,6 +282,8 @@ typedef struct ino_tree_node  {
 	__uint64_t		ir_sparse;	/* sparse inode bitmask */
 	__uint64_t		ino_confirmed;	/* confirmed bitmask */
 	__uint64_t		ino_isa_dir;	/* bit == 1 if a directory */
+	__uint64_t		ino_was_rl;	/* bit == 1 if reflink flag set */
+	__uint64_t		ino_is_rl;	/* bit == 1 if reflink flag should be set */
 	__uint8_t		nlink_size;
 	union ino_nlink		disk_nlinks;	/* on-disk nlinks, set in P3 */
 	union  {
@@ -493,6 +495,42 @@ static inline bool is_inode_sparse(struct ino_tree_node *irec, int offset)
 }
 
 /*
+ * set/clear/test whether the inode was marked as reflinked
+ */
+static inline void set_inode_was_rl(struct ino_tree_node *irec, int offset)
+{
+	irec->ino_was_rl |= IREC_MASK(offset);
+}
+
+static inline void clear_inode_was_rl(struct ino_tree_node *irec, int offset)
+{
+	irec->ino_was_rl &= ~IREC_MASK(offset);
+}
+
+static inline int inode_was_rl(struct ino_tree_node *irec, int offset)
+{
+	return (irec->ino_was_rl & IREC_MASK(offset)) != 0;
+}
+
+/*
+ * set/clear/test whether the inode should be marked as reflinked
+ */
+static inline void set_inode_is_rl(struct ino_tree_node *irec, int offset)
+{
+	irec->ino_is_rl |= IREC_MASK(offset);
+}
+
+static inline void clear_inode_is_rl(struct ino_tree_node *irec, int offset)
+{
+	irec->ino_is_rl &= ~IREC_MASK(offset);
+}
+
+static inline int inode_is_rl(struct ino_tree_node *irec, int offset)
+{
+	return (irec->ino_is_rl & IREC_MASK(offset)) != 0;
+}
+
+/*
  * add_inode_reached() is set on inode I only if I has been reached
  * by an inode P claiming to be the parent and if I is a directory,
  * the .. link in the I says that P is I's parent.
diff --git a/repair/incore_ino.c b/repair/incore_ino.c
index cda6c2b..dd426aa 100644
--- a/repair/incore_ino.c
+++ b/repair/incore_ino.c
@@ -257,6 +257,8 @@ alloc_ino_node(
 	irec->ino_startnum = starting_ino;
 	irec->ino_confirmed = 0;
 	irec->ino_isa_dir = 0;
+	irec->ino_was_rl = 0;
+	irec->ino_is_rl = 0;
 	irec->ir_free = (xfs_inofree_t) - 1;
 	irec->ir_sparse = 0;
 	irec->ino_un.ex_data = NULL;
diff --git a/repair/phase4.c b/repair/phase4.c
index aa79ae0..2c2cccb 100644
--- a/repair/phase4.c
+++ b/repair/phase4.c
@@ -31,6 +31,7 @@
 #include "dir2.h"
 #include "progress.h"
 
+bool collect_rmaps = false;
 
 /*
  * null out quota inode fields in sb if they point to non-existent inodes.
@@ -169,6 +170,7 @@ phase4(xfs_mount_t *mp)
 	int			ag_hdr_block;
 	int			bstate;
 
+	collect_rmaps = true;
 	ag_hdr_block = howmany(ag_hdr_len, mp->m_sb.sb_blocksize);
 
 	do_log(_("Phase 4 - check for duplicate blocks...\n"));
diff --git a/repair/rmap.c b/repair/rmap.c
new file mode 100644
index 0000000..2e1829c
--- /dev/null
+++ b/repair/rmap.c
@@ -0,0 +1,206 @@
+/*
+ * Copyright (c) 2015 Oracle.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#include <libxfs.h>
+#include "btree.h"
+#include "err_protos.h"
+#include "libxlog.h"
+#include "incore.h"
+#include "globals.h"
+#include "dinode.h"
+#include "slab.h"
+#include "rmap.h"
+
+#undef RMAP_DEBUG
+
+#ifdef RMAP_DEBUG
+# define dbg_printf(f, a...)  do {printf(f, ## a); fflush(stdout); } while (0)
+#else
+# define dbg_printf(f, a...)
+#endif
+
+/* Reverse mapping observation */
+typedef struct xfs_rmap {
+	xfs_ino_t	rm_ino;		/* inode number */
+	xfs_fileoff_t	rm_startoff;	/* starting file offset */
+	xfs_agblock_t	rm_startblock;	/* starting AG block number */
+	xfs_extlen_t	rm_blockcount;	/* number of AG blocks */
+	struct xfs_rmap	*rm_next;	/* next item in stack */
+} xfs_rmap_t;
+
+/* per-AG rmap object anchor */
+typedef struct xfs_ag_rmap {
+	xfs_slab_t	*ar_rmaps;		/* rmap observations, p4 */
+	xfs_slab_t	*ar_reflink_items;	/* reflink items, p4-5 */
+} xfs_ag_rmap_t;
+
+static xfs_ag_rmap_t *ag_rmaps;
+
+/**
+ * needs_rmap_work() -- Return true if we must reconstruct either the
+ *                      reference count or reverse mapping trees.
+ */
+bool
+needs_rmap_work(
+	xfs_mount_t	*mp)
+{
+	return xfs_sb_version_hasreflink(&mp->m_sb) ||
+	       xfs_sb_version_hasrmapbt(&mp->m_sb);
+}
+
+/**
+ * init_rmaps() -- Initialize per-AG reverse map data (no-op if not needed).
+ */
+void
+init_rmaps(
+	xfs_mount_t	*mp)
+{
+	xfs_agnumber_t	i;
+	int		error;
+
+	if (!needs_rmap_work(mp))
+		return;		/* neither reflink nor rmapbt is enabled */
+
+	ag_rmaps = calloc(mp->m_sb.sb_agcount, sizeof(xfs_ag_rmap_t));
+	if (!ag_rmaps)
+		do_error(_("couldn't allocate per-AG reverse map roots\n"));
+
+	for (i = 0; i < mp->m_sb.sb_agcount; i++) {
+		error = init_slab(&ag_rmaps[i].ar_rmaps, sizeof(xfs_rmap_t));
+		if (error)
+			do_error(
+_("Insufficient memory while allocating reverse mapping slabs.\n"));
+		error = init_slab(&ag_rmaps[i].ar_reflink_items,
+				  sizeof(xfs_reflink_rec_incore_t));
+		if (error)
+			do_error(
+_("Insufficient memory while allocating reflink item slabs.\n"));
+	}
+}
+
+/**
+ * free_rmaps() -- Free the per-AG reverse-mapping data.
+ */
+void
+free_rmaps(
+	xfs_mount_t	*mp)
+{
+	xfs_agnumber_t	i;
+
+	if (!needs_rmap_work(mp))
+		return;
+
+	for (i = 0; i < mp->m_sb.sb_agcount; i++) {
+		free_slab(&ag_rmaps[i].ar_rmaps);
+		free_slab(&ag_rmaps[i].ar_reflink_items);
+	}
+	free(ag_rmaps);
+	ag_rmaps = NULL;
+}
+
+/**
+ * add_rmap() -- Record a physical block mapping for later btree rebuilding.
+ *
+ * @mp:	XFS mount object.
+ * @ino: The inode number associated with the extent mapping.
+ * @whichfork: Data or attribute fork?  (Only sanity-checked, not recorded.)
+ * @irec: The extent mapping to record.
+ * Returns 0 if no rmap work is needed, otherwise the result of slab_add().
+ */
+int
+add_rmap(
+	xfs_mount_t		*mp,
+	xfs_ino_t		ino,
+	int			whichfork,
+	xfs_bmbt_irec_t		*irec)
+{
+	xfs_slab_t		*rmaps;
+	xfs_rmap_t		rmap;
+	xfs_agnumber_t		agno;
+	xfs_agblock_t		agbno;
+
+	if (!needs_rmap_work(mp))
+		return 0;
+
+	agno = XFS_FSB_TO_AGNO(mp, irec->br_startblock);
+	agbno = XFS_FSB_TO_AGBNO(mp, irec->br_startblock);
+	ASSERT(agno != NULLAGNUMBER);
+	ASSERT(agno < mp->m_sb.sb_agcount);
+	ASSERT(agbno + irec->br_blockcount <= mp->m_sb.sb_agblocks);
+	ASSERT(ino != NULLFSINO);
+	ASSERT(whichfork == XFS_DATA_FORK || whichfork == XFS_ATTR_FORK);
+
+	rmaps = ag_rmaps[agno].ar_rmaps;	/* slab for this extent's AG */
+	rmap.rm_ino = ino;
+	rmap.rm_startoff = irec->br_startoff;
+	rmap.rm_startblock = agbno;		/* AG block, not fs block */
+	rmap.rm_blockcount = irec->br_blockcount;
+	rmap.rm_next = NULL;
+	return slab_add(rmaps, &rmap);
+}
+
+#ifdef RMAP_DEBUG	/* dump_rmap() prints one observation; no-op otherwise */
+static void
+dump_rmap(
+	const char		*msg,
+	xfs_agnumber_t		agno,
+	xfs_rmap_t		*rmap)
+{
+	printf("%s: %p agno=%u pblk=%llu ino=%llu lblk=%llu len=%u\n", msg,
+		(void *)rmap,	/* %p requires a void pointer argument */
+		(unsigned)agno,
+		(unsigned long long)rmap->rm_startblock,
+		(unsigned long long)rmap->rm_ino,
+		(unsigned long long)rmap->rm_startoff,
+		(unsigned)rmap->rm_blockcount);
+}
+#else
+# define dump_rmap(m, a, r)
+#endif
+
+/**
+ * reflink_record_inode_flag() -- Record that an inode had the reflink flag
+ *                                set when repair started.  The inode reflink
+ *                                flag will be adjusted as necessary.
+ * @mp: XFS mount object.
+ * @dino: On-disk inode.
+ * @agno: AG number of the inode.
+ * @ino: AG inode number.
+ * @lino: Full inode number.
+ */
+void
+reflink_record_inode_flag(
+	xfs_mount_t	*mp,
+	xfs_dinode_t	*dino,
+	xfs_agnumber_t	agno,
+	xfs_agino_t	ino,
+	xfs_ino_t	lino)
+{
+	ino_tree_node_t	*irec;
+	int		off;
+
+	ASSERT(XFS_AGINO_TO_INO(mp, agno, ino) == be64_to_cpu(dino->di_ino));
+	if (!(be16_to_cpu(dino->di_flags) & XFS_DIFLAG_REFLINK))
+		return;
+	irec = find_inode_rec(mp, agno, ino);
+	off = get_inode_offset(mp, lino, irec);
+	ASSERT(!inode_was_rl(irec, off));
+	set_inode_was_rl(irec, off);
+	dbg_printf("set was_rl lino=%llu was=0x%llx\n",
+		(unsigned long long)lino, (unsigned long long)irec->ino_was_rl);
+}
diff --git a/repair/rmap.h b/repair/rmap.h
new file mode 100644
index 0000000..16ad157
--- /dev/null
+++ b/repair/rmap.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2015 Oracle.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+#ifndef RMAP_H_
+#define RMAP_H_
+
+extern bool collect_rmaps;
+
+extern void init_rmaps(xfs_mount_t *mp);
+extern void free_rmaps(xfs_mount_t *mp);
+
+extern int add_rmap(xfs_mount_t *mp, xfs_ino_t ino, int whichfork,
+	xfs_bmbt_irec_t *irec);
+
+extern void reflink_record_inode_flag(xfs_mount_t *mp, xfs_dinode_t *dino,
+	xfs_agnumber_t agno, xfs_agino_t ino, xfs_ino_t lino);
+
+extern bool needs_rmap_work(xfs_mount_t *mp);
+
+#endif /* RMAP_H_ */
diff --git a/repair/xfs_repair.c b/repair/xfs_repair.c
index 07ddd00..3cd288a 100644
--- a/repair/xfs_repair.c
+++ b/repair/xfs_repair.c
@@ -31,6 +31,8 @@
 #include "threads.h"
 #include "progress.h"
 #include "dinode.h"
+#include "slab.h"
+#include "rmap.h"
 
 #define	rounddown(x, y)	(((x)/(y))*(y))
 
@@ -771,6 +773,7 @@ main(int argc, char **argv)
 	init_bmaps(mp);
 	incore_ino_init(mp);
 	incore_ext_init(mp);
+	init_rmaps(mp);
 
 	/* initialize random globals now that we know the fs geometry */
 	inodes_per_block = mp->m_sb.sb_inopblock;
@@ -804,6 +807,7 @@ main(int argc, char **argv)
 	/*
 	 * Done with the block usage maps, toss them...
 	 */
+	free_rmaps(mp);
 	free_bmaps(mp);
 
 	if (!bad_ino_btree)  {

_______________________________________________
xfs mailing list
xfs@xxxxxxxxxxx
http://oss.sgi.com/mailman/listinfo/xfs



[Index of Archives]     [Linux XFS Devel]     [Linux Filesystem Development]     [Filesystem Testing]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux