[PATCH] e2fsck: fix 64-bit journal support

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



I just found what a somewhat disturbing bug which is still present in
1.42.3 --- namely, that the journal replay code in e2fsck was not
properly handling 64-bit file systems correctly, by dropping the high
bits of the block number from the jbd2 descriptor blocks.

I have not had a chance to fully test to make sure we don't have other
bugs hiding in the 64-bit code, but it's clear no one else has had a lot
of time to test it either -- at least not to the extent of doing power
fail testing of > 16TB file systems!   :-(

							- Ted

>From 3b693d0b03569795d04920a04a0a21e5f64ffedc Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@xxxxxxx>
Date: Mon, 21 May 2012 21:30:45 -0400
Subject: [PATCH] e2fsck: fix 64-bit journal support

64-bit journal support was broken; we weren't using the high bits from
the journal descriptor blocks!  We were also using "unsigned long" for
the journal block numbers, which would be a problem on 32-bit systems.

Signed-off-by: "Theodore Ts'o" <tytso@xxxxxxx>
---
 e2fsck/jfs_user.h |    4 ++--
 e2fsck/journal.c  |   33 +++++++++++++++++----------------
 e2fsck/recovery.c |   25 ++++++++++++-------------
 3 files changed, 31 insertions(+), 31 deletions(-)

diff --git a/e2fsck/jfs_user.h b/e2fsck/jfs_user.h
index 9e33306..92f8ae2 100644
--- a/e2fsck/jfs_user.h
+++ b/e2fsck/jfs_user.h
@@ -18,7 +18,7 @@ struct buffer_head {
 	e2fsck_t	b_ctx;
 	io_channel 	b_io;
 	int	 	b_size;
-	blk_t	 	b_blocknr;
+	unsigned long long b_blocknr;
 	int	 	b_dirty;
 	int	 	b_uptodate;
 	int	 	b_err;
@@ -121,7 +121,7 @@ _INLINE_ size_t journal_tag_bytes(journal_t *journal)
 /*
  * Kernel compatibility functions are defined in journal.c
  */
-int journal_bmap(journal_t *journal, blk64_t block, unsigned long *phys);
+int journal_bmap(journal_t *journal, blk64_t block, unsigned long long *phys);
 struct buffer_head *getblk(kdev_t ctx, blk64_t blocknr, int blocksize);
 void sync_blockdev(kdev_t kdev);
 void ll_rw_block(int rw, int dummy, struct buffer_head *bh[]);
diff --git a/e2fsck/journal.c b/e2fsck/journal.c
index 915b8bb..bada028 100644
--- a/e2fsck/journal.c
+++ b/e2fsck/journal.c
@@ -44,7 +44,7 @@ static int bh_count = 0;
  * to use the recovery.c file virtually unchanged from the kernel, so we
  * don't have to do much to keep kernel and user recovery in sync.
  */
-int journal_bmap(journal_t *journal, blk64_t block, unsigned long *phys)
+int journal_bmap(journal_t *journal, blk64_t block, unsigned long long *phys)
 {
 #ifdef USE_INODE_IO
 	*phys = block;
@@ -80,8 +80,8 @@ struct buffer_head *getblk(kdev_t kdev, blk64_t blocknr, int blocksize)
 	if (journal_enable_debug >= 3)
 		bh_count++;
 #endif
-	jfs_debug(4, "getblk for block %lu (%d bytes)(total %d)\n",
-		  (unsigned long) blocknr, blocksize, bh_count);
+	jfs_debug(4, "getblk for block %llu (%d bytes)(total %d)\n",
+		  (unsigned long long) blocknr, blocksize, bh_count);
 
 	bh->b_ctx = kdev->k_ctx;
 	if (kdev->k_dev == K_DEV_FS)
@@ -114,38 +114,39 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhp[])
 	for (; nr > 0; --nr) {
 		bh = *bhp++;
 		if (rw == READ && !bh->b_uptodate) {
-			jfs_debug(3, "reading block %lu/%p\n",
-				  (unsigned long) bh->b_blocknr, (void *) bh);
+			jfs_debug(3, "reading block %llu/%p\n",
+				  bh->b_blocknr, (void *) bh);
 			retval = io_channel_read_blk64(bh->b_io,
 						     bh->b_blocknr,
 						     1, bh->b_data);
 			if (retval) {
 				com_err(bh->b_ctx->device_name, retval,
-					"while reading block %lu\n",
-					(unsigned long) bh->b_blocknr);
+					"while reading block %llu\n",
+					bh->b_blocknr);
 				bh->b_err = retval;
 				continue;
 			}
 			bh->b_uptodate = 1;
 		} else if (rw == WRITE && bh->b_dirty) {
-			jfs_debug(3, "writing block %lu/%p\n",
-				  (unsigned long) bh->b_blocknr, (void *) bh);
+			jfs_debug(3, "writing block %llu/%p\n",
+				  bh->b_blocknr,
+				  (void *) bh);
 			retval = io_channel_write_blk64(bh->b_io,
 						      bh->b_blocknr,
 						      1, bh->b_data);
 			if (retval) {
 				com_err(bh->b_ctx->device_name, retval,
-					"while writing block %lu\n",
-					(unsigned long) bh->b_blocknr);
+					"while writing block %llu\n",
+					bh->b_blocknr);
 				bh->b_err = retval;
 				continue;
 			}
 			bh->b_dirty = 0;
 			bh->b_uptodate = 1;
 		} else {
-			jfs_debug(3, "no-op %s for block %lu\n",
+			jfs_debug(3, "no-op %s for block %llu\n",
 				  rw == READ ? "read" : "write",
-				  (unsigned long) bh->b_blocknr);
+				  bh->b_blocknr);
 		}
 	}
 }
@@ -164,8 +165,8 @@ void brelse(struct buffer_head *bh)
 {
 	if (bh->b_dirty)
 		ll_rw_block(WRITE, 1, &bh);
-	jfs_debug(3, "freeing block %lu/%p (total %d)\n",
-		  (unsigned long) bh->b_blocknr, (void *) bh, --bh_count);
+	jfs_debug(3, "freeing block %llu/%p (total %d)\n",
+		  bh->b_blocknr, (void *) bh, --bh_count);
 	ext2fs_free_mem(&bh);
 }
 
@@ -237,7 +238,7 @@ static errcode_t e2fsck_get_journal(e2fsck_t ctx, journal_t **ret_journal)
 	journal_t		*journal = NULL;
 	errcode_t		retval = 0;
 	io_manager		io_ptr = 0;
-	unsigned long		start = 0;
+	unsigned long long	start = 0;
 	int			ext_journal = 0;
 	int			tried_backup_jnl = 0;
 
diff --git a/e2fsck/recovery.c b/e2fsck/recovery.c
index b669941..e94ef4e 100644
--- a/e2fsck/recovery.c
+++ b/e2fsck/recovery.c
@@ -71,7 +71,7 @@ static int do_readahead(journal_t *journal, unsigned int start)
 {
 	int err;
 	unsigned int max, nbufs, next;
-	unsigned long blocknr;
+	unsigned long long blocknr;
 	struct buffer_head *bh;
 
 	struct buffer_head * bufs[MAXBUF];
@@ -133,7 +133,7 @@ static int jread(struct buffer_head **bhp, journal_t *journal,
 		 unsigned int offset)
 {
 	int err;
-	unsigned long blocknr;
+	unsigned long long blocknr;
 	struct buffer_head *bh;
 
 	*bhp = NULL;
@@ -309,7 +309,6 @@ int journal_skip_recovery(journal_t *journal)
 	return err;
 }
 
-#if 0
 static inline unsigned long long read_tag_block(int tag_bytes, journal_block_tag_t *tag)
 {
 	unsigned long long block = be32_to_cpu(tag->t_blocknr);
@@ -317,17 +316,16 @@ static inline unsigned long long read_tag_block(int tag_bytes, journal_block_tag
 		block |= (__u64)be32_to_cpu(tag->t_blocknr_high) << 32;
 	return block;
 }
-#endif
 
 /*
  * calc_chksums calculates the checksums for the blocks described in the
  * descriptor block.
  */
 static int calc_chksums(journal_t *journal, struct buffer_head *bh,
-			unsigned long *next_log_block, __u32 *crc32_sum)
+			unsigned long long *next_log_block, __u32 *crc32_sum)
 {
 	int i, num_blks, err;
-	unsigned long io_block;
+	unsigned long long io_block;
 	struct buffer_head *obh;
 
 	num_blks = count_tags(journal, bh);
@@ -340,7 +338,7 @@ static int calc_chksums(journal_t *journal, struct buffer_head *bh,
 		err = jread(&obh, journal, io_block);
 		if (err) {
 			printk(KERN_ERR "JBD: IO error %d recovering block "
-				"%lu in log\n", err, io_block);
+				"%llu in log\n", err, io_block);
 			return 1;
 		} else {
 			*crc32_sum = crc32_be(*crc32_sum, (void *)obh->b_data,
@@ -355,7 +353,7 @@ static int do_one_pass(journal_t *journal,
 			struct recovery_info *info, enum passtype pass)
 {
 	unsigned int		first_commit_ID, next_commit_ID;
-	unsigned long		next_log_block;
+	unsigned long long	next_log_block;
 	int			err, success = 0;
 	journal_superblock_t *	sb;
 	journal_header_t *	tmp;
@@ -485,7 +483,7 @@ static int do_one_pass(journal_t *journal,
 			tagp = &bh->b_data[sizeof(journal_header_t)];
 			while ((tagp - bh->b_data + tag_bytes)
 			       <= journal->j_blocksize) {
-				unsigned long io_block;
+				unsigned long long io_block;
 
 				tag = (journal_block_tag_t *) tagp;
 				flags = be32_to_cpu(tag->t_flags);
@@ -499,13 +497,14 @@ static int do_one_pass(journal_t *journal,
 					success = err;
 					printk (KERN_ERR
 						"JBD: IO error %d recovering "
-						"block %ld in log\n",
+						"block %llu in log\n",
 						err, io_block);
 				} else {
-					unsigned long blocknr;
+					unsigned long long blocknr;
 
 					J_ASSERT(obh != NULL);
-					blocknr = be32_to_cpu(tag->t_blocknr);
+					blocknr = read_tag_block(tag_bytes,
+								 tag);
 
 					/* If the block has been
 					 * revoked, then we're all done
@@ -733,7 +732,7 @@ static int scan_revoke_records(journal_t *journal, struct buffer_head *bh,
 		record_len = 8;
 
 	while (offset < max) {
-		unsigned long blocknr;
+		unsigned long long blocknr;
 		int err;
 
 		if (record_len == 4)
-- 
1.7.10.rc3

--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Reiser Filesystem Development]     [Ceph FS]     [Kernel Newbies]     [Security]     [Netfilter]     [Bugtraq]     [Linux FS]     [Yosemite National Park]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Samba]     [Device Mapper]     [Linux Media]

  Powered by Linux