It turns out that there are some serious problems with the on-disk format of journal checksum v2. The foremost is that the function to calculate descriptor tag size returns sizes that are too big. This causes alignment issues on some architectures and is compounded by the fact that some parts of jbd2 use the structure size (incorrectly) to determine the presence of a 64-bit journal instead of checking the feature flags. These errors regrettably led to the journal corruption reported by Mr. Reardon. Therefore, introduce journal checksum v3, which enlarges the descriptor block tag format to allow for full 32-bit checksums of journal blocks, fix the journal tag function to return the correct sizes, and fix the jbd2 recovery code to use feature flags to determine whether the journal is 64-bit. Add a few helper functions so we don't have to open-code quite so many pieces. Switching to a 16-byte block tag size was found to increase journal size overhead by at most 0.1% when converting a 32-bit journal with no checksumming to a 32-bit journal with checksum v3 enabled. Signed-off-by: Darrick J. 
Wong <darrick.wong@xxxxxxxxxx> Reported-by: TR Reardon <thomas_reardon@xxxxxxxxxxx> --- debugfs/jfs_user.h | 21 +++++++++++++++++++++ debugfs/logdump.c | 12 +++++++----- e2fsck/journal.c | 14 +++++++++----- e2fsck/recovery.c | 26 +++++++++++++++----------- e2fsck/revoke.c | 4 ++-- lib/e2p/feature.c | 2 ++ lib/ext2fs/kernel-jbd.h | 44 ++++++++++++++++++++++++++++++++++---------- misc/dumpe2fs.c | 12 ++++++++---- 8 files changed, 98 insertions(+), 37 deletions(-) diff --git a/debugfs/jfs_user.h b/debugfs/jfs_user.h index 3070cd5..f287553 100644 --- a/debugfs/jfs_user.h +++ b/debugfs/jfs_user.h @@ -5,4 +5,25 @@ typedef unsigned short kdev_t; #include <ext2fs/kernel-jbd.h> +#define JSB_HAS_INCOMPAT_FEATURE(jsb, mask) \ + ((jsb)->s_header.h_blocktype == ext2fs_cpu_to_be32(JFS_SUPERBLOCK_V2) && \ + ((jsb)->s_feature_incompat & ext2fs_cpu_to_be32((mask)))) +static inline size_t journal_super_tag_bytes(journal_superblock_t *jsb) +{ + size_t sz; + + if (JSB_HAS_INCOMPAT_FEATURE(jsb, JFS_FEATURE_INCOMPAT_CSUM_V3)) + return sizeof(journal_block_tag3_t); + + sz = sizeof(journal_block_tag_t); + + if (JSB_HAS_INCOMPAT_FEATURE(jsb, JFS_FEATURE_INCOMPAT_CSUM_V2)) + sz += sizeof(__u16); + + if (JSB_HAS_INCOMPAT_FEATURE(jsb, JFS_FEATURE_INCOMPAT_64BIT)) + return sz; + else + return sz - sizeof(__u32); +} + #endif /* _JFS_USER_H */ diff --git a/debugfs/logdump.c b/debugfs/logdump.c index 9f9594f..70a7c36 100644 --- a/debugfs/logdump.c +++ b/debugfs/logdump.c @@ -476,19 +476,21 @@ static void dump_descriptor_block(FILE *out_file, unsigned int *blockp, int blocksize, tid_t transaction) { - int offset, tag_size = JBD_TAG_SIZE32; + int offset, tag_size, csum_size = 0; char *tagp; journal_block_tag_t *tag; unsigned int blocknr; __u32 tag_block; __u32 tag_flags; - if (be32_to_cpu(jsb->s_feature_incompat) & JFS_FEATURE_INCOMPAT_64BIT) - tag_size = JBD_TAG_SIZE64; - + tag_size = journal_super_tag_bytes(jsb); offset = sizeof(journal_header_t); blocknr = *blockp; + if 
(JSB_HAS_INCOMPAT_FEATURE(jsb, JFS_FEATURE_INCOMPAT_CSUM_V3) || + JSB_HAS_INCOMPAT_FEATURE(jsb, JFS_FEATURE_INCOMPAT_CSUM_V2)) + csum_size = sizeof(struct journal_block_tail); + if (dump_all) fprintf(out_file, "Dumping descriptor block, sequence %u, at " "block %u:\n", transaction, blocknr); @@ -505,7 +507,7 @@ static void dump_descriptor_block(FILE *out_file, /* ... and if we have gone too far, then we've reached the end of this block. */ - if (offset > blocksize) + if (offset > blocksize - csum_size) break; tag_block = be32_to_cpu(tag->t_blocknr); diff --git a/e2fsck/journal.c b/e2fsck/journal.c index 533b1d6..84e3a26 100644 --- a/e2fsck/journal.c +++ b/e2fsck/journal.c @@ -44,7 +44,7 @@ static int bh_count = 0; static int e2fsck_journal_verify_csum_type(journal_t *j, journal_superblock_t *jsb) { - if (!JFS_HAS_INCOMPAT_FEATURE(j, JFS_FEATURE_INCOMPAT_CSUM_V2)) + if (!journal_has_csum_v2or3(j)) return 1; return jsb->s_checksum_type == JBD2_CRC32C_CHKSUM; @@ -68,7 +68,7 @@ static int e2fsck_journal_sb_csum_verify(journal_t *j, { __u32 provided, calculated; - if (!JFS_HAS_INCOMPAT_FEATURE(j, JFS_FEATURE_INCOMPAT_CSUM_V2)) + if (!journal_has_csum_v2or3(j)) return 1; provided = ext2fs_be32_to_cpu(jsb->s_checksum); @@ -82,7 +82,7 @@ static errcode_t e2fsck_journal_sb_csum_set(journal_t *j, { __u32 crc; - if (!JFS_HAS_INCOMPAT_FEATURE(j, JFS_FEATURE_INCOMPAT_CSUM_V2)) + if (!journal_has_csum_v2or3(j)) return 0; crc = e2fsck_journal_sb_csum(jsb); @@ -624,15 +624,19 @@ static errcode_t e2fsck_journal_load(journal_t *journal) return EXT2_ET_RO_UNSUPP_FEATURE; /* Checksum v1 and v2 are mutually exclusive features. 
*/ - if (JFS_HAS_INCOMPAT_FEATURE(journal, JFS_FEATURE_INCOMPAT_CSUM_V2) && + if (journal_has_csum_v2or3(journal) && JFS_HAS_COMPAT_FEATURE(journal, JFS_FEATURE_COMPAT_CHECKSUM)) return EXT2_ET_CORRUPT_SUPERBLOCK; + if (JFS_HAS_INCOMPAT_FEATURE(journal, JFS_FEATURE_INCOMPAT_CSUM_V2) && + JFS_HAS_INCOMPAT_FEATURE(journal, JFS_FEATURE_INCOMPAT_CSUM_V3)) + return EXT2_ET_CORRUPT_SUPERBLOCK; + if (!e2fsck_journal_verify_csum_type(journal, jsb) || !e2fsck_journal_sb_csum_verify(journal, jsb)) return EXT2_ET_CORRUPT_SUPERBLOCK; - if (JFS_HAS_INCOMPAT_FEATURE(journal, JFS_FEATURE_INCOMPAT_CSUM_V2)) + if (journal_has_csum_v2or3(journal)) journal->j_csum_seed = jbd2_chksum(journal, ~0, jsb->s_uuid, sizeof(jsb->s_uuid)); diff --git a/e2fsck/recovery.c b/e2fsck/recovery.c index 0e194be..b63bbb8 100644 --- a/e2fsck/recovery.c +++ b/e2fsck/recovery.c @@ -181,7 +181,7 @@ static int jbd2_descr_block_csum_verify(journal_t *j, __u32 provided; __u32 calculated; - if (!JFS_HAS_INCOMPAT_FEATURE(j, JFS_FEATURE_INCOMPAT_CSUM_V2)) + if (!journal_has_csum_v2or3(j)) return 1; tail = (struct journal_block_tail *)(buf + j->j_blocksize - @@ -205,7 +205,7 @@ static int count_tags(journal_t *journal, struct buffer_head *bh) int nr = 0, size = journal->j_blocksize; int tag_bytes = journal_tag_bytes(journal); - if (JFS_HAS_INCOMPAT_FEATURE(journal, JFS_FEATURE_INCOMPAT_CSUM_V2)) + if (journal_has_csum_v2or3(journal)) size -= sizeof(struct journal_block_tail); tagp = &bh->b_data[sizeof(journal_header_t)]; @@ -338,10 +338,11 @@ int journal_skip_recovery(journal_t *journal) return err; } -static inline unsigned long long read_tag_block(int tag_bytes, journal_block_tag_t *tag) +static inline unsigned long long read_tag_block(journal_t *journal, + journal_block_tag_t *tag) { unsigned long long block = ext2fs_be32_to_cpu(tag->t_blocknr); - if (tag_bytes > JFS_TAG_SIZE32) + if (JFS_HAS_INCOMPAT_FEATURE(journal, JFS_FEATURE_INCOMPAT_64BIT)) block |= (u64)ext2fs_be32_to_cpu(tag->t_blocknr_high) << 32; 
return block; } @@ -384,7 +385,7 @@ static int jbd2_commit_block_csum_verify(journal_t *j, void *buf) __u32 provided; __u32 calculated; - if (!JFS_HAS_INCOMPAT_FEATURE(j, JFS_FEATURE_INCOMPAT_CSUM_V2)) + if (!journal_has_csum_v2or3(j)) return 1; h = buf; @@ -399,17 +400,21 @@ static int jbd2_commit_block_csum_verify(journal_t *j, void *buf) static int jbd2_block_tag_csum_verify(journal_t *j, journal_block_tag_t *tag, void *buf, __u32 sequence) { + journal_block_tag3_t *tag3 = (journal_block_tag3_t *)tag; __u32 csum32; __u32 seq; - if (!JFS_HAS_INCOMPAT_FEATURE(j, JFS_FEATURE_INCOMPAT_CSUM_V2)) + if (!journal_has_csum_v2or3(j)) return 1; seq = ext2fs_cpu_to_be32(sequence); csum32 = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&seq, sizeof(seq)); csum32 = jbd2_chksum(j, csum32, buf, j->j_blocksize); - return tag->t_checksum == ext2fs_cpu_to_be16(csum32); + if (JFS_HAS_INCOMPAT_FEATURE(j, JFS_FEATURE_INCOMPAT_CSUM_V3)) + return tag3->t_checksum == ext2fs_cpu_to_be32(csum32); + else + return tag->t_checksum == ext2fs_cpu_to_be16(csum32); } static int do_one_pass(journal_t *journal, @@ -513,8 +518,7 @@ static int do_one_pass(journal_t *journal, switch(blocktype) { case JFS_DESCRIPTOR_BLOCK: /* Verify checksum first */ - if (JFS_HAS_INCOMPAT_FEATURE(journal, - JFS_FEATURE_INCOMPAT_CSUM_V2)) + if (journal_has_csum_v2or3(journal)) descr_csum_size = sizeof(struct journal_block_tail); if (descr_csum_size > 0 && @@ -575,7 +579,7 @@ static int do_one_pass(journal_t *journal, unsigned long long blocknr; J_ASSERT(obh != NULL); - blocknr = read_tag_block(tag_bytes, + blocknr = read_tag_block(journal, tag); /* If the block has been @@ -814,7 +818,7 @@ static int jbd2_revoke_block_csum_verify(journal_t *j, __u32 provided; __u32 calculated; - if (!JFS_HAS_INCOMPAT_FEATURE(j, JFS_FEATURE_INCOMPAT_CSUM_V2)) + if (!journal_has_csum_v2or3(j)) return 1; tail = (struct journal_revoke_tail *)(buf + j->j_blocksize - diff --git a/e2fsck/revoke.c b/e2fsck/revoke.c index 383164e..b4c3f5f 100644 --- 
a/e2fsck/revoke.c +++ b/e2fsck/revoke.c @@ -597,7 +597,7 @@ static void write_one_revoke_record(journal_t *journal, offset = *offsetp; /* Do we need to leave space at the end for a checksum? */ - if (JFS_HAS_INCOMPAT_FEATURE(journal, JFS_FEATURE_INCOMPAT_CSUM_V2)) + if (journal_has_csum_v2or3(journal)) csum_size = sizeof(struct journal_revoke_tail); /* Make sure we have a descriptor with space left for the record */ @@ -644,7 +644,7 @@ static void jbd2_revoke_csum_set(journal_t *j, struct buffer_head *bh) struct journal_revoke_tail *tail; __u32 csum; - if (!JFS_HAS_INCOMPAT_FEATURE(j, JFS_FEATURE_INCOMPAT_CSUM_V2)) + if (!journal_has_csum_v2or3(j)) return; tail = (struct journal_revoke_tail *)(bh->b_data + j->j_blocksize - diff --git a/lib/e2p/feature.c b/lib/e2p/feature.c index d0e29b8..6e53cfe 100644 --- a/lib/e2p/feature.c +++ b/lib/e2p/feature.c @@ -112,6 +112,8 @@ static struct feature jrnl_feature_list[] = { "journal_async_commit" }, { E2P_FEATURE_INCOMPAT, JFS_FEATURE_INCOMPAT_CSUM_V2, "journal_checksum_v2" }, + { E2P_FEATURE_INCOMPAT, JFS_FEATURE_INCOMPAT_CSUM_V3, + "journal_checksum_v3" }, { 0, 0, 0 }, }; diff --git a/lib/ext2fs/kernel-jbd.h b/lib/ext2fs/kernel-jbd.h index 407f4a5..28a7a10 100644 --- a/lib/ext2fs/kernel-jbd.h +++ b/lib/ext2fs/kernel-jbd.h @@ -131,7 +131,11 @@ typedef struct journal_header_s * journal_block_tag (in the descriptor). The other h_chksum* fields are * not used. * - * Checksum v1 and v2 are mutually exclusive features. + * If FEATURE_INCOMPAT_CSUM_V3 is set, the descriptor block uses + * journal_block_tag3_t to store a full 32-bit checksum. Everything else + * is the same as v2. + * + * Checksum v1, v2, and v3 are mutually exclusive features. 
*/ struct commit_header { __u32 h_magic; @@ -148,6 +152,14 @@ struct commit_header { /* * The block tag: used to describe a single buffer in the journal */ +typedef struct journal_block_tag3_s +{ + __u32 t_blocknr; /* The on-disk block number */ + __u32 t_flags; /* See below */ + __u32 t_blocknr_high; /* most-significant high 32bits. */ + __u32 t_checksum; /* crc32c(uuid+seq+block) */ +} journal_block_tag3_t; + typedef struct journal_block_tag_s { __u32 t_blocknr; /* The on-disk block number */ @@ -156,9 +168,6 @@ typedef struct journal_block_tag_s __u32 t_blocknr_high; /* most-significant high 32bits. */ } journal_block_tag_t; -#define JBD_TAG_SIZE64 (sizeof(journal_block_tag_t)) -#define JBD_TAG_SIZE32 (8) - /* Tail of descriptor block, for checksumming */ struct journal_block_tail { __u32 t_checksum; @@ -257,6 +266,7 @@ typedef struct journal_superblock_s #define JFS_FEATURE_INCOMPAT_64BIT 0x00000002 #define JFS_FEATURE_INCOMPAT_ASYNC_COMMIT 0x00000004 #define JFS_FEATURE_INCOMPAT_CSUM_V2 0x00000008 +#define JFS_FEATURE_INCOMPAT_CSUM_V3 0x00000010 /* Features known to this kernel version: */ #define JFS_KNOWN_COMPAT_FEATURES 0 @@ -264,7 +274,8 @@ typedef struct journal_superblock_s #define JFS_KNOWN_INCOMPAT_FEATURES (JFS_FEATURE_INCOMPAT_REVOKE|\ JFS_FEATURE_INCOMPAT_ASYNC_COMMIT|\ JFS_FEATURE_INCOMPAT_64BIT|\ - JFS_FEATURE_INCOMPAT_CSUM_V2) + JFS_FEATURE_INCOMPAT_CSUM_V2|\ + JFS_FEATURE_INCOMPAT_CSUM_V3) #if (defined(E2FSCK_INCLUDE_INLINE_FUNCS) || !defined(NO_INLINE_FUNCS)) #ifdef E2FSCK_INCLUDE_INLINE_FUNCS @@ -290,16 +301,29 @@ typedef struct journal_superblock_s */ _INLINE_ size_t journal_tag_bytes(journal_t *journal) { - journal_block_tag_t tag; - size_t x = 0; + size_t sz; + + if (JFS_HAS_INCOMPAT_FEATURE(journal, JFS_FEATURE_INCOMPAT_CSUM_V3)) + return sizeof(journal_block_tag3_t); + + sz = sizeof(journal_block_tag_t); if (JFS_HAS_INCOMPAT_FEATURE(journal, JFS_FEATURE_INCOMPAT_CSUM_V2)) - x += sizeof(tag.t_checksum); + sz += sizeof(__u16); if 
(JFS_HAS_INCOMPAT_FEATURE(journal, JFS_FEATURE_INCOMPAT_64BIT)) - return x + JBD_TAG_SIZE64; + return sz; else - return x + JBD_TAG_SIZE32; + return sz - sizeof(__u32); +} + +_INLINE_ int journal_has_csum_v2or3(journal_t *journal) +{ + if (JFS_HAS_INCOMPAT_FEATURE(journal, JFS_FEATURE_INCOMPAT_CSUM_V2) || + JFS_HAS_INCOMPAT_FEATURE(journal, JFS_FEATURE_INCOMPAT_CSUM_V3)) + return 1; + + return 0; } #undef _INLINE_ #endif diff --git a/misc/dumpe2fs.c b/misc/dumpe2fs.c index 7cbb99b..9924285 100644 --- a/misc/dumpe2fs.c +++ b/misc/dumpe2fs.c @@ -415,8 +415,10 @@ static void print_inline_journal_information(ext2_filsys fs) if (jsb->s_feature_compat & ext2fs_cpu_to_be32(JFS_FEATURE_COMPAT_CHECKSUM)) printf("%s", _("Journal checksum type: crc32\n")); - if (jsb->s_feature_incompat & - ext2fs_cpu_to_be32(JFS_FEATURE_INCOMPAT_CSUM_V2)) + if ((jsb->s_feature_incompat & + ext2fs_cpu_to_be32(JFS_FEATURE_INCOMPAT_CSUM_V2)) || + (jsb->s_feature_incompat & + ext2fs_cpu_to_be32(JFS_FEATURE_INCOMPAT_CSUM_V3))) printf(_("Journal checksum type: %s\n" "Journal checksum: 0x%08x\n"), journal_checksum_type_str(jsb->s_checksum_type), @@ -454,8 +456,10 @@ static void print_journal_information(ext2_filsys fs) if (jsb->s_feature_compat & ext2fs_cpu_to_be32(JFS_FEATURE_COMPAT_CHECKSUM)) printf("%s", _("Journal checksum type: crc32\n")); - if (jsb->s_feature_incompat & - ext2fs_cpu_to_be32(JFS_FEATURE_INCOMPAT_CSUM_V2)) + if ((jsb->s_feature_incompat & + ext2fs_cpu_to_be32(JFS_FEATURE_INCOMPAT_CSUM_V2)) || + (jsb->s_feature_incompat & + ext2fs_cpu_to_be32(JFS_FEATURE_INCOMPAT_CSUM_V3))) printf(_("Journal checksum type: %s\n" "Journal checksum: 0x%08x\n"), journal_checksum_type_str(jsb->s_checksum_type), -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html