From: Harshad Shirwadkar <harshadshirwadkar@xxxxxxxxx> This patch makes recovery.c identical with fast commit kernel changes. Signed-off-by: Harshad Shirwadkar <harshadshirwadkar@xxxxxxxxx> Reviewed-by: Theodore Ts'o <tytso@xxxxxxx> --- debugfs/journal.c | 10 +-- e2fsck/journal.c | 28 ++++-- e2fsck/recovery.c | 190 +++++++++++++++++++++++++++++----------- lib/ext2fs/jfs_compat.h | 19 +++- lib/ext2fs/kernel-jbd.h | 16 +++- 5 files changed, 194 insertions(+), 69 deletions(-) diff --git a/debugfs/journal.c b/debugfs/journal.c index fa72ec57..e8872f05 100644 --- a/debugfs/journal.c +++ b/debugfs/journal.c @@ -378,7 +378,7 @@ try_backup_journal: goto errout; } - journal->j_maxlen = EXT2_I_SIZE(&j_inode->i_ext2) / + journal->j_total_len = EXT2_I_SIZE(&j_inode->i_ext2) / journal->j_blocksize; #ifdef USE_INODE_IO @@ -493,7 +493,7 @@ try_backup_journal: brelse(bh); maxlen = ext2fs_blocks_count(&jsuper); - journal->j_maxlen = (maxlen < 1ULL << 32) ? maxlen : + journal->j_total_len = (maxlen < 1ULL << 32) ? maxlen : (1ULL << 32) - 1; start++; } @@ -629,9 +629,9 @@ static errcode_t ext2fs_journal_load(journal_t *journal) if (jsb->s_blocksize != htonl(journal->j_blocksize)) return EXT2_ET_CORRUPT_JOURNAL_SB; - if (ntohl(jsb->s_maxlen) < journal->j_maxlen) - journal->j_maxlen = ntohl(jsb->s_maxlen); - else if (ntohl(jsb->s_maxlen) > journal->j_maxlen) + if (ntohl(jsb->s_maxlen) < journal->j_total_len) + journal->j_total_len = ntohl(jsb->s_maxlen); + else if (ntohl(jsb->s_maxlen) > journal->j_total_len) return EXT2_ET_CORRUPT_JOURNAL_SB; journal->j_tail_sequence = ntohl(jsb->s_sequence); diff --git a/e2fsck/journal.c b/e2fsck/journal.c index 7d9f1b40..2adef76a 100644 --- a/e2fsck/journal.c +++ b/e2fsck/journal.c @@ -379,7 +379,7 @@ static errcode_t e2fsck_get_journal(e2fsck_t ctx, journal_t **ret_journal) goto errout; } - journal->j_maxlen = EXT2_I_SIZE(&j_inode->i_ext2) / + journal->j_total_len = EXT2_I_SIZE(&j_inode->i_ext2) / journal->j_blocksize; #ifdef USE_INODE_IO @@ -503,7 +503,7 @@ static errcode_t e2fsck_get_journal(e2fsck_t ctx, journal_t **ret_journal) brelse(bh); maxlen = ext2fs_blocks_count(&jsuper); - journal->j_maxlen = (maxlen < 1ULL << 32) ? maxlen : (1ULL << 32) - 1; + journal->j_total_len = (maxlen < 1ULL << 32) ? maxlen : (1ULL << 32) - 1; start++; } @@ -675,9 +675,9 @@ static errcode_t e2fsck_journal_load(journal_t *journal) return EXT2_ET_CORRUPT_JOURNAL_SB; } - if (ntohl(jsb->s_maxlen) < journal->j_maxlen) - journal->j_maxlen = ntohl(jsb->s_maxlen); - else if (ntohl(jsb->s_maxlen) > journal->j_maxlen) { + if (ntohl(jsb->s_maxlen) < journal->j_total_len) + journal->j_total_len = ntohl(jsb->s_maxlen); + else if (ntohl(jsb->s_maxlen) > journal->j_total_len) { com_err(ctx->program_name, EXT2_ET_CORRUPT_JOURNAL_SB, _("%s: journal too short\n"), ctx->device_name); @@ -688,7 +688,21 @@ static errcode_t e2fsck_journal_load(journal_t *journal) journal->j_transaction_sequence = journal->j_tail_sequence; journal->j_tail = ntohl(jsb->s_start); journal->j_first = ntohl(jsb->s_first); - journal->j_last = ntohl(jsb->s_maxlen); + if (jbd2_has_feature_fast_commit(journal)) { + if (ntohl(jsb->s_maxlen) - jbd_get_num_fc_blks(jsb) + < JBD2_MIN_JOURNAL_BLOCKS) { + com_err(ctx->program_name, EXT2_ET_CORRUPT_JOURNAL_SB, + _("%s: incorrect fast commit blocks\n"), + ctx->device_name); + return EXT2_ET_CORRUPT_JOURNAL_SB; + } + journal->j_fc_last = ntohl(jsb->s_maxlen); + journal->j_last = journal->j_fc_last - + jbd_get_num_fc_blks(jsb); + journal->j_fc_first = journal->j_last + 1; + } else { + journal->j_last = ntohl(jsb->s_maxlen); + } return 0; } @@ -720,7 +734,7 @@ static void e2fsck_journal_reset_super(e2fsck_t ctx, journal_superblock_t *jsb, memset (p, 0, ctx->fs->blocksize-sizeof(journal_header_t)); jsb->s_blocksize = htonl(ctx->fs->blocksize); - jsb->s_maxlen = htonl(journal->j_maxlen); + jsb->s_maxlen = htonl(journal->j_total_len); jsb->s_first = htonl(1); /* Initialize the journal sequence number so that there is "no" diff --git a/e2fsck/recovery.c b/e2fsck/recovery.c index 6c3b7bb4..dc0694fc 100644 --- a/e2fsck/recovery.c +++ b/e2fsck/recovery.c @@ -35,7 +35,6 @@ struct recovery_info int nr_revoke_hits; }; -enum passtype {PASS_SCAN, PASS_REVOKE, PASS_REPLAY}; static int do_one_pass(journal_t *journal, struct recovery_info *info, enum passtype pass); static int scan_revoke_records(journal_t *, struct buffer_head *, @@ -75,8 +74,8 @@ static int do_readahead(journal_t *journal, unsigned int start) /* Do up to 128K of readahead */ max = start + (128 * 1024 / journal->j_blocksize); - if (max > journal->j_maxlen) - max = journal->j_maxlen; + if (max > journal->j_total_len) + max = journal->j_total_len; /* Do the readahead itself. We'll submit MAXBUF buffer_heads at * a time to the block device IO layer. */ @@ -135,7 +134,7 @@ static int jread(struct buffer_head **bhp, journal_t *journal, *bhp = NULL; - if (offset >= journal->j_maxlen) { + if (offset >= journal->j_total_len) { printk(KERN_ERR "JBD2: corrupted journal superblock\n"); return -EFSCORRUPTED; } @@ -180,7 +179,7 @@ static int jbd2_descriptor_block_csum_verify(journal_t *j, void *buf) if (!jbd2_journal_has_csum_v2or3(j)) return 1; - tail = (struct jbd2_journal_block_tail *)((char *)buf + j->j_blocksize - + tail = (struct jbd2_journal_block_tail *)(buf + j->j_blocksize - sizeof(struct jbd2_journal_block_tail)); provided = tail->t_checksum; tail->t_checksum = 0; @@ -225,10 +224,51 @@ static int count_tags(journal_t *journal, struct buffer_head *bh) /* Make sure we wrap around the log correctly! */ #define wrap(journal, var) \ do { \ - if (var >= (journal)->j_last) \ - var -= ((journal)->j_last - (journal)->j_first); \ + unsigned long _wrap_last = \ + jbd2_has_feature_fast_commit(journal) ? \ + (journal)->j_fc_last : (journal)->j_last; \ + \ + if (var >= _wrap_last) \ + var -= (_wrap_last - (journal)->j_first); \ } while (0) +static int fc_do_one_pass(journal_t *journal, + struct recovery_info *info, enum passtype pass) +{ + unsigned int expected_commit_id = info->end_transaction; + unsigned long next_fc_block; + struct buffer_head *bh; + int err = 0; + + next_fc_block = journal->j_fc_first; + if (!journal->j_fc_replay_callback) + return 0; + + while (next_fc_block <= journal->j_fc_last) { + jbd_debug(3, "Fast commit replay: next block %ld", + next_fc_block); + err = jread(&bh, journal, next_fc_block); + if (err) { + jbd_debug(3, "Fast commit replay: read error"); + break; + } + + jbd_debug(3, "Processing fast commit blk with seq %d"); + err = journal->j_fc_replay_callback(journal, bh, pass, + next_fc_block - journal->j_fc_first, + expected_commit_id); + next_fc_block++; + if (err < 0 || err == JBD2_FC_REPLAY_STOP) + break; + err = 0; + } + + if (err) + jbd_debug(3, "Fast commit replay failed, err = %d\n", err); + + return err; +} + /** * jbd2_journal_recover - recovers a on-disk journal * @journal: the journal to recover @@ -286,7 +326,7 @@ int jbd2_journal_recover(journal_t *journal) err = err2; /* Make sure all replayed data is on permanent storage */ if (journal->j_flags & JBD2_BARRIER) { - err2 = blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL); + err2 = blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL); if (!err) err = err2; } @@ -428,6 +468,8 @@ static int do_one_pass(journal_t *journal, __u32 crc32_sum = ~0; /* Transactional Checksums */ int descr_csum_size = 0; int block_error = 0; + bool need_check_commit_time = false; + __u64 last_trans_commit_time = 0, commit_time; /* * First thing is to establish what we expect to find in the log @@ -470,7 +512,9 @@ static int do_one_pass(journal_t *journal, break; jbd_debug(2, "Scanning for sequence ID %u at %lu/%lu\n", - next_commit_ID, next_log_block, journal->j_last); + next_commit_ID, next_log_block, + jbd2_has_feature_fast_commit(journal) ? + journal->j_fc_last : journal->j_last); /* Skip over each chunk of the transaction looking * either the next descriptor block or the final commit @@ -520,9 +564,21 @@ static int do_one_pass(journal_t *journal, if (descr_csum_size > 0 && !jbd2_descriptor_block_csum_verify(journal, bh->b_data)) { - err = -EFSBADCRC; - brelse(bh); - goto failed; + /* + * PASS_SCAN can see stale blocks due to lazy + * journal init. Don't error out on those yet. + */ + if (pass != PASS_SCAN) { + pr_err("JBD2: Invalid checksum recovering block %lu in log\n", + next_log_block); + err = -EFSBADCRC; + brelse(bh); + goto failed; + } + need_check_commit_time = true; + jbd_debug(1, + "invalid descriptor block found in %lu\n", + next_log_block); } /* If it is a valid descriptor block, replay it @@ -532,6 +588,7 @@ static int do_one_pass(journal_t *journal, if (pass != PASS_REPLAY) { if (pass == PASS_SCAN && jbd2_has_feature_checksum(journal) && + !need_check_commit_time && !info->end_transaction) { if (calc_chksums(journal, bh, &next_log_block, @@ -663,7 +720,7 @@ static int do_one_pass(journal_t *journal, * | GO TO NEXT "Journal Corruption" * | TRANSACTION * | - * {(n+1)th transaction} + * {(n+1)th transanction} * | * _______|______________ * | | @@ -680,21 +737,48 @@ static int do_one_pass(journal_t *journal, * mentioned conditions. Hence assume * "Interrupted Commit".) */ + commit_time = be64_to_cpu( + ((struct commit_header *)bh->b_data)->h_commit_sec); + /* + * If need_check_commit_time is set, it means we are in + * PASS_SCAN and csum verify failed before. If + * commit_time is increasing, it's the same journal, + * otherwise it is stale journal block, just end this + * recovery. + */ + if (need_check_commit_time) { + if (commit_time >= last_trans_commit_time) { + pr_err("JBD2: Invalid checksum found in transaction %u\n", + next_commit_ID); + err = -EFSBADCRC; + brelse(bh); + goto failed; + } + ignore_crc_mismatch: + /* + * It likely does not belong to same journal, + * just end this recovery with success. + */ + jbd_debug(1, "JBD2: Invalid checksum ignored in transaction %u, likely stale data\n", + next_commit_ID); + err = 0; + brelse(bh); + goto done; + } - /* Found an expected commit block: if checksums - * are present verify them in PASS_SCAN; else not + /* + * Found an expected commit block: if checksums + * are present, verify them in PASS_SCAN; else not * much to do other than move on to the next sequence - * number. */ + * number. + */ if (pass == PASS_SCAN && jbd2_has_feature_checksum(journal)) { - int chksum_err, chksum_seen; struct commit_header *cbh = (struct commit_header *)bh->b_data; unsigned found_chksum = be32_to_cpu(cbh->h_chksum[0]); - chksum_err = chksum_seen = 0; - if (info->end_transaction) { journal->j_failed_commit = info->end_transaction; @@ -702,42 +786,25 @@ static int do_one_pass(journal_t *journal, break; } - if (crc32_sum == found_chksum && - cbh->h_chksum_type == JBD2_CRC32_CHKSUM && - cbh->h_chksum_size == - JBD2_CRC32_CHKSUM_SIZE) - chksum_seen = 1; - else if (!(cbh->h_chksum_type == 0 && - cbh->h_chksum_size == 0 && - found_chksum == 0 && - !chksum_seen)) - /* - * If fs is mounted using an old kernel and then - * kernel with journal_chksum is used then we - * get a situation where the journal flag has - * checksum flag set but checksums are not - * present i.e chksum = 0, in the individual - * commit blocks. - * Hence to avoid checksum failures, in this - * situation, this extra check is added. - */ - chksum_err = 1; - - if (chksum_err) { - info->end_transaction = next_commit_ID; + /* Neither checksum match nor unused? */ + if (!((crc32_sum == found_chksum && + cbh->h_chksum_type == + JBD2_CRC32_CHKSUM && + cbh->h_chksum_size == + JBD2_CRC32_CHKSUM_SIZE) || + (cbh->h_chksum_type == 0 && + cbh->h_chksum_size == 0 && + found_chksum == 0))) + goto chksum_error; - if (!jbd2_has_feature_async_commit(journal)) { - journal->j_failed_commit = - next_commit_ID; - brelse(bh); - break; - } - } crc32_sum = ~0; } if (pass == PASS_SCAN && !jbd2_commit_block_csum_verify(journal, bh->b_data)) { + chksum_error: + if (commit_time < last_trans_commit_time) + goto ignore_crc_mismatch; info->end_transaction = next_commit_ID; if (!jbd2_has_feature_async_commit(journal)) { @@ -747,11 +814,24 @@ static int do_one_pass(journal_t *journal, break; } } + if (pass == PASS_SCAN) + last_trans_commit_time = commit_time; brelse(bh); next_commit_ID++; continue; case JBD2_REVOKE_BLOCK: + /* + * Check revoke block crc in pass_scan, if csum verify + * failed, check commit block time later. + */ + if (pass == PASS_SCAN && + !jbd2_descriptor_block_csum_verify(journal, + bh->b_data)) { + jbd_debug(1, "JBD2: invalid revoke block found in %lu\n", + next_log_block); + need_check_commit_time = true; + } /* If we aren't in the REVOKE pass, then we can * just skip over this block. */ if (pass != PASS_REVOKE) { @@ -796,6 +876,13 @@ static int do_one_pass(journal_t *journal, success = -EIO; } } + + if (jbd2_has_feature_fast_commit(journal) && pass != PASS_REVOKE) { + err = fc_do_one_pass(journal, info, pass); + if (err) + success = err; + } + if (block_error && success == 0) success = -EIO; return success; @@ -811,7 +898,7 @@ static int scan_revoke_records(journal_t *journal, struct buffer_head *bh, { jbd2_journal_revoke_header_t *header; int offset, max; - unsigned csum_size = 0; + int csum_size = 0; __u32 rcount; int record_len = 4; @@ -819,9 +906,6 @@ static int scan_revoke_records(journal_t *journal, struct buffer_head *bh, offset = sizeof(jbd2_journal_revoke_header_t); rcount = be32_to_cpu(header->r_count); - if (!jbd2_descriptor_block_csum_verify(journal, header)) - return -EFSBADCRC; - if (jbd2_journal_has_csum_v2or3(journal)) csum_size = sizeof(struct jbd2_journal_block_tail); if (rcount > journal->j_blocksize - csum_size) diff --git a/lib/ext2fs/jfs_compat.h b/lib/ext2fs/jfs_compat.h index 63ebef99..96fe34a4 100644 --- a/lib/ext2fs/jfs_compat.h +++ b/lib/ext2fs/jfs_compat.h @@ -12,6 +12,7 @@ #else #include <arpa/inet.h> #endif +#include <stdbool.h> #define printk printf #define KERN_ERR "" @@ -66,9 +67,15 @@ static inline __u32 jbd2_chksum(journal_t *j EXT2FS_ATTR((unused)), sizeof(struct __struct), __alignof__(struct __struct),\ (__flags), NULL) -#define blkdev_issue_flush(kdev, a, b) sync_blockdev(kdev) +#define blkdev_issue_flush(kdev, a) sync_blockdev(kdev) #define is_power_of_2(x) ((x) != 0 && (((x) & ((x) - 1)) == 0)) #define pr_emerg(fmt) +#define pr_err(...) + +enum passtype {PASS_SCAN, PASS_REVOKE, PASS_REPLAY}; + +#define JBD2_FC_REPLAY_STOP 0 +#define JBD2_FC_REPLAY_CONTINUE 1 struct journal_s { @@ -79,13 +86,16 @@ struct journal_s int j_format_version; unsigned long j_head; unsigned long j_tail; + unsigned long j_fc_first; + unsigned long j_fc_off; + unsigned long j_fc_last; unsigned long j_free; unsigned long j_first, j_last; kdev_t j_dev; kdev_t j_fs_dev; int j_blocksize; unsigned int j_blk_offset; - unsigned int j_maxlen; + unsigned int j_total_len; struct inode * j_inode; tid_t j_tail_sequence; tid_t j_transaction_sequence; @@ -94,6 +104,11 @@ struct journal_s struct jbd2_revoke_table_s *j_revoke_table[2]; tid_t j_failed_commit; __u32 j_csum_seed; + int (*j_fc_replay_callback)(struct journal_s *journal, + struct buffer_head *bh, + enum passtype pass, int off, + tid_t expected_tid); + }; #define is_journal_abort(x) 0 diff --git a/lib/ext2fs/kernel-jbd.h b/lib/ext2fs/kernel-jbd.h index cb1bc308..6ec1a8c9 100644 --- a/lib/ext2fs/kernel-jbd.h +++ b/lib/ext2fs/kernel-jbd.h @@ -72,8 +72,13 @@ extern void * __jbd_kmalloc (char *where, size_t size, int flags, int retry); __jbd_kmalloc(__FUNCTION__, (size), (flags), journal_oom_retry) #define jbd_rep_kmalloc(size, flags) \ __jbd_kmalloc(__FUNCTION__, (size), (flags), 1) +#define jbd_get_num_fc_blks(jsb) \ + (be32_to_cpu((jsb)->s_num_fc_blks) ? \ + be32_to_cpu((jsb)->s_num_fc_blks) : JBD2_DEFAULT_FAST_COMMIT_BLOCKS) + #define JBD2_MIN_JOURNAL_BLOCKS 1024 +#define JBD2_DEFAULT_FAST_COMMIT_BLOCKS 256 /* * Internal structures used by the logging mechanism: @@ -94,6 +99,7 @@ extern void * __jbd_kmalloc (char *where, size_t size, int flags, int retry); #define JBD2_SUPERBLOCK_V1 3 #define JBD2_SUPERBLOCK_V2 4 #define JBD2_REVOKE_BLOCK 5 +#define JBD2_FC_BLOCK 6 /* * Standard header for all descriptor blocks: @@ -233,7 +239,10 @@ typedef struct journal_superblock_s /* 0x0050 */ __u8 s_checksum_type; /* checksum type */ __u8 s_padding2[3]; - __be32 s_padding[42]; +/* 0x0054 */ + __be32 s_num_fc_blks; /* Number of fast commit blocks */ +/* 0x0058 */ + __be32 s_padding[41]; __be32 s_checksum; /* crc32c(superblock) */ /* 0x0100 */ @@ -259,6 +268,7 @@ typedef struct journal_superblock_s #define JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT 0x00000004 #define JBD2_FEATURE_INCOMPAT_CSUM_V2 0x00000008 #define JBD2_FEATURE_INCOMPAT_CSUM_V3 0x00000010 +#define JBD2_FEATURE_INCOMPAT_FAST_COMMIT 0x00000020 /* Features known to this kernel version: */ #define JBD2_KNOWN_COMPAT_FEATURES 0 @@ -267,7 +277,8 @@ typedef struct journal_superblock_s JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT| \ JBD2_FEATURE_INCOMPAT_64BIT|\ JBD2_FEATURE_INCOMPAT_CSUM_V2| \ - JBD2_FEATURE_INCOMPAT_CSUM_V3) + JBD2_FEATURE_INCOMPAT_CSUM_V3 | \ + JBD2_FEATURE_INCOMPAT_FAST_COMMIT) #ifdef NO_INLINE_FUNCS extern size_t journal_tag_bytes(journal_t *journal); @@ -384,6 +395,7 @@ JBD2_FEATURE_INCOMPAT_FUNCS(64bit, 64BIT) JBD2_FEATURE_INCOMPAT_FUNCS(async_commit, ASYNC_COMMIT) JBD2_FEATURE_INCOMPAT_FUNCS(csum2, CSUM_V2) JBD2_FEATURE_INCOMPAT_FUNCS(csum3, CSUM_V3) +JBD2_FEATURE_INCOMPAT_FUNCS(fast_commit, FAST_COMMIT) #if (defined(E2FSCK_INCLUDE_INLINE_FUNCS) || !defined(NO_INLINE_FUNCS)) /* -- 2.30.0.284.gd98b1dd5eaa7-goog