On May 27, 2021, at 2:57 AM, Artem Blagodarenko <artem.blagodarenko@xxxxxxxxx> wrote: > > E2fsck interrupts journal replay if a broken transaction is found. > Journal is replayed partly. Some useful transactions are not replayed. > > Let's change this behavior and process all transactions except broken ones > if "-E repair_journal" option is set. In the past we discussed using the per-block checksums in the journal (which I think are now widely available, maybe only with metacsum?) to detect which blocks in the journal are corrupted, and only skip replay of those blocks. If the same block is overwritten in a later journal replay phase (e.g. common case for bitmaps, group descriptors, busy inodes, etc), then no extra action is needed by e2fsck (in theory, though we may still want to run an e2fsck to find *other* corruption). The kernel could probably do the same, to avoid aborting a mount due to small journal corruption. If the corrupted blocks are bitmaps, then the filesystem could maybe still continue to mount, with those groups marked EXT4_GROUP_INFO_BBITMAP_CORRUPT or EXT4_GROUP_INFO_IBITMAP_CORRUPT so they are skipped during allocation, and only mark the filesystem in error. If there are other blocks corrupted, the mount should probably still fail. Cheers, Andreas > > HPE-bug-id: LUS-9452 > --- > e2fsck/e2fsck.h | 1 + > e2fsck/journal.c | 3 +++ > e2fsck/recovery.c | 41 +++++++++++++++++++++++++++++++---------- > e2fsck/unix.c | 3 +++ > lib/e2p/feature.c | 2 ++ > lib/ext2fs/kernel-jbd.h | 5 ++++- > misc/ext4.5.in | 4 ++++ > 7 files changed, 48 insertions(+), 11 deletions(-) > > diff --git a/e2fsck/e2fsck.h b/e2fsck/e2fsck.h > index 15d043ee..7dccc8e4 100644 > --- a/e2fsck/e2fsck.h > +++ b/e2fsck/e2fsck.h > @@ -179,6 +179,7 @@ struct resource_track { > #define E2F_OPT_UNSHARE_BLOCKS 0x40000 > #define E2F_OPT_CLEAR_UNINIT 0x80000 /* Hack to clear the uninit bit */ > #define E2F_OPT_CHECK_ENCODING 0x100000 /* Force verification of encoded filenames */ > +#define E2F_OPT_REPAIR_JOURNAL 0x200000 /* Apply all possible from journal */ > > /* > * E2fsck flags > diff --git a/e2fsck/journal.c b/e2fsck/journal.c > index a425bbd1..9b84c477 100644 > --- a/e2fsck/journal.c > +++ b/e2fsck/journal.c > @@ -1620,6 +1620,9 @@ static errcode_t recover_ext3_journal(e2fsck_t ctx) > if (retval) > return retval; > > + if (ctx->options & E2F_OPT_REPAIR_JOURNAL) > + jbd2_set_feature_repair(journal); > + > retval = e2fsck_journal_load(journal); > if (retval) > goto errout; > diff --git a/e2fsck/recovery.c b/e2fsck/recovery.c > index dc0694fc..5aa3e529 100644 > --- a/e2fsck/recovery.c > +++ b/e2fsck/recovery.c > @@ -33,6 +33,7 @@ struct recovery_info > int nr_replays; > int nr_revokes; > int nr_revoke_hits; > + int nr_skipped; > }; > > static int do_one_pass(journal_t *journal, > @@ -313,8 +314,9 @@ int jbd2_journal_recover(journal_t *journal) > jbd_debug(1, "JBD2: recovery, exit status %d, " > "recovered transactions %u to %u\n", > err, info.start_transaction, info.end_transaction); > - jbd_debug(1, "JBD2: Replayed %d and revoked %d/%d blocks\n", > - info.nr_replays, info.nr_revoke_hits, info.nr_revokes); > + jbd_debug(1, "JBD2: Replayed %d, skipped %d, and revoked %d/%d blocks\n", > + info.nr_replays, info.nr_skipped, info.nr_revoke_hits, > + info.nr_revokes); > > /* Restart the log at the next transaction ID, thus invalidating > * any existing commit records in the log. */ > @@ -787,16 +789,35 @@ static int do_one_pass(journal_t *journal, > } > > /* Neither checksum match nor unused? */ > - if (!((crc32_sum == found_chksum && > - cbh->h_chksum_type == > - JBD2_CRC32_CHKSUM && > - cbh->h_chksum_size == > - JBD2_CRC32_CHKSUM_SIZE) || > - (cbh->h_chksum_type == 0 && > - cbh->h_chksum_size == 0 && > - found_chksum == 0))) > + if (cbh->h_chksum_type == 0 && > + cbh->h_chksum_size == 0 && > + found_chksum == 0) > goto chksum_error; > > + if (!(crc32_sum = found_chksum && > + cbh->h_chksum_type == JBD2_CRC32_CHKSUM && > + cbh->h_chksum_size == > + JBD2_CRC32_CHKSUM_SIZE)) { > + if (jbd2_has_feature_repair(journal)) { > + /* > + * Commit with wrong checksum. > + * Let's skip it. There are > + * some corect commits after. > + */ > + ++info->nr_skipped; > + jbd_debug(1, "JBD2: " > + "crc32_sum(0x%x)i !=" > + " found_chksum(0x%x)" > + ". Skipped.\n", > + crc32_sum, > + found_chksum); > + brelse(bh); > + next_commit_ID++; > + } else { > + goto chksum_error; > + } > + } > + > crc32_sum = ~0; > } > if (pass == PASS_SCAN && > diff --git a/e2fsck/unix.c b/e2fsck/unix.c > index c5f9e441..fc7649fc 100644 > --- a/e2fsck/unix.c > +++ b/e2fsck/unix.c > @@ -763,6 +763,9 @@ static void parse_extended_opts(e2fsck_t ctx, const char *opts) > ctx->options |= E2F_OPT_CLEAR_UNINIT; > continue; > #endif > + } else if (strcmp(token, "repair_journal") == 0) { > + ctx->options |= E2F_OPT_REPAIR_JOURNAL; > + continue; > } else { > fprintf(stderr, _("Unknown extended option: %s\n"), > token); > diff --git a/lib/e2p/feature.c b/lib/e2p/feature.c > index 22910602..6cd11384 100644 > --- a/lib/e2p/feature.c > +++ b/lib/e2p/feature.c > @@ -134,6 +134,8 @@ static struct feature jrnl_feature_list[] = { > "journal_checksum_v2" }, > { E2P_FEATURE_INCOMPAT, JBD2_FEATURE_INCOMPAT_CSUM_V3, > "journal_checksum_v3" }, > + { E2P_FEATURE_INCOMPAT, JBD2_FEATURE_INCOMPAT_REPAIR, > + "journal_repair" }, > { 0, 0, 0 }, > }; > > diff --git a/lib/ext2fs/kernel-jbd.h b/lib/ext2fs/kernel-jbd.h > index 2978ccb6..4a2cef20 100644 > --- a/lib/ext2fs/kernel-jbd.h > +++ b/lib/ext2fs/kernel-jbd.h > @@ -265,6 +265,7 @@ typedef struct journal_superblock_s > #define JBD2_FEATURE_INCOMPAT_CSUM_V2 0x00000008 > #define JBD2_FEATURE_INCOMPAT_CSUM_V3 0x00000010 > #define JBD2_FEATURE_INCOMPAT_FAST_COMMIT 0x00000020 > +#define JBD2_FEATURE_INCOMPAT_REPAIR 0x00000040 > > /* Features known to this kernel version: */ > #define JBD2_KNOWN_COMPAT_FEATURES 0 > @@ -274,7 +275,8 @@ typedef struct journal_superblock_s > JBD2_FEATURE_INCOMPAT_64BIT|\ > JBD2_FEATURE_INCOMPAT_CSUM_V2| \ > JBD2_FEATURE_INCOMPAT_CSUM_V3 | \ > - JBD2_FEATURE_INCOMPAT_FAST_COMMIT) > + JBD2_FEATURE_INCOMPAT_FAST_COMMIT | \ > + JBD2_FEATURE_INCOMPAT_REPAIR) > > #ifdef NO_INLINE_FUNCS > extern size_t journal_tag_bytes(journal_t *journal); > @@ -392,6 +394,7 @@ JBD2_FEATURE_INCOMPAT_FUNCS(async_commit, ASYNC_COMMIT) > JBD2_FEATURE_INCOMPAT_FUNCS(csum2, CSUM_V2) > JBD2_FEATURE_INCOMPAT_FUNCS(csum3, CSUM_V3) > JBD2_FEATURE_INCOMPAT_FUNCS(fast_commit, FAST_COMMIT) > +JBD2_FEATURE_INCOMPAT_FUNCS(repair, REPAIR) > > #if (defined(E2FSCK_INCLUDE_INLINE_FUNCS) || !defined(NO_INLINE_FUNCS)) > /* > diff --git a/misc/ext4.5.in b/misc/ext4.5.in > index 90bc4f88..95266864 100644 > --- a/misc/ext4.5.in > +++ b/misc/ext4.5.in > @@ -574,6 +574,10 @@ Commit block can be written to disk without waiting for descriptor blocks. If > enabled older kernels cannot mount the device. > This will enable 'journal_checksum' internally. > .TP > +.B journal_repair > +If journal is broken, apply all valid transactions and pass > +all broken ones(with invalid crc). > +.TP > .BR barrier=0 " / " barrier=1 " / " barrier " / " nobarrier > These mount options have the same effect as in ext3. The mount options > "barrier" and "nobarrier" are added for consistency with other ext4 mount > -- > 2.18.4 > Cheers, Andreas
Attachment:
signature.asc
Description: Message signed with OpenPGP