On Wed, Jul 15, 2020 at 03:18:11PM +0200, Jan Kara wrote: > Currently, system zones just track ranges of block, that are "important" > fs metadata (bitmaps, group descriptors, journal blocks, etc.). This > however complicates how extent tree (or indirect blocks) can be checked > for inodes that actually track such metadata - currently the journal > inode but arguably we should be treating quota files or resize inode > similarly. We cannot run __ext4_ext_check() on such metadata inodes when > loading their extents as that would immediately trigger the validity > checks and so we just hack around that and special-case the journal > inode. This however leads to a situation that a journal inode which has > extent tree of depth at least one can have invalid extent tree that gets > unnoticed until ext4_cache_extents() crashes. > > To overcome this limitation, track inode number each system zone belongs > to (0 is used for zones not belonging to any inode). We can then verify > inode number matches the expected one when verifying extent tree and > thus avoid the false errors. With this there's no need to to > special-case journal inode during extent tree checking anymore so remove > it. > > Fixes: 0a944e8a6c66 ("ext4: don't perform block validity checks on the journal inode") > Reported-by: Wolfgang Frisch <wolfgang.frisch@xxxxxxxx> > Signed-off-by: Jan Kara <jack@xxxxxxx> > --- > fs/ext4/block_validity.c | 49 ++++++++++++++++++++++++------------------------ > fs/ext4/ext4.h | 6 +++--- > fs/ext4/extents.c | 16 ++++++---------- > fs/ext4/indirect.c | 6 ++---- > fs/ext4/inode.c | 5 ++--- > fs/ext4/mballoc.c | 4 ++-- > 6 files changed, 40 insertions(+), 46 deletions(-) > > diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c > index b394a50ebbe3..3602356cbf09 100644 > --- a/fs/ext4/block_validity.c > +++ b/fs/ext4/block_validity.c > @@ -24,6 +24,7 @@ struct ext4_system_zone { > struct rb_node node; > ext4_fsblk_t start_blk; > unsigned int count; > + u32 ino; > }; > > static struct kmem_cache *ext4_system_zone_cachep; > @@ -45,7 +46,8 @@ void ext4_exit_system_zone(void) > static inline int can_merge(struct ext4_system_zone *entry1, > struct ext4_system_zone *entry2) > { > - if ((entry1->start_blk + entry1->count) == entry2->start_blk) > + if ((entry1->start_blk + entry1->count) == entry2->start_blk && > + entry1->ino == entry2->ino) > return 1; > return 0; > } > @@ -66,7 +68,7 @@ static void release_system_zone(struct ext4_system_blocks *system_blks) > */ > static int add_system_zone(struct ext4_system_blocks *system_blks, > ext4_fsblk_t start_blk, > - unsigned int count) > + unsigned int count, u32 ino) > { > struct ext4_system_zone *new_entry, *entry; > struct rb_node **n = &system_blks->root.rb_node, *node; > @@ -89,6 +91,7 @@ static int add_system_zone(struct ext4_system_blocks *system_blks, > return -ENOMEM; > new_entry->start_blk = start_blk; > new_entry->count = count; > + new_entry->ino = ino; > new_node = &new_entry->node; > > rb_link_node(new_node, parent, n); > @@ -149,7 +152,7 @@ static void debug_print_tree(struct ext4_sb_info *sbi) > static int ext4_data_block_valid_rcu(struct ext4_sb_info *sbi, > struct ext4_system_blocks *system_blks, > ext4_fsblk_t start_blk, > - unsigned int count) > + unsigned int count, ino_t ino) > { > struct ext4_system_zone *entry; > struct rb_node *n; > @@ -170,7 +173,7 @@ static int ext4_data_block_valid_rcu(struct ext4_sb_info *sbi, > else if (start_blk >= (entry->start_blk + entry->count)) > n = n->rb_right; > else > - return 0; > + return entry->ino == ino; > } > return 1; > } > @@ -204,19 +207,18 @@ static int ext4_protect_reserved_inode(struct super_block *sb, > if (n == 0) { > i++; > } else { > - if (!ext4_data_block_valid_rcu(sbi, system_blks, > - map.m_pblk, n)) { > - err = -EFSCORRUPTED; > - __ext4_error(sb, __func__, __LINE__, -err, > - map.m_pblk, "blocks %llu-%llu " > - "from inode %u overlap system zone", > - map.m_pblk, > - map.m_pblk + map.m_len - 1, ino); > + err = add_system_zone(system_blks, map.m_pblk, n, ino); > + if (err < 0) { > + if (err == -EFSCORRUPTED) { > + __ext4_error(sb, __func__, __LINE__, > + -err, map.m_pblk, > + "blocks %llu-%llu from inode %u overlap system zone", > + map.m_pblk, > + map.m_pblk + map.m_len - 1, > + ino); > + } > break; > } > - err = add_system_zone(system_blks, map.m_pblk, n); > - if (err < 0) > - break; > i += n; > } > } > @@ -270,19 +272,19 @@ int ext4_setup_system_zone(struct super_block *sb) > ((i < 5) || ((i % flex_size) == 0))) > add_system_zone(system_blks, > ext4_group_first_block_no(sb, i), > - ext4_bg_num_gdb(sb, i) + 1); > + ext4_bg_num_gdb(sb, i) + 1, 0); Is there a good reason we don't check the return value, it can still fail right ? Other than that the patch looks good to me. Reviewed-by: Lukas Czerner <lczerner@xxxxxxxxxx> -Lukas > gdp = ext4_get_group_desc(sb, i, NULL); > ret = add_system_zone(system_blks, > - ext4_block_bitmap(sb, gdp), 1); > + ext4_block_bitmap(sb, gdp), 1, 0); > if (ret) > goto err; > ret = add_system_zone(system_blks, > - ext4_inode_bitmap(sb, gdp), 1); > + ext4_inode_bitmap(sb, gdp), 1, 0); > if (ret) > goto err; > ret = add_system_zone(system_blks, > ext4_inode_table(sb, gdp), > - sbi->s_itb_per_group); > + sbi->s_itb_per_group, 0); > if (ret) > goto err; > } > @@ -331,7 +333,7 @@ void ext4_release_system_zone(struct super_block *sb) > call_rcu(&system_blks->rcu, ext4_destroy_system_zone); > } > > -int ext4_data_block_valid(struct ext4_sb_info *sbi, ext4_fsblk_t start_blk, > +int ext4_inode_block_valid(struct inode *inode, ext4_fsblk_t start_blk, > unsigned int count) > { > struct ext4_system_blocks *system_blks; > @@ -344,8 +346,8 @@ int ext4_data_block_valid(struct ext4_sb_info *sbi, ext4_fsblk_t start_blk, > */ > rcu_read_lock(); > system_blks = rcu_dereference(sbi->system_blks); > - ret = ext4_data_block_valid_rcu(sbi, system_blks, start_blk, > - count); > + ret = ext4_data_block_valid_rcu(EXT4_SB(inode->i_sb), system_blks, > + start_blk, count, inode->i_ino); > rcu_read_unlock(); > return ret; > } > @@ -364,8 +366,7 @@ int ext4_check_blockref(const char *function, unsigned int line, > while (bref < p+max) { > blk = le32_to_cpu(*bref++); > if (blk && > - unlikely(!ext4_data_block_valid(EXT4_SB(inode->i_sb), > - blk, 1))) { > + unlikely(!ext4_inode_block_valid(inode, blk, 1))) { > ext4_error_inode(inode, function, line, blk, > "invalid block"); > return -EFSCORRUPTED; > diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h > index 42f5060f3cdf..42815304902b 100644 > --- a/fs/ext4/ext4.h > +++ b/fs/ext4/ext4.h > @@ -3363,9 +3363,9 @@ extern void ext4_release_system_zone(struct super_block *sb); > extern int ext4_setup_system_zone(struct super_block *sb); > extern int __init ext4_init_system_zone(void); > extern void ext4_exit_system_zone(void); > -extern int ext4_data_block_valid(struct ext4_sb_info *sbi, > - ext4_fsblk_t start_blk, > - unsigned int count); > +extern int ext4_inode_block_valid(struct inode *inode, > + ext4_fsblk_t start_blk, > + unsigned int count); > extern int ext4_check_blockref(const char *, unsigned int, > struct inode *, __le32 *, unsigned int); > > diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c > index 221f240eae60..d75054570e44 100644 > --- a/fs/ext4/extents.c > +++ b/fs/ext4/extents.c > @@ -340,7 +340,7 @@ static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext) > */ > if (lblock + len <= lblock) > return 0; > - return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len); > + return ext4_inode_block_valid(inode, block, len); > } > > static int ext4_valid_extent_idx(struct inode *inode, > @@ -348,7 +348,7 @@ static int ext4_valid_extent_idx(struct inode *inode, > { > ext4_fsblk_t block = ext4_idx_pblock(ext_idx); > > - return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, 1); > + return ext4_inode_block_valid(inode, block, 1); > } > > static int ext4_valid_extent_entries(struct inode *inode, > @@ -507,14 +507,10 @@ __read_extent_tree_block(const char *function, unsigned int line, > } > if (buffer_verified(bh) && !(flags & EXT4_EX_FORCE_CACHE)) > return bh; > - if (!ext4_has_feature_journal(inode->i_sb) || > - (inode->i_ino != > - le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_journal_inum))) { > - err = __ext4_ext_check(function, line, inode, > - ext_block_hdr(bh), depth, pblk); > - if (err) > - goto errout; > - } > + err = __ext4_ext_check(function, line, inode, > + ext_block_hdr(bh), depth, pblk); > + if (err) > + goto errout; > set_buffer_verified(bh); > /* > * If this is a leaf block, cache all of its entries > diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c > index be2b66eb65f7..402641825712 100644 > --- a/fs/ext4/indirect.c > +++ b/fs/ext4/indirect.c > @@ -858,8 +858,7 @@ static int ext4_clear_blocks(handle_t *handle, struct inode *inode, > else if (ext4_should_journal_data(inode)) > flags |= EXT4_FREE_BLOCKS_FORGET; > > - if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), block_to_free, > - count)) { > + if (!ext4_inode_block_valid(inode, block_to_free, count)) { > EXT4_ERROR_INODE(inode, "attempt to clear invalid " > "blocks %llu len %lu", > (unsigned long long) block_to_free, count); > @@ -1004,8 +1003,7 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, > if (!nr) > continue; /* A hole */ > > - if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), > - nr, 1)) { > + if (!ext4_inode_block_valid(inode, nr, 1)) { > EXT4_ERROR_INODE(inode, > "invalid indirect mapped " > "block %lu (level %d)", > diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c > index 10dd470876b3..92573f8540ab 100644 > --- a/fs/ext4/inode.c > +++ b/fs/ext4/inode.c > @@ -394,8 +394,7 @@ static int __check_block_validity(struct inode *inode, const char *func, > (inode->i_ino == > le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_journal_inum))) > return 0; > - if (!ext4_data_block_valid(EXT4_SB(inode->i_sb), map->m_pblk, > - map->m_len)) { > + if (!ext4_inode_block_valid(inode, map->m_pblk, map->m_len)) { > ext4_error_inode(inode, func, line, map->m_pblk, > "lblock %lu mapped to illegal pblock %llu " > "(length %d)", (unsigned long) map->m_lblk, > @@ -4760,7 +4759,7 @@ struct inode *__ext4_iget(struct super_block *sb, unsigned long ino, > > ret = 0; > if (ei->i_file_acl && > - !ext4_data_block_valid(EXT4_SB(sb), ei->i_file_acl, 1)) { > + !ext4_inode_block_valid(inode, ei->i_file_acl, 1)) { > ext4_error_inode(inode, function, line, 0, > "iget: bad extended attribute block %llu", > ei->i_file_acl); > diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c > index c0a331e2feb0..38719c156573 100644 > --- a/fs/ext4/mballoc.c > +++ b/fs/ext4/mballoc.c > @@ -3090,7 +3090,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac, > block = ext4_grp_offs_to_block(sb, &ac->ac_b_ex); > > len = EXT4_C2B(sbi, ac->ac_b_ex.fe_len); > - if (!ext4_data_block_valid(sbi, block, len)) { > + if (!ext4_inode_block_valid(ac->ac_inode, block, len)) { > ext4_error(sb, "Allocating blocks %llu-%llu which overlap " > "fs metadata", block, block+len); > /* File system mounted not to panic on error > @@ -4915,7 +4915,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode, > > sbi = EXT4_SB(sb); > if (!(flags & EXT4_FREE_BLOCKS_VALIDATED) && > - !ext4_data_block_valid(sbi, block, count)) { > + !ext4_inode_block_valid(inode, block, count)) { > ext4_error(sb, "Freeing blocks not in datazone - " > "block = %llu, count = %lu", block, count); > goto error_return; > -- > 2.16.4 >