On Sun 24-01-10 14:35:57, Theodore Ts'o wrote: > At several places we modify EXT4_I(inode)->i_state without holding > i_mutex (ext4_release_file, ext4_bmap, ext4_journalled_writepage, > ext4_do_update_inode, ...). These modifications are racy and we can > lose updates to i_state. So convert handling of i_state to use bitops > which are atomic. > > Cc: Jan Kara <jack@xxxxxxx> > Signed-off-by: "Theodore Ts'o" <tytso@xxxxxxx> The patch looks good. Acked-by: Jan Kara <jack@xxxxxxx> Honza > --- > fs/ext4/ext4.h | 41 +++++++++++++++++++++++++++++------------ > fs/ext4/extents.c | 8 ++++---- > fs/ext4/file.c | 4 ++-- > fs/ext4/ialloc.c | 3 ++- > fs/ext4/inode.c | 38 ++++++++++++++++++++------------------ > fs/ext4/migrate.c | 6 +++--- > fs/ext4/xattr.c | 22 +++++++++++----------- > 7 files changed, 71 insertions(+), 51 deletions(-) > > diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h > index 2ca1b41..ac000a3 100644 > --- a/fs/ext4/ext4.h > +++ b/fs/ext4/ext4.h > @@ -313,17 +313,6 @@ static inline __u32 ext4_mask_flags(umode_t mode, __u32 flags) > return flags & EXT4_OTHER_FLMASK; > } > > -/* > - * Inode dynamic state flags > - */ > -#define EXT4_STATE_JDATA 0x00000001 /* journaled data exists */ > -#define EXT4_STATE_NEW 0x00000002 /* inode is newly created */ > -#define EXT4_STATE_XATTR 0x00000004 /* has in-inode xattrs */ > -#define EXT4_STATE_NO_EXPAND 0x00000008 /* No space for expansion */ > -#define EXT4_STATE_DA_ALLOC_CLOSE 0x00000010 /* Alloc DA blks on close */ > -#define EXT4_STATE_EXT_MIGRATE 0x00000020 /* Inode is migrating */ > -#define EXT4_STATE_DIO_UNWRITTEN 0x00000040 /* need convert on dio done*/ > - > /* Used to pass group descriptor data when online resize is done */ > struct ext4_new_group_input { > __u32 group; /* Group number for this data */ > @@ -631,7 +620,7 @@ struct ext4_inode_info { > * near to their parent directory's inode. > */ > ext4_group_t i_block_group; > - __u32 i_state; /* Dynamic state flags for ext4 */ > + unsigned long i_state_flags; /* Dynamic state flags */ > > ext4_lblk_t i_dir_start_lookup; > #ifdef CONFIG_EXT4_FS_XATTR > @@ -1051,6 +1040,34 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) > (ino >= EXT4_FIRST_INO(sb) && > ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count)); > } > + > +/* > + * Inode dynamic state flags > + */ > +enum { > + EXT4_STATE_JDATA, /* journaled data exists */ > + EXT4_STATE_NEW, /* inode is newly created */ > + EXT4_STATE_XATTR, /* has in-inode xattrs */ > + EXT4_STATE_NO_EXPAND, /* No space for expansion */ > + EXT4_STATE_DA_ALLOC_CLOSE, /* Alloc DA blks on close */ > + EXT4_STATE_EXT_MIGRATE, /* Inode is migrating */ > + EXT4_STATE_DIO_UNWRITTEN, /* need convert on dio done*/ > +}; > + > +static inline int ext4_test_inode_state(struct inode *inode, int bit) > +{ > + return test_bit(bit, &EXT4_I(inode)->i_state_flags); > +} > + > +static inline void ext4_set_inode_state(struct inode *inode, int bit) > +{ > + set_bit(bit, &EXT4_I(inode)->i_state_flags); > +} > + > +static inline void ext4_clear_inode_state(struct inode *inode, int bit) > +{ > + clear_bit(bit, &EXT4_I(inode)->i_state_flags); > +} > #else > /* Assume that user mode programs are passing in an ext4fs superblock, not > * a kernel struct super_block. This will allow us to call the feature-test > diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c > index 8a20a5e..5a5a47a 100644 > --- a/fs/ext4/extents.c > +++ b/fs/ext4/extents.c > @@ -3068,7 +3068,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, > if (io) > io->flag = DIO_AIO_UNWRITTEN; > else > - EXT4_I(inode)->i_state |= EXT4_STATE_DIO_UNWRITTEN; > + ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); > goto out; > } > /* async DIO end_io complete, convert the filled extent to written */ > @@ -3342,8 +3342,8 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, > if (io) > io->flag = DIO_AIO_UNWRITTEN; > else > - EXT4_I(inode)->i_state |= > - EXT4_STATE_DIO_UNWRITTEN;; > + ext4_set_inode_state(inode, > + EXT4_STATE_DIO_UNWRITTEN); > } > } > err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); > @@ -3721,7 +3721,7 @@ static int ext4_xattr_fiemap(struct inode *inode, > int error = 0; > > /* in-inode? */ > - if (EXT4_I(inode)->i_state & EXT4_STATE_XATTR) { > + if (ext4_test_inode_state(inode, EXT4_STATE_XATTR)) { > struct ext4_iloc iloc; > int offset; /* offset of xattr in inode */ > > diff --git a/fs/ext4/file.c b/fs/ext4/file.c > index 9630583..f6071ce 100644 > --- a/fs/ext4/file.c > +++ b/fs/ext4/file.c > @@ -35,9 +35,9 @@ > */ > static int ext4_release_file(struct inode *inode, struct file *filp) > { > - if (EXT4_I(inode)->i_state & EXT4_STATE_DA_ALLOC_CLOSE) { > + if (ext4_test_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE)) { > ext4_alloc_da_blocks(inode); > - EXT4_I(inode)->i_state &= ~EXT4_STATE_DA_ALLOC_CLOSE; > + ext4_clear_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE); > } > /* if we are the last writer on the inode, drop the block reservation */ > if ((filp->f_mode & FMODE_WRITE) && > diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c > index f3624ea..2fab5ad 100644 > --- a/fs/ext4/ialloc.c > +++ b/fs/ext4/ialloc.c > @@ -1029,7 +1029,8 @@ got: > inode->i_generation = sbi->s_next_generation++; > spin_unlock(&sbi->s_next_gen_lock); > > - ei->i_state = EXT4_STATE_NEW; > + ei->i_state_flags = 0; > + ext4_set_inode_state(inode, EXT4_STATE_NEW); > > ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize; > > diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c > index 1f432b5..30b814f 100644 > --- a/fs/ext4/inode.c > +++ b/fs/ext4/inode.c > @@ -1307,7 +1307,7 @@ int ext4_get_blocks(handle_t *handle, struct inode *inode, sector_t block, > * i_data's format changing. Force the migrate > * to fail by clearing migrate flags > */ > - EXT4_I(inode)->i_state &= ~EXT4_STATE_EXT_MIGRATE; > + ext4_clear_inode_state(inode, EXT4_STATE_EXT_MIGRATE); > } > > /* > @@ -1794,7 +1794,7 @@ static int ext4_journalled_write_end(struct file *file, > new_i_size = pos + copied; > if (new_i_size > inode->i_size) > i_size_write(inode, pos+copied); > - EXT4_I(inode)->i_state |= EXT4_STATE_JDATA; > + ext4_set_inode_state(inode, EXT4_STATE_JDATA); > if (new_i_size > EXT4_I(inode)->i_disksize) { > ext4_update_i_disksize(inode, new_i_size); > ret2 = ext4_mark_inode_dirty(handle, inode); > @@ -2616,7 +2616,7 @@ static int __ext4_journalled_writepage(struct page *page, > ret = err; > > walk_page_buffers(handle, page_bufs, 0, len, NULL, bput_one); > - EXT4_I(inode)->i_state |= EXT4_STATE_JDATA; > + ext4_set_inode_state(inode, EXT4_STATE_JDATA); > out: > return ret; > } > @@ -3287,7 +3287,8 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block) > filemap_write_and_wait(mapping); > } > > - if (EXT4_JOURNAL(inode) && EXT4_I(inode)->i_state & EXT4_STATE_JDATA) { > + if (EXT4_JOURNAL(inode) && > + ext4_test_inode_state(inode, EXT4_STATE_JDATA)) { > /* > * This is a REALLY heavyweight approach, but the use of > * bmap on dirty files is expected to be extremely rare: > @@ -3306,7 +3307,7 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block) > * everything they get. > */ > > - EXT4_I(inode)->i_state &= ~EXT4_STATE_JDATA; > + ext4_clear_inode_state(inode, EXT4_STATE_JDATA); > journal = EXT4_JOURNAL(inode); > jbd2_journal_lock_updates(journal); > err = jbd2_journal_flush(journal); > @@ -3774,8 +3775,8 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, > if (ret != -EIOCBQUEUED && ret <= 0 && iocb->private) { > ext4_free_io_end(iocb->private); > iocb->private = NULL; > - } else if (ret > 0 && (EXT4_I(inode)->i_state & > - EXT4_STATE_DIO_UNWRITTEN)) { > + } else if (ret > 0 && ext4_test_inode_state(inode, > + EXT4_STATE_DIO_UNWRITTEN)) { > int err; > /* > * for non AIO case, since the IO is already > @@ -3785,7 +3786,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, > offset, ret); > if (err < 0) > ret = err; > - EXT4_I(inode)->i_state &= ~EXT4_STATE_DIO_UNWRITTEN; > + ext4_clear_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); > } > return ret; > } > @@ -4441,7 +4442,7 @@ void ext4_truncate(struct inode *inode) > return; > > if (inode->i_size == 0 && !test_opt(inode->i_sb, NO_AUTO_DA_ALLOC)) > - ei->i_state |= EXT4_STATE_DA_ALLOC_CLOSE; > + ext4_set_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE); > > if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) { > ext4_ext_truncate(inode); > @@ -4727,7 +4728,7 @@ int ext4_get_inode_loc(struct inode *inode, struct ext4_iloc *iloc) > { > /* We have all inode data except xattrs in memory here. */ > return __ext4_get_inode_loc(inode, iloc, > - !(EXT4_I(inode)->i_state & EXT4_STATE_XATTR)); > + !ext4_test_inode_state(inode, EXT4_STATE_XATTR)); > } > > void ext4_set_inode_flags(struct inode *inode) > @@ -4821,7 +4822,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) > } > inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); > > - ei->i_state = 0; > + ei->i_state_flags = 0; > ei->i_dir_start_lookup = 0; > ei->i_dtime = le32_to_cpu(raw_inode->i_dtime); > /* We now have enough fields to check if the inode was active or not. > @@ -4904,7 +4905,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) > EXT4_GOOD_OLD_INODE_SIZE + > ei->i_extra_isize; > if (*magic == cpu_to_le32(EXT4_XATTR_MAGIC)) > - ei->i_state |= EXT4_STATE_XATTR; > + ext4_set_inode_state(inode, EXT4_STATE_XATTR); > } > } else > ei->i_extra_isize = 0; > @@ -5044,7 +5045,7 @@ static int ext4_do_update_inode(handle_t *handle, > > /* For fields not not tracking in the in-memory inode, > * initialise them to zero for new inodes. */ > - if (ei->i_state & EXT4_STATE_NEW) > + if (ext4_test_inode_state(inode, EXT4_STATE_NEW)) > memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size); > > ext4_get_inode_flags(ei); > @@ -5140,7 +5141,7 @@ static int ext4_do_update_inode(handle_t *handle, > rc = ext4_handle_dirty_metadata(handle, inode, bh); > if (!err) > err = rc; > - ei->i_state &= ~EXT4_STATE_NEW; > + ext4_clear_inode_state(inode, EXT4_STATE_NEW); > > ext4_update_inode_fsync_trans(handle, inode, 0); > out_brelse: > @@ -5564,8 +5565,8 @@ static int ext4_expand_extra_isize(struct inode *inode, > entry = IFIRST(header); > > /* No extended attributes present */ > - if (!(EXT4_I(inode)->i_state & EXT4_STATE_XATTR) || > - header->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC)) { > + if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR) || > + header->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC)) { > memset((void *)raw_inode + EXT4_GOOD_OLD_INODE_SIZE, 0, > new_extra_isize); > EXT4_I(inode)->i_extra_isize = new_extra_isize; > @@ -5609,7 +5610,7 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode) > err = ext4_reserve_inode_write(handle, inode, &iloc); > if (ext4_handle_valid(handle) && > EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize && > - !(EXT4_I(inode)->i_state & EXT4_STATE_NO_EXPAND)) { > + !ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND)) { > /* > * We need extra buffer credits since we may write into EA block > * with this same handle. If journal_extend fails, then it will > @@ -5623,7 +5624,8 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode) > sbi->s_want_extra_isize, > iloc, handle); > if (ret) { > - EXT4_I(inode)->i_state |= EXT4_STATE_NO_EXPAND; > + ext4_set_inode_state(inode, > + EXT4_STATE_NO_EXPAND); > if (mnt_count != > le16_to_cpu(sbi->s_es->s_mnt_count)) { > ext4_warning(inode->i_sb, __func__, > diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c > index 8141581..46a4101 100644 > --- a/fs/ext4/migrate.c > +++ b/fs/ext4/migrate.c > @@ -365,12 +365,12 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode, > * happened after we started the migrate. We need to > * fail the migrate > */ > - if (!(EXT4_I(inode)->i_state & EXT4_STATE_EXT_MIGRATE)) { > + if (!ext4_test_inode_state(inode, EXT4_STATE_EXT_MIGRATE)) { > retval = -EAGAIN; > up_write(&EXT4_I(inode)->i_data_sem); > goto err_out; > } else > - EXT4_I(inode)->i_state &= ~EXT4_STATE_EXT_MIGRATE; > + ext4_clear_inode_state(inode, EXT4_STATE_EXT_MIGRATE); > /* > * We have the extent map build with the tmp inode. > * Now copy the i_data across > @@ -533,7 +533,7 @@ int ext4_ext_migrate(struct inode *inode) > * allocation. > */ > down_read((&EXT4_I(inode)->i_data_sem)); > - EXT4_I(inode)->i_state |= EXT4_STATE_EXT_MIGRATE; > + ext4_set_inode_state(inode, EXT4_STATE_EXT_MIGRATE); > up_read((&EXT4_I(inode)->i_data_sem)); > > handle = ext4_journal_start(inode, 1); > diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c > index f3a2f7e..c619a7e 100644 > --- a/fs/ext4/xattr.c > +++ b/fs/ext4/xattr.c > @@ -267,7 +267,7 @@ ext4_xattr_ibody_get(struct inode *inode, int name_index, const char *name, > void *end; > int error; > > - if (!(EXT4_I(inode)->i_state & EXT4_STATE_XATTR)) > + if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR)) > return -ENODATA; > error = ext4_get_inode_loc(inode, &iloc); > if (error) > @@ -396,7 +396,7 @@ ext4_xattr_ibody_list(struct dentry *dentry, char *buffer, size_t buffer_size) > void *end; > int error; > > - if (!(EXT4_I(inode)->i_state & EXT4_STATE_XATTR)) > + if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR)) > return 0; > error = ext4_get_inode_loc(inode, &iloc); > if (error) > @@ -908,7 +908,7 @@ ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i, > is->s.base = is->s.first = IFIRST(header); > is->s.here = is->s.first; > is->s.end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size; > - if (EXT4_I(inode)->i_state & EXT4_STATE_XATTR) { > + if (ext4_test_inode_state(inode, EXT4_STATE_XATTR)) { > error = ext4_xattr_check_names(IFIRST(header), is->s.end); > if (error) > return error; > @@ -940,10 +940,10 @@ ext4_xattr_ibody_set(handle_t *handle, struct inode *inode, > header = IHDR(inode, ext4_raw_inode(&is->iloc)); > if (!IS_LAST_ENTRY(s->first)) { > header->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC); > - EXT4_I(inode)->i_state |= EXT4_STATE_XATTR; > + ext4_set_inode_state(inode, EXT4_STATE_XATTR); > } else { > header->h_magic = cpu_to_le32(0); > - EXT4_I(inode)->i_state &= ~EXT4_STATE_XATTR; > + ext4_clear_inode_state(inode, EXT4_STATE_XATTR); > } > return 0; > } > @@ -986,8 +986,8 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, > if (strlen(name) > 255) > return -ERANGE; > down_write(&EXT4_I(inode)->xattr_sem); > - no_expand = EXT4_I(inode)->i_state & EXT4_STATE_NO_EXPAND; > - EXT4_I(inode)->i_state |= EXT4_STATE_NO_EXPAND; > + no_expand = ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND); > + ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND); > > error = ext4_get_inode_loc(inode, &is.iloc); > if (error) > @@ -997,10 +997,10 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, > if (error) > goto cleanup; > > - if (EXT4_I(inode)->i_state & EXT4_STATE_NEW) { > + if (ext4_test_inode_state(inode, EXT4_STATE_NEW)) { > struct ext4_inode *raw_inode = ext4_raw_inode(&is.iloc); > memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size); > - EXT4_I(inode)->i_state &= ~EXT4_STATE_NEW; > + ext4_clear_inode_state(inode, EXT4_STATE_NEW); > } > > error = ext4_xattr_ibody_find(inode, &i, &is); > @@ -1052,7 +1052,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, > ext4_xattr_update_super_block(handle, inode->i_sb); > inode->i_ctime = ext4_current_time(inode); > if (!value) > - EXT4_I(inode)->i_state &= ~EXT4_STATE_NO_EXPAND; > + ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND); > error = ext4_mark_iloc_dirty(handle, inode, &is.iloc); > /* > * The bh is consumed by ext4_mark_iloc_dirty, even with > @@ -1067,7 +1067,7 @@ cleanup: > brelse(is.iloc.bh); > brelse(bs.bh); > if (no_expand == 0) > - EXT4_I(inode)->i_state &= ~EXT4_STATE_NO_EXPAND; > + ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND); > up_write(&EXT4_I(inode)->xattr_sem); > return error; > } > -- > 1.6.6.1.1.g974db.dirty > -- Jan Kara <jack@xxxxxxx> SUSE Labs, CR -- To unsubscribe from this list: send the line "unsubscribe linux-ext4" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html