[CC += linux-api@] On Mon, Feb 2, 2015 at 6:37 AM, Theodore Ts'o <tytso@xxxxxxx> wrote: > Add an optimization for the MS_LAZYTIME mount option so that we will > opportunistically write out any inodes with the I_DIRTY_TIME flag set > in a particular inode table block when we need to update some inode in > that inode table block anyway. > > Also add some temporary code so that we can set the lazytime mount > option without needing a modified /sbin/mount program which can set > MS_LAZYTIME. We can eventually make this go away once util-linux has > added support. > > Google-Bug-Id: 18297052 > > Signed-off-by: Theodore Ts'o <tytso@xxxxxxx> > --- > fs/ext4/inode.c | 64 +++++++++++++++++++++++++++++++++++++++++++-- > fs/ext4/super.c | 10 +++++++ > include/trace/events/ext4.h | 30 +++++++++++++++++++++ > 3 files changed, 102 insertions(+), 2 deletions(-) > > diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c > index 628df5b..9193ea1 100644 > --- a/fs/ext4/inode.c > +++ b/fs/ext4/inode.c > @@ -4139,6 +4139,65 @@ static int ext4_inode_blocks_set(handle_t *handle, > return 0; > } > > +struct other_inode { > + unsigned long orig_ino; > + struct ext4_inode *raw_inode; > +}; > + > +static int other_inode_match(struct inode * inode, unsigned long ino, > + void *data) > +{ > + struct other_inode *oi = (struct other_inode *) data; > + > + if ((inode->i_ino != ino) || > + (inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW | > + I_DIRTY_SYNC | I_DIRTY_DATASYNC)) || > + ((inode->i_state & I_DIRTY_TIME) == 0)) > + return 0; > + spin_lock(&inode->i_lock); > + if (((inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW | > + I_DIRTY_SYNC | I_DIRTY_DATASYNC)) == 0) && > + (inode->i_state & I_DIRTY_TIME)) { > + struct ext4_inode_info *ei = EXT4_I(inode); > + > + inode->i_state &= ~(I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED); > + spin_unlock(&inode->i_lock); > + > + spin_lock(&ei->i_raw_lock); > + EXT4_INODE_SET_XTIME(i_ctime, inode, oi->raw_inode); > + EXT4_INODE_SET_XTIME(i_mtime, inode, oi->raw_inode); > + EXT4_INODE_SET_XTIME(i_atime, inode, oi->raw_inode); > + ext4_inode_csum_set(inode, oi->raw_inode, ei); > + spin_unlock(&ei->i_raw_lock); > + trace_ext4_other_inode_update_time(inode, oi->orig_ino); > + return -1; > + } > + spin_unlock(&inode->i_lock); > + return -1; > +} > + > +/* > + * Opportunistically update the other time fields for other inodes in > + * the same inode table block. > + */ > +static void ext4_update_other_inodes_time(struct super_block *sb, > + unsigned long orig_ino, char *buf) > +{ > + struct other_inode oi; > + unsigned long ino; > + int i, inodes_per_block = EXT4_SB(sb)->s_inodes_per_block; > + int inode_size = EXT4_INODE_SIZE(sb); > + > + oi.orig_ino = orig_ino; > + ino = orig_ino & ~(inodes_per_block - 1); > + for (i = 0; i < inodes_per_block; i++, ino++, buf += inode_size) { > + if (ino == orig_ino) > + continue; > + oi.raw_inode = (struct ext4_inode *) buf; > + (void) find_inode_nowait(sb, ino, other_inode_match, &oi); > + } > +} > + > /* > * Post the struct inode info into an on-disk inode location in the > * buffer-cache. This gobbles the caller's reference to the > @@ -4248,10 +4307,11 @@ static int ext4_do_update_inode(handle_t *handle, > cpu_to_le16(ei->i_extra_isize); > } > } > - > ext4_inode_csum_set(inode, raw_inode, ei); > - > spin_unlock(&ei->i_raw_lock); > + if (inode->i_sb->s_flags & MS_LAZYTIME) > + ext4_update_other_inodes_time(inode->i_sb, inode->i_ino, > + bh->b_data); > > BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); > rc = ext4_handle_dirty_metadata(handle, NULL, bh); > diff --git a/fs/ext4/super.c b/fs/ext4/super.c > index 74c5f53..362b23c 100644 > --- a/fs/ext4/super.c > +++ b/fs/ext4/super.c > @@ -1139,6 +1139,7 @@ enum { > Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err, > Opt_usrquota, Opt_grpquota, Opt_i_version, > Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit, > + Opt_lazytime, Opt_nolazytime, > Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity, > Opt_inode_readahead_blks, Opt_journal_ioprio, > Opt_dioread_nolock, Opt_dioread_lock, > @@ -1202,6 +1203,8 @@ static const match_table_t tokens = { > {Opt_i_version, "i_version"}, > {Opt_stripe, "stripe=%u"}, > {Opt_delalloc, "delalloc"}, > + {Opt_lazytime, "lazytime"}, > + {Opt_nolazytime, "nolazytime"}, > {Opt_nodelalloc, "nodelalloc"}, > {Opt_removed, "mblk_io_submit"}, > {Opt_removed, "nomblk_io_submit"}, > @@ -1459,6 +1462,12 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token, > case Opt_i_version: > sb->s_flags |= MS_I_VERSION; > return 1; > + case Opt_lazytime: > + sb->s_flags |= MS_LAZYTIME; > + return 1; > + case Opt_nolazytime: > + sb->s_flags &= ~MS_LAZYTIME; > + return 1; > } > > for (m = ext4_mount_opts; m->token != Opt_err; m++) > @@ -5020,6 +5029,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) > } > #endif > > + *flags = (*flags & ~MS_LAZYTIME) | (sb->s_flags & MS_LAZYTIME); > ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s", orig_data); > kfree(orig_data); > return 0; > diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h > index 6cfb841..6e5abd6 100644 > --- a/include/trace/events/ext4.h > +++ b/include/trace/events/ext4.h > @@ -73,6 +73,36 @@ struct extent_status; > { FALLOC_FL_ZERO_RANGE, "ZERO_RANGE"}) > > > +TRACE_EVENT(ext4_other_inode_update_time, > + TP_PROTO(struct inode *inode, ino_t orig_ino), > + > + TP_ARGS(inode, orig_ino), > + > + TP_STRUCT__entry( > + __field( dev_t, dev ) > + __field( ino_t, ino ) > + __field( ino_t, orig_ino ) > + __field( uid_t, uid ) > + __field( gid_t, gid ) > + __field( __u16, mode ) > + ), > + > + TP_fast_assign( > + __entry->orig_ino = orig_ino; > + __entry->dev = inode->i_sb->s_dev; > + __entry->ino = inode->i_ino; > + __entry->uid = i_uid_read(inode); > + __entry->gid = i_gid_read(inode); > + __entry->mode = inode->i_mode; > + ), > + > + TP_printk("dev %d,%d orig_ino %lu ino %lu mode 0%o uid %u gid %u", > + MAJOR(__entry->dev), MINOR(__entry->dev), > + (unsigned long) __entry->orig_ino, > + (unsigned long) __entry->ino, __entry->mode, > + __entry->uid, __entry->gid) > +); > + > TRACE_EVENT(ext4_free_inode, > TP_PROTO(struct inode *inode), > > -- > 2.1.0 > > -- > To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in > the body of a message to majordomo@xxxxxxxxxxxxxxx > More majordomo info at http://vger.kernel.org/majordomo-info.html -- Michael Kerrisk Linux man-pages maintainer; http://www.kernel.org/doc/man-pages/ Author of "The Linux Programming Interface", http://blog.man7.org/ -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html