Re: [PATCH-v9 1/3] vfs: add support for a lazytime mount option

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



[CC += linux-api@]

On Mon, Feb 2, 2015 at 6:37 AM, Theodore Ts'o <tytso@xxxxxxx> wrote:
> Add a new mount option which enables a new "lazytime" mode.  This mode
> causes atime, mtime, and ctime updates to only be made to the
> in-memory version of the inode.  The on-disk times will only get
> updated (a) when the inode needs to be updated for some non-time
> related change, (b) when userspace calls fsync(), syncfs() or sync(),
> or (c) just before an undeleted inode is evicted from memory.
>
> This is OK according to POSIX because there are no guarantees after a
> crash unless userspace explicitly requests them via an fsync(2) call.
>
> For workloads which feature a large number of random writes to a
> preallocated file, the lazytime mount option significantly reduces
> writes to the inode table.  The repeated 4k writes to a single block
> will result in undesirable stress on flash devices and SMR disk
> drives.  Even on conventional HDDs, the repeated writes to the inode
> table block will trigger Adjacent Track Interference (ATI) remediation
> latencies, which very negatively impact long tail latencies --- which
> is a very big deal for web serving tiers (for example).
>
> Google-Bug-Id: 18297052
>
> Signed-off-by: Theodore Ts'o <tytso@xxxxxxx>
> ---
>  fs/ext4/inode.c                  |  6 ++++
>  fs/fs-writeback.c                | 62 +++++++++++++++++++++++++++++++++-------
>  fs/gfs2/file.c                   |  4 +--
>  fs/inode.c                       | 56 +++++++++++++++++++++++++-----------
>  fs/jfs/file.c                    |  2 +-
>  fs/libfs.c                       |  2 +-
>  fs/proc_namespace.c              |  1 +
>  fs/sync.c                        |  8 ++++++
>  include/linux/backing-dev.h      |  1 +
>  include/linux/fs.h               |  5 ++++
>  include/trace/events/writeback.h | 60 +++++++++++++++++++++++++++++++++++++-
>  include/uapi/linux/fs.h          |  4 ++-
>  mm/backing-dev.c                 | 10 +++++--
>  13 files changed, 186 insertions(+), 35 deletions(-)
>
> diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
> index 5653fa4..628df5b 100644
> --- a/fs/ext4/inode.c
> +++ b/fs/ext4/inode.c
> @@ -4840,11 +4840,17 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
>   * If the inode is marked synchronous, we don't honour that here - doing
>   * so would cause a commit on atime updates, which we don't bother doing.
>   * We handle synchronous inodes at the highest possible level.
> + *
> + * If only the I_DIRTY_TIME flag is set, we can skip everything.  If
> + * I_DIRTY_TIME and I_DIRTY_SYNC are set, the only inode fields we need
> + * to copy into the on-disk inode structure are the timestamp fields.
>   */
>  void ext4_dirty_inode(struct inode *inode, int flags)
>  {
>         handle_t *handle;
>
> +       if (flags == I_DIRTY_TIME)
> +               return;
>         handle = ext4_journal_start(inode, EXT4_HT_INODE, 2);
>         if (IS_ERR(handle))
>                 goto out;
> diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
> index 2d609a5..0046861 100644
> --- a/fs/fs-writeback.c
> +++ b/fs/fs-writeback.c
> @@ -247,14 +247,19 @@ static bool inode_dirtied_after(struct inode *inode, unsigned long t)
>         return ret;
>  }
>
> +#define EXPIRE_DIRTY_ATIME 0x0001
> +
>  /*
>   * Move expired (dirtied before work->older_than_this) dirty inodes from
>   * @delaying_queue to @dispatch_queue.
>   */
>  static int move_expired_inodes(struct list_head *delaying_queue,
>                                struct list_head *dispatch_queue,
> +                              int flags,
>                                struct wb_writeback_work *work)
>  {
> +       unsigned long *older_than_this = NULL;
> +       unsigned long expire_time;
>         LIST_HEAD(tmp);
>         struct list_head *pos, *node;
>         struct super_block *sb = NULL;
> @@ -262,13 +267,21 @@ static int move_expired_inodes(struct list_head *delaying_queue,
>         int do_sb_sort = 0;
>         int moved = 0;
>
> +       if ((flags & EXPIRE_DIRTY_ATIME) == 0)
> +               older_than_this = work->older_than_this;
> +       else if ((work->reason == WB_REASON_SYNC) == 0) {
> +               expire_time = jiffies - (HZ * 86400);
> +               older_than_this = &expire_time;
> +       }
>         while (!list_empty(delaying_queue)) {
>                 inode = wb_inode(delaying_queue->prev);
> -               if (work->older_than_this &&
> -                   inode_dirtied_after(inode, *work->older_than_this))
> +               if (older_than_this &&
> +                   inode_dirtied_after(inode, *older_than_this))
>                         break;
>                 list_move(&inode->i_wb_list, &tmp);
>                 moved++;
> +               if (flags & EXPIRE_DIRTY_ATIME)
> +                       set_bit(__I_DIRTY_TIME_EXPIRED, &inode->i_state);
>                 if (sb_is_blkdev_sb(inode->i_sb))
>                         continue;
>                 if (sb && sb != inode->i_sb)
> @@ -309,9 +322,12 @@ out:
>  static void queue_io(struct bdi_writeback *wb, struct wb_writeback_work *work)
>  {
>         int moved;
> +
>         assert_spin_locked(&wb->list_lock);
>         list_splice_init(&wb->b_more_io, &wb->b_io);
> -       moved = move_expired_inodes(&wb->b_dirty, &wb->b_io, work);
> +       moved = move_expired_inodes(&wb->b_dirty, &wb->b_io, 0, work);
> +       moved += move_expired_inodes(&wb->b_dirty_time, &wb->b_io,
> +                                    EXPIRE_DIRTY_ATIME, work);
>         trace_writeback_queue_io(wb, work, moved);
>  }
>
> @@ -435,6 +451,8 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb,
>                  * updates after data IO completion.
>                  */
>                 redirty_tail(inode, wb);
> +       } else if (inode->i_state & I_DIRTY_TIME) {
> +               list_move(&inode->i_wb_list, &wb->b_dirty_time);
>         } else {
>                 /* The inode is clean. Remove from writeback lists. */
>                 list_del_init(&inode->i_wb_list);
> @@ -481,7 +499,13 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
>         spin_lock(&inode->i_lock);
>
>         dirty = inode->i_state & I_DIRTY;
> -       inode->i_state &= ~I_DIRTY;
> +       if (((dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) &&
> +            (inode->i_state & I_DIRTY_TIME)) ||
> +           (inode->i_state & I_DIRTY_TIME_EXPIRED)) {
> +               dirty |= I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED;
> +               trace_writeback_lazytime(inode);
> +       }
> +       inode->i_state &= ~dirty;
>
>         /*
>          * Paired with smp_mb() in __mark_inode_dirty().  This allows
> @@ -501,8 +525,10 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
>
>         spin_unlock(&inode->i_lock);
>
> +       if (dirty & I_DIRTY_TIME)
> +               mark_inode_dirty_sync(inode);
>         /* Don't write the inode if only I_DIRTY_PAGES was set */
> -       if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {
> +       if (dirty & ~I_DIRTY_PAGES) {
>                 int err = write_inode(inode, wbc);
>                 if (ret == 0)
>                         ret = err;
> @@ -550,7 +576,7 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
>          * make sure inode is on some writeback list and leave it there unless
>          * we have completely cleaned the inode.
>          */
> -       if (!(inode->i_state & I_DIRTY) &&
> +       if (!(inode->i_state & I_DIRTY_ALL) &&
>             (wbc->sync_mode != WB_SYNC_ALL ||
>              !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_WRITEBACK)))
>                 goto out;
> @@ -565,7 +591,7 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
>          * If inode is clean, remove it from writeback lists. Otherwise don't
>          * touch it. See comment above for explanation.
>          */
> -       if (!(inode->i_state & I_DIRTY))
> +       if (!(inode->i_state & I_DIRTY_ALL))
>                 list_del_init(&inode->i_wb_list);
>         spin_unlock(&wb->list_lock);
>         inode_sync_complete(inode);
> @@ -707,7 +733,7 @@ static long writeback_sb_inodes(struct super_block *sb,
>                 wrote += write_chunk - wbc.nr_to_write;
>                 spin_lock(&wb->list_lock);
>                 spin_lock(&inode->i_lock);
> -               if (!(inode->i_state & I_DIRTY))
> +               if (!(inode->i_state & I_DIRTY_ALL))
>                         wrote++;
>                 requeue_inode(inode, wb, &wbc);
>                 inode_sync_complete(inode);
> @@ -1145,16 +1171,20 @@ static noinline void block_dump___mark_inode_dirty(struct inode *inode)
>   * page->mapping->host, so the page-dirtying time is recorded in the internal
>   * blockdev inode.
>   */
> +#define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC)
>  void __mark_inode_dirty(struct inode *inode, int flags)
>  {
>         struct super_block *sb = inode->i_sb;
>         struct backing_dev_info *bdi = NULL;
> +       int dirtytime;
> +
> +       trace_writeback_mark_inode_dirty(inode, flags);
>
>         /*
>          * Don't do this for I_DIRTY_PAGES - that doesn't actually
>          * dirty the inode itself
>          */
> -       if (flags & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {
> +       if (flags & (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_TIME)) {
>                 trace_writeback_dirty_inode_start(inode, flags);
>
>                 if (sb->s_op->dirty_inode)
> @@ -1162,6 +1192,9 @@ void __mark_inode_dirty(struct inode *inode, int flags)
>
>                 trace_writeback_dirty_inode(inode, flags);
>         }
> +       if (flags & I_DIRTY_INODE)
> +               flags &= ~I_DIRTY_TIME;
> +       dirtytime = flags & I_DIRTY_TIME;
>
>         /*
>          * Paired with smp_mb() in __writeback_single_inode() for the
> @@ -1169,16 +1202,21 @@ void __mark_inode_dirty(struct inode *inode, int flags)
>          */
>         smp_mb();
>
> -       if ((inode->i_state & flags) == flags)
> +       if (((inode->i_state & flags) == flags) ||
> +           (dirtytime && (inode->i_state & I_DIRTY_INODE)))
>                 return;
>
>         if (unlikely(block_dump))
>                 block_dump___mark_inode_dirty(inode);
>
>         spin_lock(&inode->i_lock);
> +       if (dirtytime && (inode->i_state & I_DIRTY_INODE))
> +               goto out_unlock_inode;
>         if ((inode->i_state & flags) != flags) {
>                 const int was_dirty = inode->i_state & I_DIRTY;
>
> +               if (flags & I_DIRTY_INODE)
> +                       inode->i_state &= ~I_DIRTY_TIME;
>                 inode->i_state |= flags;
>
>                 /*
> @@ -1225,8 +1263,10 @@ void __mark_inode_dirty(struct inode *inode, int flags)
>                         }
>
>                         inode->dirtied_when = jiffies;
> -                       list_move(&inode->i_wb_list, &bdi->wb.b_dirty);
> +                       list_move(&inode->i_wb_list, dirtytime ?
> +                                 &bdi->wb.b_dirty_time : &bdi->wb.b_dirty);
>                         spin_unlock(&bdi->wb.list_lock);
> +                       trace_writeback_dirty_inode_enqueue(inode);
>
>                         if (wakeup_bdi)
>                                 bdi_wakeup_thread_delayed(bdi);
> diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
> index 6e600ab..15c44cf 100644
> --- a/fs/gfs2/file.c
> +++ b/fs/gfs2/file.c
> @@ -655,7 +655,7 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end,
>  {
>         struct address_space *mapping = file->f_mapping;
>         struct inode *inode = mapping->host;
> -       int sync_state = inode->i_state & I_DIRTY;
> +       int sync_state = inode->i_state & I_DIRTY_ALL;
>         struct gfs2_inode *ip = GFS2_I(inode);
>         int ret = 0, ret1 = 0;
>
> @@ -668,7 +668,7 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end,
>         if (!gfs2_is_jdata(ip))
>                 sync_state &= ~I_DIRTY_PAGES;
>         if (datasync)
> -               sync_state &= ~I_DIRTY_SYNC;
> +               sync_state &= ~(I_DIRTY_SYNC | I_DIRTY_TIME);
>
>         if (sync_state) {
>                 ret = sync_inode_metadata(inode, 1);
> diff --git a/fs/inode.c b/fs/inode.c
> index aa149e7..4feb85c 100644
> --- a/fs/inode.c
> +++ b/fs/inode.c
> @@ -18,6 +18,7 @@
>  #include <linux/buffer_head.h> /* for inode_has_buffers */
>  #include <linux/ratelimit.h>
>  #include <linux/list_lru.h>
> +#include <trace/events/writeback.h>
>  #include "internal.h"
>
>  /*
> @@ -30,7 +31,7 @@
>   * inode_sb_list_lock protects:
>   *   sb->s_inodes, inode->i_sb_list
>   * bdi->wb.list_lock protects:
> - *   bdi->wb.b_{dirty,io,more_io}, inode->i_wb_list
> + *   bdi->wb.b_{dirty,io,more_io,dirty_time}, inode->i_wb_list
>   * inode_hash_lock protects:
>   *   inode_hashtable, inode->i_hash
>   *
> @@ -416,7 +417,8 @@ static void inode_lru_list_add(struct inode *inode)
>   */
>  void inode_add_lru(struct inode *inode)
>  {
> -       if (!(inode->i_state & (I_DIRTY | I_SYNC | I_FREEING | I_WILL_FREE)) &&
> +       if (!(inode->i_state & (I_DIRTY_ALL | I_SYNC |
> +                               I_FREEING | I_WILL_FREE)) &&
>             !atomic_read(&inode->i_count) && inode->i_sb->s_flags & MS_ACTIVE)
>                 inode_lru_list_add(inode);
>  }
> @@ -647,7 +649,7 @@ int invalidate_inodes(struct super_block *sb, bool kill_dirty)
>                         spin_unlock(&inode->i_lock);
>                         continue;
>                 }
> -               if (inode->i_state & I_DIRTY && !kill_dirty) {
> +               if (inode->i_state & I_DIRTY_ALL && !kill_dirty) {
>                         spin_unlock(&inode->i_lock);
>                         busy = 1;
>                         continue;
> @@ -1432,11 +1434,20 @@ static void iput_final(struct inode *inode)
>   */
>  void iput(struct inode *inode)
>  {
> -       if (inode) {
> -               BUG_ON(inode->i_state & I_CLEAR);
> -
> -               if (atomic_dec_and_lock(&inode->i_count, &inode->i_lock))
> -                       iput_final(inode);
> +       if (!inode)
> +               return;
> +       BUG_ON(inode->i_state & I_CLEAR);
> +retry:
> +       if (atomic_dec_and_lock(&inode->i_count, &inode->i_lock)) {
> +               if (inode->i_nlink && (inode->i_state & I_DIRTY_TIME)) {
> +                       atomic_inc(&inode->i_count);
> +                       inode->i_state &= ~I_DIRTY_TIME;
> +                       spin_unlock(&inode->i_lock);
> +                       trace_writeback_lazytime_iput(inode);
> +                       mark_inode_dirty_sync(inode);
> +                       goto retry;
> +               }
> +               iput_final(inode);
>         }
>  }
>  EXPORT_SYMBOL(iput);
> @@ -1495,14 +1506,9 @@ static int relatime_need_update(struct vfsmount *mnt, struct inode *inode,
>         return 0;
>  }
>
> -/*
> - * This does the actual work of updating an inodes time or version.  Must have
> - * had called mnt_want_write() before calling this.
> - */
> -static int update_time(struct inode *inode, struct timespec *time, int flags)
> +int generic_update_time(struct inode *inode, struct timespec *time, int flags)
>  {
> -       if (inode->i_op->update_time)
> -               return inode->i_op->update_time(inode, time, flags);
> +       int iflags = I_DIRTY_TIME;
>
>         if (flags & S_ATIME)
>                 inode->i_atime = *time;
> @@ -1512,9 +1518,27 @@ static int update_time(struct inode *inode, struct timespec *time, int flags)
>                 inode->i_ctime = *time;
>         if (flags & S_MTIME)
>                 inode->i_mtime = *time;
> -       mark_inode_dirty_sync(inode);
> +
> +       if (!(inode->i_sb->s_flags & MS_LAZYTIME) || (flags & S_VERSION))
> +               iflags |= I_DIRTY_SYNC;
> +       __mark_inode_dirty(inode, iflags);
>         return 0;
>  }
> +EXPORT_SYMBOL(generic_update_time);
> +
> +/*
> + * This does the actual work of updating an inodes time or version.  Must have
> + * had called mnt_want_write() before calling this.
> + */
> +static int update_time(struct inode *inode, struct timespec *time, int flags)
> +{
> +       int (*update_time)(struct inode *, struct timespec *, int);
> +
> +       update_time = inode->i_op->update_time ? inode->i_op->update_time :
> +               generic_update_time;
> +
> +       return update_time(inode, time, flags);
> +}
>
>  /**
>   *     touch_atime     -       update the access time
> diff --git a/fs/jfs/file.c b/fs/jfs/file.c
> index 33aa0cc..10815f8 100644
> --- a/fs/jfs/file.c
> +++ b/fs/jfs/file.c
> @@ -39,7 +39,7 @@ int jfs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
>                 return rc;
>
>         mutex_lock(&inode->i_mutex);
> -       if (!(inode->i_state & I_DIRTY) ||
> +       if (!(inode->i_state & I_DIRTY_ALL) ||
>             (datasync && !(inode->i_state & I_DIRTY_DATASYNC))) {
>                 /* Make sure committed changes hit the disk */
>                 jfs_flush_journal(JFS_SBI(inode->i_sb)->log, 1);
> diff --git a/fs/libfs.c b/fs/libfs.c
> index 005843c..b2ffdb0 100644
> --- a/fs/libfs.c
> +++ b/fs/libfs.c
> @@ -948,7 +948,7 @@ int __generic_file_fsync(struct file *file, loff_t start, loff_t end,
>
>         mutex_lock(&inode->i_mutex);
>         ret = sync_mapping_buffers(inode->i_mapping);
> -       if (!(inode->i_state & I_DIRTY))
> +       if (!(inode->i_state & I_DIRTY_ALL))
>                 goto out;
>         if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
>                 goto out;
> diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c
> index 0f96f71..8db932d 100644
> --- a/fs/proc_namespace.c
> +++ b/fs/proc_namespace.c
> @@ -44,6 +44,7 @@ static int show_sb_opts(struct seq_file *m, struct super_block *sb)
>                 { MS_SYNCHRONOUS, ",sync" },
>                 { MS_DIRSYNC, ",dirsync" },
>                 { MS_MANDLOCK, ",mand" },
> +               { MS_LAZYTIME, ",lazytime" },
>                 { 0, NULL }
>         };
>         const struct proc_fs_info *fs_infop;
> diff --git a/fs/sync.c b/fs/sync.c
> index 01d9f18..fbc98ee 100644
> --- a/fs/sync.c
> +++ b/fs/sync.c
> @@ -177,8 +177,16 @@ SYSCALL_DEFINE1(syncfs, int, fd)
>   */
>  int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync)
>  {
> +       struct inode *inode = file->f_mapping->host;
> +
>         if (!file->f_op->fsync)
>                 return -EINVAL;
> +       if (!datasync && (inode->i_state & I_DIRTY_TIME)) {
> +               spin_lock(&inode->i_lock);
> +               inode->i_state &= ~I_DIRTY_TIME;
> +               spin_unlock(&inode->i_lock);
> +               mark_inode_dirty_sync(inode);
> +       }
>         return file->f_op->fsync(file, start, end, datasync);
>  }
>  EXPORT_SYMBOL(vfs_fsync_range);
> diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
> index 5da6012..4cdf733 100644
> --- a/include/linux/backing-dev.h
> +++ b/include/linux/backing-dev.h
> @@ -55,6 +55,7 @@ struct bdi_writeback {
>         struct list_head b_dirty;       /* dirty inodes */
>         struct list_head b_io;          /* parked for writeback */
>         struct list_head b_more_io;     /* parked for more writeback */
> +       struct list_head b_dirty_time;  /* time stamps are dirty */
>         spinlock_t list_lock;           /* protects the b_* lists */
>  };
>
> diff --git a/include/linux/fs.h b/include/linux/fs.h
> index f90c028..5ca285f 100644
> --- a/include/linux/fs.h
> +++ b/include/linux/fs.h
> @@ -1746,8 +1746,12 @@ struct super_operations {
>  #define __I_DIO_WAKEUP         9
>  #define I_DIO_WAKEUP           (1 << I_DIO_WAKEUP)
>  #define I_LINKABLE             (1 << 10)
> +#define I_DIRTY_TIME           (1 << 11)
> +#define __I_DIRTY_TIME_EXPIRED 12
> +#define I_DIRTY_TIME_EXPIRED   (1 << __I_DIRTY_TIME_EXPIRED)
>
>  #define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES)
> +#define I_DIRTY_ALL (I_DIRTY | I_DIRTY_TIME)
>
>  extern void __mark_inode_dirty(struct inode *, int);
>  static inline void mark_inode_dirty(struct inode *inode)
> @@ -1910,6 +1914,7 @@ extern int current_umask(void);
>
>  extern void ihold(struct inode * inode);
>  extern void iput(struct inode *);
> +extern int generic_update_time(struct inode *, struct timespec *, int);
>
>  static inline struct inode *file_inode(const struct file *f)
>  {
> diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h
> index cee02d6..5ecb4c2 100644
> --- a/include/trace/events/writeback.h
> +++ b/include/trace/events/writeback.h
> @@ -18,6 +18,8 @@
>                 {I_FREEING,             "I_FREEING"},           \
>                 {I_CLEAR,               "I_CLEAR"},             \
>                 {I_SYNC,                "I_SYNC"},              \
> +               {I_DIRTY_TIME,          "I_DIRTY_TIME"},        \
> +               {I_DIRTY_TIME_EXPIRED,  "I_DIRTY_TIME_EXPIRED"}, \
>                 {I_REFERENCED,          "I_REFERENCED"}         \
>         )
>
> @@ -68,6 +70,7 @@ DECLARE_EVENT_CLASS(writeback_dirty_inode_template,
>         TP_STRUCT__entry (
>                 __array(char, name, 32)
>                 __field(unsigned long, ino)
> +               __field(unsigned long, state)
>                 __field(unsigned long, flags)
>         ),
>
> @@ -78,16 +81,25 @@ DECLARE_EVENT_CLASS(writeback_dirty_inode_template,
>                 strncpy(__entry->name,
>                         bdi->dev ? dev_name(bdi->dev) : "(unknown)", 32);
>                 __entry->ino            = inode->i_ino;
> +               __entry->state          = inode->i_state;
>                 __entry->flags          = flags;
>         ),
>
> -       TP_printk("bdi %s: ino=%lu flags=%s",
> +       TP_printk("bdi %s: ino=%lu state=%s flags=%s",
>                 __entry->name,
>                 __entry->ino,
> +               show_inode_state(__entry->state),
>                 show_inode_state(__entry->flags)
>         )
>  );
>
> +DEFINE_EVENT(writeback_dirty_inode_template, writeback_mark_inode_dirty,
> +
> +       TP_PROTO(struct inode *inode, int flags),
> +
> +       TP_ARGS(inode, flags)
> +);
> +
>  DEFINE_EVENT(writeback_dirty_inode_template, writeback_dirty_inode_start,
>
>         TP_PROTO(struct inode *inode, int flags),
> @@ -598,6 +610,52 @@ DEFINE_EVENT(writeback_single_inode_template, writeback_single_inode,
>         TP_ARGS(inode, wbc, nr_to_write)
>  );
>
> +DECLARE_EVENT_CLASS(writeback_lazytime_template,
> +       TP_PROTO(struct inode *inode),
> +
> +       TP_ARGS(inode),
> +
> +       TP_STRUCT__entry(
> +               __field(        dev_t,  dev                     )
> +               __field(unsigned long,  ino                     )
> +               __field(unsigned long,  state                   )
> +               __field(        __u16, mode                     )
> +               __field(unsigned long, dirtied_when             )
> +       ),
> +
> +       TP_fast_assign(
> +               __entry->dev    = inode->i_sb->s_dev;
> +               __entry->ino    = inode->i_ino;
> +               __entry->state  = inode->i_state;
> +               __entry->mode   = inode->i_mode;
> +               __entry->dirtied_when = inode->dirtied_when;
> +       ),
> +
> +       TP_printk("dev %d,%d ino %lu dirtied %lu state %s mode 0%o",
> +                 MAJOR(__entry->dev), MINOR(__entry->dev),
> +                 __entry->ino, __entry->dirtied_when,
> +                 show_inode_state(__entry->state), __entry->mode)
> +);
> +
> +DEFINE_EVENT(writeback_lazytime_template, writeback_lazytime,
> +       TP_PROTO(struct inode *inode),
> +
> +       TP_ARGS(inode)
> +);
> +
> +DEFINE_EVENT(writeback_lazytime_template, writeback_lazytime_iput,
> +       TP_PROTO(struct inode *inode),
> +
> +       TP_ARGS(inode)
> +);
> +
> +DEFINE_EVENT(writeback_lazytime_template, writeback_dirty_inode_enqueue,
> +
> +       TP_PROTO(struct inode *inode),
> +
> +       TP_ARGS(inode)
> +);
> +
>  #endif /* _TRACE_WRITEBACK_H */
>
>  /* This part must be outside protection */
> diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
> index 3735fa0..9b964a5 100644
> --- a/include/uapi/linux/fs.h
> +++ b/include/uapi/linux/fs.h
> @@ -90,6 +90,7 @@ struct inodes_stat_t {
>  #define MS_KERNMOUNT   (1<<22) /* this is a kern_mount call */
>  #define MS_I_VERSION   (1<<23) /* Update inode I_version field */
>  #define MS_STRICTATIME (1<<24) /* Always perform atime updates */
> +#define MS_LAZYTIME    (1<<25) /* Update the on-disk [acm]times lazily */
>
>  /* These sb flags are internal to the kernel */
>  #define MS_NOSEC       (1<<28)
> @@ -100,7 +101,8 @@ struct inodes_stat_t {
>  /*
>   * Superblock flags that can be altered by MS_REMOUNT
>   */
> -#define MS_RMT_MASK    (MS_RDONLY|MS_SYNCHRONOUS|MS_MANDLOCK|MS_I_VERSION)
> +#define MS_RMT_MASK    (MS_RDONLY|MS_SYNCHRONOUS|MS_MANDLOCK|MS_I_VERSION|\
> +                        MS_LAZYTIME)
>
>  /*
>   * Old magic mount flag and mask
> diff --git a/mm/backing-dev.c b/mm/backing-dev.c
> index 0ae0df5..915feea 100644
> --- a/mm/backing-dev.c
> +++ b/mm/backing-dev.c
> @@ -69,10 +69,10 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
>         unsigned long background_thresh;
>         unsigned long dirty_thresh;
>         unsigned long bdi_thresh;
> -       unsigned long nr_dirty, nr_io, nr_more_io;
> +       unsigned long nr_dirty, nr_io, nr_more_io, nr_dirty_time;
>         struct inode *inode;
>
> -       nr_dirty = nr_io = nr_more_io = 0;
> +       nr_dirty = nr_io = nr_more_io = nr_dirty_time = 0;
>         spin_lock(&wb->list_lock);
>         list_for_each_entry(inode, &wb->b_dirty, i_wb_list)
>                 nr_dirty++;
> @@ -80,6 +80,9 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
>                 nr_io++;
>         list_for_each_entry(inode, &wb->b_more_io, i_wb_list)
>                 nr_more_io++;
> +       list_for_each_entry(inode, &wb->b_dirty_time, i_wb_list)
> +               if (inode->i_state & I_DIRTY_TIME)
> +                       nr_dirty_time++;
>         spin_unlock(&wb->list_lock);
>
>         global_dirty_limits(&background_thresh, &dirty_thresh);
> @@ -98,6 +101,7 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
>                    "b_dirty:            %10lu\n"
>                    "b_io:               %10lu\n"
>                    "b_more_io:          %10lu\n"
> +                  "b_dirty_time:       %10lu\n"
>                    "bdi_list:           %10u\n"
>                    "state:              %10lx\n",
>                    (unsigned long) K(bdi_stat(bdi, BDI_WRITEBACK)),
> @@ -111,6 +115,7 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
>                    nr_dirty,
>                    nr_io,
>                    nr_more_io,
> +                  nr_dirty_time,
>                    !list_empty(&bdi->bdi_list), bdi->state);
>  #undef K
>
> @@ -418,6 +423,7 @@ static void bdi_wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi)
>         INIT_LIST_HEAD(&wb->b_dirty);
>         INIT_LIST_HEAD(&wb->b_io);
>         INIT_LIST_HEAD(&wb->b_more_io);
> +       INIT_LIST_HEAD(&wb->b_dirty_time);
>         spin_lock_init(&wb->list_lock);
>         INIT_DELAYED_WORK(&wb->dwork, bdi_writeback_workfn);
>  }
> --
> 2.1.0
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
> the body of a message to majordomo@xxxxxxxxxxxxxxx
> More majordomo info at  http://vger.kernel.org/majordomo-info.html



-- 
Michael Kerrisk Linux man-pages maintainer;
http://www.kernel.org/doc/man-pages/
Author of "The Linux Programming Interface", http://blog.man7.org/
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]
  Powered by Linux