Re: [PATCH v4 4/5] NFS: Improve heuristic for readdirplus

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, 2022-02-17 at 17:33 -0500, trondmy@xxxxxxxxxx wrote:
> From: Trond Myklebust <trond.myklebust@xxxxxxxxxxxxxxx>
> 
> The heuristic for readdirplus is designed to try to detect 'ls -l'
> and
> similar patterns. It does so by looking for cache hit/miss patterns
> in
> both the attribute cache and in the dcache of the files in a given
> directory, and then sets a flag for the readdirplus code to
> interpret.
> 
> The problem with this approach is that a single attribute or dcache
> miss
> can cause the NFS code to force a refresh of the attributes for the
> entire set of files contained in the directory.
> 
> To be able to make a more nuanced decision, let's sample the number
> of
> hits and misses in the set of open directory descriptors. That allows
> us
> to set thresholds at which we start preferring READDIRPLUS over
> regular
> READDIR, or at which we start to force a re-read of the remaining
> readdir cache using READDIRPLUS.
> 
> Signed-off-by: Trond Myklebust <trond.myklebust@xxxxxxxxxxxxxxx>
> ---
>  fs/nfs/dir.c           | 77 +++++++++++++++++++++++++---------------
> --
>  fs/nfs/inode.c         |  4 +--
>  fs/nfs/internal.h      |  4 +--
>  fs/nfs/nfstrace.h      |  1 -
>  include/linux/nfs_fs.h |  5 +--
>  5 files changed, 53 insertions(+), 38 deletions(-)
> 
> diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
> index 43a559b34f4a..cd57df004789 100644
> --- a/fs/nfs/dir.c
> +++ b/fs/nfs/dir.c
> @@ -87,8 +87,7 @@ alloc_nfs_open_dir_context(struct inode *dir)
>                         nfs_set_cache_invalid(dir,
>                                               NFS_INO_INVALID_DATA |
>                                                      
> NFS_INO_REVAL_FORCED);
> -               list_add(&ctx->list, &nfsi->open_files);
> -               clear_bit(NFS_INO_FORCE_READDIR, &nfsi->flags);
> +               list_add_tail_rcu(&ctx->list, &nfsi->open_files);
>                 spin_unlock(&dir->i_lock);
>                 return ctx;
>         }
> @@ -98,9 +97,9 @@ alloc_nfs_open_dir_context(struct inode *dir)
>  static void put_nfs_open_dir_context(struct inode *dir, struct
> nfs_open_dir_context *ctx)
>  {
>         spin_lock(&dir->i_lock);
> -       list_del(&ctx->list);
> +       list_del_rcu(&ctx->list);
>         spin_unlock(&dir->i_lock);
> -       kfree(ctx);
> +       kfree_rcu(ctx, rcu_head);
>  }
>  
>  /*
> @@ -567,7 +566,6 @@ static int nfs_readdir_xdr_filler(struct
> nfs_readdir_descriptor *desc,
>                 /* We requested READDIRPLUS, but the server doesn't
> grok it */
>                 if (error == -ENOTSUPP && desc->plus) {
>                         NFS_SERVER(inode)->caps &=
> ~NFS_CAP_READDIRPLUS;
> -                       clear_bit(NFS_INO_ADVISE_RDPLUS,
> &NFS_I(inode)->flags);
>                         desc->plus = arg.plus = false;
>                         goto again;
>                 }
> @@ -617,51 +615,57 @@ int nfs_same_file(struct dentry *dentry, struct
> nfs_entry *entry)
>         return 1;
>  }
>  
> -static
> -bool nfs_use_readdirplus(struct inode *dir, struct dir_context *ctx)
> +#define NFS_READDIR_CACHE_USAGE_THRESHOLD (8UL)
> +
> +static bool nfs_use_readdirplus(struct inode *dir, struct
> dir_context *ctx,
> +                               unsigned int cache_hits,
> +                               unsigned int cache_misses)
>  {
>         if (!nfs_server_capable(dir, NFS_CAP_READDIRPLUS))
>                 return false;
> -       if (test_and_clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(dir)-
> >flags))
> -               return true;
> -       if (ctx->pos == 0)
> +       if (ctx->pos == 0 ||
> +           cache_hits + cache_misses >
> NFS_READDIR_CACHE_USAGE_THRESHOLD)
>                 return true;
>         return false;
>  }
>  
>  /*
> - * This function is called by the lookup and getattr code to request
> the
> + * This function is called by the getattr code to request the
>   * use of readdirplus to accelerate any future lookups in the same
>   * directory.
>   */
> -void nfs_advise_use_readdirplus(struct inode *dir)
> +void nfs_readdir_record_entry_cache_hit(struct inode *dir)
>  {
>         struct nfs_inode *nfsi = NFS_I(dir);
> +       struct nfs_open_dir_context *ctx;
>  
> -       if (nfs_server_capable(dir, NFS_CAP_READDIRPLUS) &&
> -           !list_empty(&nfsi->open_files))
> -               set_bit(NFS_INO_ADVISE_RDPLUS, &nfsi->flags);
> +       if (nfs_server_capable(dir, NFS_CAP_READDIRPLUS)) {
> +               list_for_each_entry_rcu (ctx, &nfsi->open_files,
> list)
> +                       atomic_inc(&ctx->cache_hits);

Missing rcu_read_lock()/rcu_read_unlock() protection. Fixed in the
version committed to the testing branch.

> +       }
>  }
>  
>  /*
>   * This function is mainly for use by nfs_getattr().
>   *
>   * If this is an 'ls -l', we want to force use of readdirplus.
> - * Do this by checking if there is an active file descriptor
> - * and calling nfs_advise_use_readdirplus, then forcing a
> - * cache flush.
>   */
> -void nfs_force_use_readdirplus(struct inode *dir)
> +void nfs_readdir_record_entry_cache_miss(struct inode *dir)
>  {
>         struct nfs_inode *nfsi = NFS_I(dir);
> +       struct nfs_open_dir_context *ctx;
>  
> -       if (nfs_server_capable(dir, NFS_CAP_READDIRPLUS) &&
> -           !list_empty(&nfsi->open_files)) {
> -               set_bit(NFS_INO_ADVISE_RDPLUS, &nfsi->flags);
> -               set_bit(NFS_INO_FORCE_READDIR, &nfsi->flags);
> +       if (nfs_server_capable(dir, NFS_CAP_READDIRPLUS)) {
> +               list_for_each_entry_rcu (ctx, &nfsi->open_files,
> list)
> +                       atomic_inc(&ctx->cache_misses);

Ditto.

>         }
>  }
>  
> +static void nfs_readdir_record_dcache_miss(struct inode *dir)
> +{
> +       nfs_readdir_record_entry_cache_miss(dir);
> +}
> +
>  static
>  void nfs_prime_dcache(struct dentry *parent, struct nfs_entry
> *entry,
>                 unsigned long dir_verifier)
> @@ -1101,6 +1105,18 @@ static int uncached_readdir(struct
> nfs_readdir_descriptor *desc)
>         return status;
>  }
>  
> +#define NFS_READDIR_CACHE_MISS_THRESHOLD (16UL)
> +
> +static void nfs_readdir_handle_cache_misses(struct inode *inode,
> +                                           struct
> nfs_readdir_descriptor *desc,
> +                                           pgoff_t page_index,
> +                                           unsigned int
> cache_misses)
> +{
> +       if (desc->ctx->pos != 0 &&
> +           cache_misses > NFS_READDIR_CACHE_MISS_THRESHOLD)
> +               invalidate_mapping_pages(inode->i_mapping, page_index
> + 1, -1);
> +}
> +
>  /* The file offset position represents the dirent entry number.  A
>     last cookie cache takes care of the common case of reading the
>     whole directory.
> @@ -1112,6 +1128,7 @@ static int nfs_readdir(struct file *file,
> struct dir_context *ctx)
>         struct nfs_inode *nfsi = NFS_I(inode);
>         struct nfs_open_dir_context *dir_ctx = file->private_data;
>         struct nfs_readdir_descriptor *desc;
> +       unsigned int cache_hits, cache_misses;
>         pgoff_t page_index;
>         int res;
>  
> @@ -1137,7 +1154,6 @@ static int nfs_readdir(struct file *file,
> struct dir_context *ctx)
>                 goto out;
>         desc->file = file;
>         desc->ctx = ctx;
> -       desc->plus = nfs_use_readdirplus(inode, ctx);
>         desc->page_index_max = -1;
>  
>         spin_lock(&file->f_lock);
> @@ -1150,6 +1166,8 @@ static int nfs_readdir(struct file *file,
> struct dir_context *ctx)
>         desc->page_fill_misses = dir_ctx->page_fill_misses;
>         nfs_set_dtsize(desc, dir_ctx->dtsize);
>         memcpy(desc->verf, dir_ctx->verf, sizeof(desc->verf));
> +       cache_hits = atomic_xchg(&dir_ctx->cache_hits, 0);
> +       cache_misses = atomic_xchg(&dir_ctx->cache_misses, 0);
>         spin_unlock(&file->f_lock);
>  
>         if (desc->eof) {
> @@ -1157,9 +1175,8 @@ static int nfs_readdir(struct file *file,
> struct dir_context *ctx)
>                 goto out_free;
>         }
>  
> -       if (test_and_clear_bit(NFS_INO_FORCE_READDIR, &nfsi->flags)
> &&
> -           list_is_singular(&nfsi->open_files))
> -               invalidate_mapping_pages(inode->i_mapping, page_index
> + 1, -1);
> +       desc->plus = nfs_use_readdirplus(inode, ctx, cache_hits,
> cache_misses);
> +       nfs_readdir_handle_cache_misses(inode, desc, page_index,
> cache_misses);
>  
>         do {
>                 res = readdir_search_pagecache(desc);
> @@ -1178,7 +1195,6 @@ static int nfs_readdir(struct file *file,
> struct dir_context *ctx)
>                         break;
>                 }
>                 if (res == -ETOOSMALL && desc->plus) {
> -                       clear_bit(NFS_INO_ADVISE_RDPLUS, &nfsi-
> >flags);
>                         nfs_zap_caches(inode);
>                         desc->page_index = 0;
>                         desc->plus = false;
> @@ -1602,7 +1618,7 @@ nfs_lookup_revalidate_dentry(struct inode *dir,
> struct dentry *dentry,
>         nfs_set_verifier(dentry, dir_verifier);
>  
>         /* set a readdirplus hint that we had a cache miss */
> -       nfs_force_use_readdirplus(dir);
> +       nfs_readdir_record_dcache_miss(dir);
>         ret = 1;
>  out:
>         nfs_free_fattr(fattr);
> @@ -1659,7 +1675,6 @@ nfs_do_lookup_revalidate(struct inode *dir,
> struct dentry *dentry,
>                                 nfs_mark_dir_for_revalidate(dir);
>                         goto out_bad;
>                 }
> -               nfs_advise_use_readdirplus(dir);
>                 goto out_valid;
>         }
>  
> @@ -1866,7 +1881,7 @@ struct dentry *nfs_lookup(struct inode *dir,
> struct dentry * dentry, unsigned in
>                 goto out;
>  
>         /* Notify readdir to use READDIRPLUS */
> -       nfs_force_use_readdirplus(dir);
> +       nfs_readdir_record_dcache_miss(dir);
>  
>  no_entry:
>         res = d_splice_alias(inode, dentry);
> diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
> index f9fc506ebb29..1bef81f5373a 100644
> --- a/fs/nfs/inode.c
> +++ b/fs/nfs/inode.c
> @@ -789,7 +789,7 @@ static void
> nfs_readdirplus_parent_cache_miss(struct dentry *dentry)
>         if (!nfs_server_capable(d_inode(dentry),
> NFS_CAP_READDIRPLUS))
>                 return;
>         parent = dget_parent(dentry);
> -       nfs_force_use_readdirplus(d_inode(parent));
> +       nfs_readdir_record_entry_cache_miss(d_inode(parent));
>         dput(parent);
>  }
>  
> @@ -800,7 +800,7 @@ static void
> nfs_readdirplus_parent_cache_hit(struct dentry *dentry)
>         if (!nfs_server_capable(d_inode(dentry),
> NFS_CAP_READDIRPLUS))
>                 return;
>         parent = dget_parent(dentry);
> -       nfs_advise_use_readdirplus(d_inode(parent));
> +       nfs_readdir_record_entry_cache_hit(d_inode(parent));
>         dput(parent);
>  }
>  
> diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
> index 2de7c56a1fbe..46dc97b65661 100644
> --- a/fs/nfs/internal.h
> +++ b/fs/nfs/internal.h
> @@ -366,8 +366,8 @@ extern struct nfs_client *nfs_init_client(struct
> nfs_client *clp,
>                            const struct nfs_client_initdata *);
>  
>  /* dir.c */
> -extern void nfs_advise_use_readdirplus(struct inode *dir);
> -extern void nfs_force_use_readdirplus(struct inode *dir);
> +extern void nfs_readdir_record_entry_cache_hit(struct inode *dir);
> +extern void nfs_readdir_record_entry_cache_miss(struct inode *dir);
>  extern unsigned long nfs_access_cache_count(struct shrinker *shrink,
>                                             struct shrink_control
> *sc);
>  extern unsigned long nfs_access_cache_scan(struct shrinker *shrink,
> diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h
> index 45a310b586ce..3672f6703ee7 100644
> --- a/fs/nfs/nfstrace.h
> +++ b/fs/nfs/nfstrace.h
> @@ -36,7 +36,6 @@
>  
>  #define nfs_show_nfsi_flags(v) \
>         __print_flags(v, "|", \
> -                       { BIT(NFS_INO_ADVISE_RDPLUS), "ADVISE_RDPLUS"
> }, \
>                         { BIT(NFS_INO_STALE), "STALE" }, \
>                         { BIT(NFS_INO_ACL_LRU_SET), "ACL_LRU_SET" },
> \
>                         { BIT(NFS_INO_INVALIDATING), "INVALIDATING"
> }, \
> diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
> index 9e5fc29723c2..e21bd9452d27 100644
> --- a/include/linux/nfs_fs.h
> +++ b/include/linux/nfs_fs.h
> @@ -101,6 +101,8 @@ struct nfs_open_context {
>  
>  struct nfs_open_dir_context {
>         struct list_head list;
> +       atomic_t cache_hits;
> +       atomic_t cache_misses;
>         unsigned long attr_gencount;
>         __be32  verf[NFS_DIR_VERIFIER_SIZE];
>         __u64 dir_cookie;
> @@ -110,6 +112,7 @@ struct nfs_open_dir_context {
>         unsigned int dtsize;
>         signed char duped;
>         bool eof;
> +       struct rcu_head rcu_head;
>  };
>  
>  /*
> @@ -274,13 +277,11 @@ struct nfs4_copy_state {
>  /*
>   * Bit offsets in flags field
>   */
> -#define NFS_INO_ADVISE_RDPLUS  (0)             /* advise readdirplus
> */
>  #define NFS_INO_STALE          (1)             /* possible stale
> inode */
>  #define NFS_INO_ACL_LRU_SET    (2)             /* Inode is on the
> LRU list */
>  #define NFS_INO_INVALIDATING   (3)             /* inode is being
> invalidated */
>  #define NFS_INO_PRESERVE_UNLINKED (4)          /* preserve file if
> removed while open */
>  #define NFS_INO_FSCACHE                (5)             /* inode can
> be cached by FS-Cache */
> -#define NFS_INO_FORCE_READDIR  (7)             /* force readdirplus
> */
>  #define NFS_INO_LAYOUTCOMMIT   (9)             /* layoutcommit
> required */
>  #define NFS_INO_LAYOUTCOMMITTING (10)          /* layoutcommit
> inflight */
>  #define NFS_INO_LAYOUTSTATS    (11)            /* layoutstats
> inflight */

-- 
Trond Myklebust
Linux NFS client maintainer, Hammerspace
trond.myklebust@xxxxxxxxxxxxxxx






[Index of Archives]     [Linux Filesystem Development]     [Linux USB Development]     [Linux Media Development]     [Video for Linux]     [Linux NILFS]     [Linux Audio Users]     [Yosemite Info]     [Linux SCSI]

  Powered by Linux