On Thu, 2022-02-17 at 17:33 -0500, trondmy@xxxxxxxxxx wrote: > From: Trond Myklebust <trond.myklebust@xxxxxxxxxxxxxxx> > > The heuristic for readdirplus is designed to try to detect 'ls -l' > and > similar patterns. It does so by looking for cache hit/miss patterns > in > both the attribute cache and in the dcache of the files in a given > directory, and then sets a flag for the readdirplus code to > interpret. > > The problem with this approach is that a single attribute or dcache > miss > can cause the NFS code to force a refresh of the attributes for the > entire set of files contained in the directory. > > To be able to make a more nuanced decision, let's sample the number > of > hits and misses in the set of open directory descriptors. That allows > us > to set thresholds at which we start preferring READDIRPLUS over > regular > READDIR, or at which we start to force a re-read of the remaining > readdir cache using READDIRPLUS. > > Signed-off-by: Trond Myklebust <trond.myklebust@xxxxxxxxxxxxxxx> > --- > fs/nfs/dir.c | 77 +++++++++++++++++++++++++--------------- > -- > fs/nfs/inode.c | 4 +-- > fs/nfs/internal.h | 4 +-- > fs/nfs/nfstrace.h | 1 - > include/linux/nfs_fs.h | 5 +-- > 5 files changed, 53 insertions(+), 38 deletions(-) > > diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c > index 43a559b34f4a..cd57df004789 100644 > --- a/fs/nfs/dir.c > +++ b/fs/nfs/dir.c > @@ -87,8 +87,7 @@ alloc_nfs_open_dir_context(struct inode *dir) > nfs_set_cache_invalid(dir, > NFS_INO_INVALID_DATA | > > NFS_INO_REVAL_FORCED); > - list_add(&ctx->list, &nfsi->open_files); > - clear_bit(NFS_INO_FORCE_READDIR, &nfsi->flags); > + list_add_tail_rcu(&ctx->list, &nfsi->open_files); > spin_unlock(&dir->i_lock); > return ctx; > } > @@ -98,9 +97,9 @@ alloc_nfs_open_dir_context(struct inode *dir) > static void put_nfs_open_dir_context(struct inode *dir, struct > nfs_open_dir_context *ctx) > { > spin_lock(&dir->i_lock); > - list_del(&ctx->list); > + list_del_rcu(&ctx->list); > spin_unlock(&dir->i_lock); > - kfree(ctx); > + kfree_rcu(ctx, rcu_head); > } > > /* > @@ -567,7 +566,6 @@ static int nfs_readdir_xdr_filler(struct > nfs_readdir_descriptor *desc, > /* We requested READDIRPLUS, but the server doesn't > grok it */ > if (error == -ENOTSUPP && desc->plus) { > NFS_SERVER(inode)->caps &= > ~NFS_CAP_READDIRPLUS; > - clear_bit(NFS_INO_ADVISE_RDPLUS, > &NFS_I(inode)->flags); > desc->plus = arg.plus = false; > goto again; > } > @@ -617,51 +615,57 @@ int nfs_same_file(struct dentry *dentry, struct > nfs_entry *entry) > return 1; > } > > -static > -bool nfs_use_readdirplus(struct inode *dir, struct dir_context *ctx) > +#define NFS_READDIR_CACHE_USAGE_THRESHOLD (8UL) > + > +static bool nfs_use_readdirplus(struct inode *dir, struct > dir_context *ctx, > + unsigned int cache_hits, > + unsigned int cache_misses) > { > if (!nfs_server_capable(dir, NFS_CAP_READDIRPLUS)) > return false; > - if (test_and_clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(dir)- > >flags)) > - return true; > - if (ctx->pos == 0) > + if (ctx->pos == 0 || > + cache_hits + cache_misses > > NFS_READDIR_CACHE_USAGE_THRESHOLD) > return true; > return false; > } > > /* > - * This function is called by the lookup and getattr code to request > the > + * This function is called by the getattr code to request the > * use of readdirplus to accelerate any future lookups in the same > * directory. > */ > -void nfs_advise_use_readdirplus(struct inode *dir) > +void nfs_readdir_record_entry_cache_hit(struct inode *dir) > { > struct nfs_inode *nfsi = NFS_I(dir); > + struct nfs_open_dir_context *ctx; > > - if (nfs_server_capable(dir, NFS_CAP_READDIRPLUS) && > - !list_empty(&nfsi->open_files)) > - set_bit(NFS_INO_ADVISE_RDPLUS, &nfsi->flags); > + if (nfs_server_capable(dir, NFS_CAP_READDIRPLUS)) { > + list_for_each_entry_rcu (ctx, &nfsi->open_files, > list) > + atomic_inc(&ctx->cache_hits); Missing rcu_read_lock()/rcu_read_unlock() protection. Fixed in the version committed to the testing branch. > + } > } > > /* > * This function is mainly for use by nfs_getattr(). > * > * If this is an 'ls -l', we want to force use of readdirplus. > - * Do this by checking if there is an active file descriptor > - * and calling nfs_advise_use_readdirplus, then forcing a > - * cache flush. > */ > -void nfs_force_use_readdirplus(struct inode *dir) > +void nfs_readdir_record_entry_cache_miss(struct inode *dir) > { > struct nfs_inode *nfsi = NFS_I(dir); > + struct nfs_open_dir_context *ctx; > > - if (nfs_server_capable(dir, NFS_CAP_READDIRPLUS) && > - !list_empty(&nfsi->open_files)) { > - set_bit(NFS_INO_ADVISE_RDPLUS, &nfsi->flags); > - set_bit(NFS_INO_FORCE_READDIR, &nfsi->flags); > + if (nfs_server_capable(dir, NFS_CAP_READDIRPLUS)) { > + list_for_each_entry_rcu (ctx, &nfsi->open_files, > list) > + atomic_inc(&ctx->cache_misses); Ditto. > } > } > > +static void nfs_readdir_record_dcache_miss(struct inode *dir) > +{ > + nfs_readdir_record_entry_cache_miss(dir); > +} > + > static > void nfs_prime_dcache(struct dentry *parent, struct nfs_entry > *entry, > unsigned long dir_verifier) > @@ -1101,6 +1105,18 @@ static int uncached_readdir(struct > nfs_readdir_descriptor *desc) > return status; > } > > +#define NFS_READDIR_CACHE_MISS_THRESHOLD (16UL) > + > +static void nfs_readdir_handle_cache_misses(struct inode *inode, > + struct > nfs_readdir_descriptor *desc, > + pgoff_t page_index, > + unsigned int > cache_misses) > +{ > + if (desc->ctx->pos != 0 && > + cache_misses > NFS_READDIR_CACHE_MISS_THRESHOLD) > + invalidate_mapping_pages(inode->i_mapping, page_index > + 1, -1); > +} > + > /* The file offset position represents the dirent entry number. A > last cookie cache takes care of the common case of reading the > whole directory. > @@ -1112,6 +1128,7 @@ static int nfs_readdir(struct file *file, > struct dir_context *ctx) > struct nfs_inode *nfsi = NFS_I(inode); > struct nfs_open_dir_context *dir_ctx = file->private_data; > struct nfs_readdir_descriptor *desc; > + unsigned int cache_hits, cache_misses; > pgoff_t page_index; > int res; > > @@ -1137,7 +1154,6 @@ static int nfs_readdir(struct file *file, > struct dir_context *ctx) > goto out; > desc->file = file; > desc->ctx = ctx; > - desc->plus = nfs_use_readdirplus(inode, ctx); > desc->page_index_max = -1; > > spin_lock(&file->f_lock); > @@ -1150,6 +1166,8 @@ static int nfs_readdir(struct file *file, > struct dir_context *ctx) > desc->page_fill_misses = dir_ctx->page_fill_misses; > nfs_set_dtsize(desc, dir_ctx->dtsize); > memcpy(desc->verf, dir_ctx->verf, sizeof(desc->verf)); > + cache_hits = atomic_xchg(&dir_ctx->cache_hits, 0); > + cache_misses = atomic_xchg(&dir_ctx->cache_misses, 0); > spin_unlock(&file->f_lock); > > if (desc->eof) { > @@ -1157,9 +1175,8 @@ static int nfs_readdir(struct file *file, > struct dir_context *ctx) > goto out_free; > } > > - if (test_and_clear_bit(NFS_INO_FORCE_READDIR, &nfsi->flags) > && > - list_is_singular(&nfsi->open_files)) > - invalidate_mapping_pages(inode->i_mapping, page_index > + 1, -1); > + desc->plus = nfs_use_readdirplus(inode, ctx, cache_hits, > cache_misses); > + nfs_readdir_handle_cache_misses(inode, desc, page_index, > cache_misses); > > do { > res = readdir_search_pagecache(desc); > @@ -1178,7 +1195,6 @@ static int nfs_readdir(struct file *file, > struct dir_context *ctx) > break; > } > if (res == -ETOOSMALL && desc->plus) { > - clear_bit(NFS_INO_ADVISE_RDPLUS, &nfsi- > >flags); > nfs_zap_caches(inode); > desc->page_index = 0; > desc->plus = false; > @@ -1602,7 +1618,7 @@ nfs_lookup_revalidate_dentry(struct inode *dir, > struct dentry *dentry, > nfs_set_verifier(dentry, dir_verifier); > > /* set a readdirplus hint that we had a cache miss */ > - nfs_force_use_readdirplus(dir); > + nfs_readdir_record_dcache_miss(dir); > ret = 1; > out: > nfs_free_fattr(fattr); > @@ -1659,7 +1675,6 @@ nfs_do_lookup_revalidate(struct inode *dir, > struct dentry *dentry, > nfs_mark_dir_for_revalidate(dir); > goto out_bad; > } > - nfs_advise_use_readdirplus(dir); > goto out_valid; > } > > @@ -1866,7 +1881,7 @@ struct dentry *nfs_lookup(struct inode *dir, > struct dentry * dentry, unsigned in > goto out; > > /* Notify readdir to use READDIRPLUS */ > - nfs_force_use_readdirplus(dir); > + nfs_readdir_record_dcache_miss(dir); > > no_entry: > res = d_splice_alias(inode, dentry); > diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c > index f9fc506ebb29..1bef81f5373a 100644 > --- a/fs/nfs/inode.c > +++ b/fs/nfs/inode.c > @@ -789,7 +789,7 @@ static void > nfs_readdirplus_parent_cache_miss(struct dentry *dentry) > if (!nfs_server_capable(d_inode(dentry), > NFS_CAP_READDIRPLUS)) > return; > parent = dget_parent(dentry); > - nfs_force_use_readdirplus(d_inode(parent)); > + nfs_readdir_record_entry_cache_miss(d_inode(parent)); > dput(parent); > } > > @@ -800,7 +800,7 @@ static void > nfs_readdirplus_parent_cache_hit(struct dentry *dentry) > if (!nfs_server_capable(d_inode(dentry), > NFS_CAP_READDIRPLUS)) > return; > parent = dget_parent(dentry); > - nfs_advise_use_readdirplus(d_inode(parent)); > + nfs_readdir_record_entry_cache_hit(d_inode(parent)); > dput(parent); > } > > diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h > index 2de7c56a1fbe..46dc97b65661 100644 > --- a/fs/nfs/internal.h > +++ b/fs/nfs/internal.h > @@ -366,8 +366,8 @@ extern struct nfs_client *nfs_init_client(struct > nfs_client *clp, > const struct nfs_client_initdata *); > > /* dir.c */ > -extern void nfs_advise_use_readdirplus(struct inode *dir); > -extern void nfs_force_use_readdirplus(struct inode *dir); > +extern void nfs_readdir_record_entry_cache_hit(struct inode *dir); > +extern void nfs_readdir_record_entry_cache_miss(struct inode *dir); > extern unsigned long nfs_access_cache_count(struct shrinker *shrink, > struct shrink_control > *sc); > extern unsigned long nfs_access_cache_scan(struct shrinker *shrink, > diff --git a/fs/nfs/nfstrace.h b/fs/nfs/nfstrace.h > index 45a310b586ce..3672f6703ee7 100644 > --- a/fs/nfs/nfstrace.h > +++ b/fs/nfs/nfstrace.h > @@ -36,7 +36,6 @@ > > #define nfs_show_nfsi_flags(v) \ > __print_flags(v, "|", \ > - { BIT(NFS_INO_ADVISE_RDPLUS), "ADVISE_RDPLUS" > }, \ > { BIT(NFS_INO_STALE), "STALE" }, \ > { BIT(NFS_INO_ACL_LRU_SET), "ACL_LRU_SET" }, > \ > { BIT(NFS_INO_INVALIDATING), "INVALIDATING" > }, \ > diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h > index 9e5fc29723c2..e21bd9452d27 100644 > --- a/include/linux/nfs_fs.h > +++ b/include/linux/nfs_fs.h > @@ -101,6 +101,8 @@ struct nfs_open_context { > > struct nfs_open_dir_context { > struct list_head list; > + atomic_t cache_hits; > + atomic_t cache_misses; > unsigned long attr_gencount; > __be32 verf[NFS_DIR_VERIFIER_SIZE]; > __u64 dir_cookie; > @@ -110,6 +112,7 @@ struct nfs_open_dir_context { > unsigned int dtsize; > signed char duped; > bool eof; > + struct rcu_head rcu_head; > }; > > /* > @@ -274,13 +277,11 @@ struct nfs4_copy_state { > /* > * Bit offsets in flags field > */ > -#define NFS_INO_ADVISE_RDPLUS (0) /* advise readdirplus > */ > #define NFS_INO_STALE (1) /* possible stale > inode */ > #define NFS_INO_ACL_LRU_SET (2) /* Inode is on the > LRU list */ > #define NFS_INO_INVALIDATING (3) /* inode is being > invalidated */ > #define NFS_INO_PRESERVE_UNLINKED (4) /* preserve file if > removed while open */ > #define NFS_INO_FSCACHE (5) /* inode can > be cached by FS-Cache */ > -#define NFS_INO_FORCE_READDIR (7) /* force readdirplus > */ > #define NFS_INO_LAYOUTCOMMIT (9) /* layoutcommit > required */ > #define NFS_INO_LAYOUTCOMMITTING (10) /* layoutcommit > inflight */ > #define NFS_INO_LAYOUTSTATS (11) /* layoutstats > inflight */ -- Trond Myklebust Linux NFS client maintainer, Hammerspace trond.myklebust@xxxxxxxxxxxxxxx