On Fri, Nov 6, 2020 at 10:05 AM Trond Myklebust <trondmy@xxxxxxxxxxxxxxx> wrote: > > On Fri, 2020-11-06 at 08:30 -0500, David Wysochanski wrote: > > On Wed, Nov 4, 2020 at 11:27 AM <trondmy@xxxxxxxxx> wrote: > > > > > > From: Trond Myklebust <trond.myklebust@xxxxxxxxxxxxxxx> > > > > > > If a readdir call returns more data than we can fit into one page > > > cache page, then allocate a new one for that data rather than > > > discarding the data. > > > > > > Signed-off-by: Trond Myklebust <trond.myklebust@xxxxxxxxxxxxxxx> > > > --- > > > fs/nfs/dir.c | 46 ++++++++++++++++++++++++++++++++++++++++++---- > > > 1 file changed, 42 insertions(+), 4 deletions(-) > > > > > > diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c > > > index 842f69120a01..f7248145c333 100644 > > > --- a/fs/nfs/dir.c > > > +++ b/fs/nfs/dir.c > > > @@ -320,6 +320,26 @@ static void nfs_readdir_page_set_eof(struct > > > page *page) > > > kunmap_atomic(array); > > > } > > > > > > +static void nfs_readdir_page_unlock_and_put(struct page *page) > > > +{ > > > + unlock_page(page); > > > + put_page(page); > > > +} > > > + > > > +static struct page *nfs_readdir_page_get_next(struct address_space > > > *mapping, > > > + pgoff_t index, u64 > > > cookie) > > > +{ > > > + struct page *page; > > > + > > > + page = nfs_readdir_page_get_locked(mapping, index, cookie); > > > + if (page) { > > > + if (nfs_readdir_page_last_cookie(page) == cookie) > > > + return page; > > > + nfs_readdir_page_unlock_and_put(page); > > > + } > > > + return NULL; > > > +} > > > + > > > static inline > > > int is_32bit_api(void) > > > { > > > @@ -637,13 +657,15 @@ void nfs_prime_dcache(struct dentry *parent, > > > struct nfs_entry *entry, > > > } > > > > > > /* Perform conversion from xdr to cache array */ > > > -static > > > -int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct > > > nfs_entry *entry, > > > - struct page **xdr_pages, struct > > > page *page, unsigned int buflen) > > > +static int nfs_readdir_page_filler(struct nfs_readdir_descriptor > > > *desc, > > > + struct nfs_entry *entry, > > > + struct page **xdr_pages, > > > + struct page *fillme, unsigned > > > int buflen) > > > { > > > + struct address_space *mapping = desc->file->f_mapping; > > > struct xdr_stream stream; > > > struct xdr_buf buf; > > > - struct page *scratch; > > > + struct page *scratch, *new, *page = fillme; > > > int status; > > > > > > scratch = alloc_page(GFP_KERNEL); > > > @@ -666,6 +688,19 @@ int > > > nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct > > > nfs_entry *en > > > desc->dir_verifier); > > > > > > status = nfs_readdir_add_to_array(entry, page); > > > + if (status != -ENOSPC) > > > + continue; > > > + > > > + if (page->mapping != mapping) > > > + break; > > > + new = nfs_readdir_page_get_next(mapping, page- > > > >index + 1, > > > + entry- > > > >prev_cookie); > > > + if (!new) > > > + break; > > > + if (page != fillme) > > > + nfs_readdir_page_unlock_and_put(page); > > > + page = new; > > > + status = nfs_readdir_add_to_array(entry, page); > > > } while (!status && !entry->eof); > > > > > > switch (status) { > > > @@ -681,6 +716,9 @@ int > > > nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct > > > nfs_entry *en > > > break; > > > } > > > > > > + if (page != fillme) > > > + nfs_readdir_page_unlock_and_put(page); > > > + > > > put_page(scratch); > > > return status; > > > } > > > -- > > > 2.28.0 > > > > > > > It doesn't look like this handles uncached_readdir. Were you > > planning > > on addressing that somehow, or should we think about something like > > this to move dtsize up as a parameter to nfs_readdir_xdr_to_array(), > > and force uncached_readdir() to 1 page: > > diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c > > index b6c3501e8f61..ca30e2dbb9c3 100644 > > --- a/fs/nfs/dir.c > > +++ b/fs/nfs/dir.c > > @@ -791,13 +791,12 @@ static struct page > > **nfs_readdir_alloc_pages(size_t npages) > > > > static int nfs_readdir_xdr_to_array(struct nfs_readdir_descriptor > > *desc, > > struct page *page, __be32 > > *verf_arg, > > - __be32 *verf_res) > > + __be32 *verf_res, size_t dtsize) > > { > > struct page **pages; > > struct nfs_entry *entry; > > size_t array_size; > > struct inode *inode = file_inode(desc->file); > > - size_t dtsize = NFS_SERVER(inode)->dtsize; > > int status = -ENOMEM; > > > > entry = kzalloc(sizeof(*entry), GFP_KERNEL); > > @@ -879,13 +878,15 @@ static int find_and_lock_cache_page(struct > > nfs_readdir_descriptor *desc) > > struct nfs_inode *nfsi = NFS_I(inode); > > __be32 verf[NFS_DIR_VERIFIER_SIZE]; > > int res; > > + size_t dtsize = NFS_SERVER(inode)->dtsize; > > > > desc->page = nfs_readdir_page_get_cached(desc); > > if (!desc->page) > > return -ENOMEM; > > if (nfs_readdir_page_needs_filling(desc->page)) { > > res = nfs_readdir_xdr_to_array(desc, desc->page, > > - nfsi->cookieverf, > > verf); > > + nfsi->cookieverf, > > verf, > > + dtsize); > > if (res < 0) { > > nfs_readdir_page_unlock_and_put_cached(desc); > > if (res == -EBADCOOKIE || res == -ENOTSYNC) { > > @@ -995,7 +996,8 @@ static int uncached_readdir(struct > > nfs_readdir_descriptor *desc) > > desc->duped = 0; > > > > nfs_readdir_page_init_array(page, desc->dir_cookie); > > - status = nfs_readdir_xdr_to_array(desc, page, desc->verf, > > verf); > > + status = nfs_readdir_xdr_to_array(desc, page, desc->verf, > > verf, > > + PAGE_SIZE); > > if (status < 0) > > goto out_release; > > > > Actually for uncached readdir, I was thinking we might want to convert > nfs_readdir_xdr_to_array() and nfs_readdir_page_filler() to take an > array of pages + buffer size. > IOW: convert uncached_readdir() to allocate an array of pages, and pass > in a 'struct page **' + a buffer length. > Yes I agree and this looks more like the right way to fix it rather than this single page approach. > I don't like the idea of passing in a dtsize because that restricts the > size of the READDIR RPC request buffer instead of restricting the > number of entries the server returns. For any given buffer size, that > number of entries fluctuates wildly depending on the filenames in that > directory and their differing lengths, whereas your page can take a > fixed number of entries irrespective of the filename lengths (in fact > it can always take 127 entries on an x86_64). > > It is true that the number of entries that nfs_do_filldir() can handle > also depends on the filename length, but we don't have any information > in the filesystem about the buffer size that was passed in to the > getdents() system call of how much space remains in that buffer. All > that information is hidden in the opaque 'struct dir_context'. So for > that reason, we can't use that information to set a dtsize either. > > -- > Trond Myklebust > Linux NFS client maintainer, Hammerspace > trond.myklebust@xxxxxxxxxxxxxxx > >