On Mon, Aug 10, 2020 at 7:09 AM Jeff Layton <jlayton@xxxxxxxxxx> wrote:
>
> On Sun, 2020-08-09 at 11:09 -0400, David Wysochanski wrote:
> > On Fri, Jul 31, 2020 at 9:05 AM Jeff Layton <jlayton@xxxxxxxxxx> wrote:
> > > Convert ceph_readpages to use the fscache_read_helper. With this we can
> > > rip out a lot of the old readpage/readpages infrastructure.
> > >
> > > Signed-off-by: Jeff Layton <jlayton@xxxxxxxxxx>
> > > ---
> > >  fs/ceph/addr.c | 209 +++++++------------------------------------
> > >  1 file changed, 28 insertions(+), 181 deletions(-)
> > >
> > > diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
> > > index cee497c108bb..8905fe4a0930 100644
> > > --- a/fs/ceph/addr.c
> > > +++ b/fs/ceph/addr.c
> > > @@ -377,76 +377,23 @@ static int ceph_readpage(struct file *filp, struct page *page)
> > >  	return err;
> > >  }
> > >
> > > -/*
> > > - * Finish an async read(ahead) op.
> > > - */
> > > -static void finish_read(struct ceph_osd_request *req)
> > > -{
> > > -	struct inode *inode = req->r_inode;
> > > -	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
> > > -	struct ceph_osd_data *osd_data;
> > > -	int rc = req->r_result <= 0 ? req->r_result : 0;
> > > -	int bytes = req->r_result >= 0 ? req->r_result : 0;
> > > -	int num_pages;
> > > -	int i;
> > > -
> > > -	dout("finish_read %p req %p rc %d bytes %d\n", inode, req, rc, bytes);
> > > -	if (rc == -EBLACKLISTED)
> > > -		ceph_inode_to_client(inode)->blacklisted = true;
> > > -
> > > -	/* unlock all pages, zeroing any data we didn't read */
> > > -	osd_data = osd_req_op_extent_osd_data(req, 0);
> > > -	BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_PAGES);
> > > -	num_pages = calc_pages_for((u64)osd_data->alignment,
> > > -				   (u64)osd_data->length);
> > > -	for (i = 0; i < num_pages; i++) {
> > > -		struct page *page = osd_data->pages[i];
> > > -
> > > -		if (rc < 0 && rc != -ENOENT)
> > > -			goto unlock;
> > > -		if (bytes < (int)PAGE_SIZE) {
> > > -			/* zero (remainder of) page */
> > > -			int s = bytes < 0 ? 0 : bytes;
> > > -			zero_user_segment(page, s, PAGE_SIZE);
> > > -		}
> > > -		dout("finish_read %p uptodate %p idx %lu\n", inode, page,
> > > -		     page->index);
> > > -		flush_dcache_page(page);
> > > -		SetPageUptodate(page);
> > > -unlock:
> > > -		unlock_page(page);
> > > -		put_page(page);
> > > -		bytes -= PAGE_SIZE;
> > > -	}
> > > -
> > > -	ceph_update_read_latency(&fsc->mdsc->metric, req->r_start_latency,
> > > -				 req->r_end_latency, rc);
> > > -
> > > -	kfree(osd_data->pages);
> > > -}
> > > -
> > > -/*
> > > - * start an async read(ahead) operation. return nr_pages we submitted
> > > - * a read for on success, or negative error code.
> > > - */
> > > -static int start_read(struct inode *inode, struct ceph_rw_context *rw_ctx,
> > > -		      struct list_head *page_list, int max)
> > > +static int ceph_readpages(struct file *file, struct address_space *mapping,
> > > +			  struct list_head *page_list, unsigned nr_pages)
> > >  {
> > > -	struct ceph_osd_client *osdc =
> > > -		&ceph_inode_to_client(inode)->client->osdc;
> > > +	struct inode *inode = file_inode(file);
> > >  	struct ceph_inode_info *ci = ceph_inode(inode);
> > > -	struct page *page = lru_to_page(page_list);
> > > -	struct ceph_vino vino;
> > > -	struct ceph_osd_request *req;
> > > -	u64 off;
> > > -	u64 len;
> > > -	int i;
> > > -	struct page **pages;
> > > -	pgoff_t next_index;
> > > -	int nr_pages = 0;
> > > +	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
> > > +	struct ceph_file_info *fi = file->private_data;
> > > +	struct ceph_rw_context *rw_ctx;
> > > +	struct fscache_cookie *cookie = ceph_fscache_cookie(ci);
> > >  	int got = 0;
> > >  	int ret = 0;
> > > +	int max = fsc->mount_options->rsize >> PAGE_SHIFT;
> >
> > Have you run tests with different values of rsize?
> > Specifically, rsize < readahead_size == size_of_readpages
> >
> > I'm seeing a lot of problems with NFS when varying rsize values are
> > used wrt readahead values.  Specifically I'm seeing panics because
> > fscache expects a 1:1 mapping of issue_op() to io_done() calls, and I
> > get panics because multiple read completions are trying to unlock the
> > same pages inside fscache_read_done().
> >
> > My understanding is afs does not have such an 'rsize' limitation, so it
> > may not be an area that is well tested.  It could be my implementation
> > of the NFS conversion though, as I think what needs to happen is to
> > respect the above 1:1 mapping of issue_op() to io_done() calls, and my
> > initial implementation did not do that.
> >
> > FWIW, specifically this unit test was originally failing for me with a panic.
> > Sun 09 Aug 2020 11:03:22 AM EDT: 1. On NFS client, install and enable
> > cachefilesd
> > Sun 09 Aug 2020 11:03:22 AM EDT: 2. On NFS client, mount -o
> > vers=4.1,fsc,rsize=16384 127.0.0.1:/export/dir1 /mnt/dir1
> > Sun 09 Aug 2020 11:03:22 AM EDT: 3. On NFS client, dd if=/dev/zero
> > of=/mnt/dir1/file1.bin bs=65536 count=1
> > Sun 09 Aug 2020 11:03:22 AM EDT: 4. On NFS client, echo 3 >
> > /proc/sys/vm/drop_caches
> > Sun 09 Aug 2020 11:03:22 AM EDT: 5. On NFS client, ./nfs-readahead.sh
> > set /mnt/dir1 65536
> > Sun 09 Aug 2020 11:03:23 AM EDT: 8. On NFS client, echo 3 >
> > /proc/sys/vm/drop_caches
> > Sun 09 Aug 2020 11:03:23 AM EDT: 9. On NFS client, dd
> > if=/mnt/dir1/file1.bin of=/dev/null
> >
> >
>
> I haven't tested much with varying rsize and wsize (setting them on
> cephfs is pretty rare), but I'll plan to. What's in nfs-readahead.sh?
>

See attached.
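
To make the rsize vs. readahead mismatch above concrete: with rsize=16384
and a 65536-byte readahead window, max works out to 16384 >> PAGE_SHIFT = 4
pages, so a single readpages call covering 16 pages has to be split into
four wire reads, while fscache still expects exactly one io_done() for the
one issue_op() it made. Below is a minimal userspace sketch of the
bookkeeping that keeps the completion 1:1. The demo_* names and the struct
are made up for illustration only; they are not the fscache_read_helper API
or any kernel interface.

/*
 * Illustrative only: models one readahead request that must be split
 * into rsize-sized RPCs, with the io_done() analogue firing exactly
 * once, on the final RPC completion.  None of these names come from
 * the kernel; they are stand-ins for the idea being discussed.
 */
#include <stddef.h>
#include <stdio.h>

#define PAGE_SHIFT 12	/* assume 4k pages, as in the test above */

struct demo_read_req {
	size_t len;		/* bytes the VM asked to read ahead */
	size_t rsize;		/* largest single wire read allowed */
	int nr_rpcs;		/* wire reads issued for this request */
	int completed;		/* wire reads that have finished */
};

/* split one request into rsize-sized wire reads */
static void demo_issue(struct demo_read_req *req)
{
	size_t off;

	for (off = 0; off < req->len; off += req->rsize)
		req->nr_rpcs++;
}

/* per-RPC completion: only the last one may unlock pages ("io_done") */
static void demo_rpc_done(struct demo_read_req *req)
{
	if (++req->completed == req->nr_rpcs)
		printf("io_done(): %d RPCs finished, %zu bytes total\n",
		       req->nr_rpcs, req->len);
}

int main(void)
{
	/* numbers from the failing test: rsize=16384, readahead=65536 */
	struct demo_read_req req = { .len = 65536, .rsize = 16384 };
	int i;

	printf("max = rsize >> PAGE_SHIFT = %zu pages per RPC\n",
	       req.rsize >> PAGE_SHIFT);
	demo_issue(&req);
	for (i = 0; i < req.nr_rpcs; i++)
		demo_rpc_done(&req);
	return 0;
}

If each of the four wire reads instead signals the io_done() analogue
itself, the same pages get "unlocked" four times, which is consistent with
the multiple-completion unlock panics described in the quoted mail above.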
Attachment:
nfs-readahead.sh
Description: application/shellscript