On Fri, Jul 31, 2020 at 9:05 AM Jeff Layton <jlayton@xxxxxxxxxx> wrote:
>
> Convert ceph_readpages to use the fscache_read_helper. With this we can
> rip out a lot of the old readpage/readpages infrastructure.
>
> Signed-off-by: Jeff Layton <jlayton@xxxxxxxxxx>
> ---
>  fs/ceph/addr.c | 209 +++++++------------------------------------------
>  1 file changed, 28 insertions(+), 181 deletions(-)
>
> diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
> index cee497c108bb..8905fe4a0930 100644
> --- a/fs/ceph/addr.c
> +++ b/fs/ceph/addr.c
> @@ -377,76 +377,23 @@ static int ceph_readpage(struct file *filp, struct page *page)
>  	return err;
>  }
>
> -/*
> - * Finish an async read(ahead) op.
> - */
> -static void finish_read(struct ceph_osd_request *req)
> -{
> -	struct inode *inode = req->r_inode;
> -	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
> -	struct ceph_osd_data *osd_data;
> -	int rc = req->r_result <= 0 ? req->r_result : 0;
> -	int bytes = req->r_result >= 0 ? req->r_result : 0;
> -	int num_pages;
> -	int i;
> -
> -	dout("finish_read %p req %p rc %d bytes %d\n", inode, req, rc, bytes);
> -	if (rc == -EBLACKLISTED)
> -		ceph_inode_to_client(inode)->blacklisted = true;
> -
> -	/* unlock all pages, zeroing any data we didn't read */
> -	osd_data = osd_req_op_extent_osd_data(req, 0);
> -	BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_PAGES);
> -	num_pages = calc_pages_for((u64)osd_data->alignment,
> -				   (u64)osd_data->length);
> -	for (i = 0; i < num_pages; i++) {
> -		struct page *page = osd_data->pages[i];
> -
> -		if (rc < 0 && rc != -ENOENT)
> -			goto unlock;
> -		if (bytes < (int)PAGE_SIZE) {
> -			/* zero (remainder of) page */
> -			int s = bytes < 0 ? 0 : bytes;
> -			zero_user_segment(page, s, PAGE_SIZE);
> -		}
> -		dout("finish_read %p uptodate %p idx %lu\n", inode, page,
> -		     page->index);
> -		flush_dcache_page(page);
> -		SetPageUptodate(page);
> -unlock:
> -		unlock_page(page);
> -		put_page(page);
> -		bytes -= PAGE_SIZE;
> -	}
> -
> -	ceph_update_read_latency(&fsc->mdsc->metric, req->r_start_latency,
> -				 req->r_end_latency, rc);
> -
> -	kfree(osd_data->pages);
> -}
> -
> -/*
> - * start an async read(ahead) operation. return nr_pages we submitted
> - * a read for on success, or negative error code.
> - */
> -static int start_read(struct inode *inode, struct ceph_rw_context *rw_ctx,
> -		      struct list_head *page_list, int max)
> +static int ceph_readpages(struct file *file, struct address_space *mapping,
> +			  struct list_head *page_list, unsigned nr_pages)
>  {
> -	struct ceph_osd_client *osdc =
> -		&ceph_inode_to_client(inode)->client->osdc;
> +	struct inode *inode = file_inode(file);
>  	struct ceph_inode_info *ci = ceph_inode(inode);
> -	struct page *page = lru_to_page(page_list);
> -	struct ceph_vino vino;
> -	struct ceph_osd_request *req;
> -	u64 off;
> -	u64 len;
> -	int i;
> -	struct page **pages;
> -	pgoff_t next_index;
> -	int nr_pages = 0;
> +	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
> +	struct ceph_file_info *fi = file->private_data;
> +	struct ceph_rw_context *rw_ctx;
> +	struct fscache_cookie *cookie = ceph_fscache_cookie(ci);
>  	int got = 0;
>  	int ret = 0;
> +	int max = fsc->mount_options->rsize >> PAGE_SHIFT;

Have you run tests with different values of rsize?  Specifically,
rsize < readahead_size == size_of_readpages
I'm seeing a lot of problems with NFS when varying rsize values are used
wrt readahead values.
Specifically, I'm seeing panics because fscache expects a 1:1 mapping of
issue_op() to io_done() calls: multiple read completions end up trying to
unlock the same pages inside fscache_read_done().  My understanding is
that afs does not have such an 'rsize' limitation, so this may not be a
well-tested area.  It could also be my implementation of the NFS
conversion, though.  I think what needs to happen is to respect the above
1:1 mapping of issue_op() to io_done() calls, and my initial
implementation did not do that (a rough sketch of what I mean is at the
bottom of this mail).

FWIW, this is the unit test that was originally failing for me with a
panic:

Sun 09 Aug 2020 11:03:22 AM EDT: 1. On NFS client, install and enable cachefilesd
Sun 09 Aug 2020 11:03:22 AM EDT: 2. On NFS client, mount -o vers=4.1,fsc,rsize=16384 127.0.0.1:/export/dir1 /mnt/dir1
Sun 09 Aug 2020 11:03:22 AM EDT: 3. On NFS client, dd if=/dev/zero of=/mnt/dir1/file1.bin bs=65536 count=1
Sun 09 Aug 2020 11:03:22 AM EDT: 4. On NFS client, echo 3 > /proc/sys/vm/drop_caches
Sun 09 Aug 2020 11:03:22 AM EDT: 5. On NFS client, ./nfs-readahead.sh set /mnt/dir1 65536
Sun 09 Aug 2020 11:03:23 AM EDT: 6. On NFS client, dd if=/mnt/dir1/file1.bin of=/dev/null
Sun 09 Aug 2020 11:03:23 AM EDT: 8. On NFS client, echo 3 > /proc/sys/vm/drop_caches
Sun 09 Aug 2020 11:03:23 AM EDT: 9. On NFS client, dd if=/mnt/dir1/file1.bin of=/dev/null

> +
> +	if (ceph_inode(inode)->i_inline_version != CEPH_INLINE_NONE)
> +		return -EINVAL;
>
> +	rw_ctx = ceph_find_rw_context(fi);
>  	if (!rw_ctx) {
>  		/* caller of readpages does not hold buffer and read caps
>  		 * (fadvise, madvise and readahead cases) */
> @@ -459,133 +406,33 @@ static int start_read(struct inode *inode, struct ceph_rw_context *rw_ctx,
>  			dout("start_read %p, no cache cap\n", inode);
>  			ret = 0;
>  		}
> -		if (ret <= 0) {
> -			if (got)
> -				ceph_put_cap_refs(ci, got);
> -			while (!list_empty(page_list)) {
> -				page = lru_to_page(page_list);
> -				list_del(&page->lru);
> -				put_page(page);
> -			}
> -			return ret;
> -		}
> +		if (ret <= 0)
> +			goto out;
>  	}
>
> -	off = (u64) page_offset(page);
> +	dout("readpages %p file %p ctx %p nr_pages %d max %d\n",
> +	     inode, file, rw_ctx, nr_pages, max);
>
> -	/* count pages */
> -	next_index = page->index;
> -	list_for_each_entry_reverse(page, page_list, lru) {
> -		if (page->index != next_index)
> -			break;
> -		nr_pages++;
> -		next_index++;
> -		if (max && nr_pages == max)
> -			break;
> -	}
> -	len = nr_pages << PAGE_SHIFT;
> -	dout("start_read %p nr_pages %d is %lld~%lld\n", inode, nr_pages,
> -	     off, len);
> -	vino = ceph_vino(inode);
> -	req = ceph_osdc_new_request(osdc, &ci->i_layout, vino, off, &len,
> -				    0, 1, CEPH_OSD_OP_READ,
> -				    CEPH_OSD_FLAG_READ, NULL,
> -				    ci->i_truncate_seq, ci->i_truncate_size,
> -				    false);
> -	if (IS_ERR(req)) {
> -		ret = PTR_ERR(req);
> -		goto out;
> -	}
> +	while (ret >= 0 && !list_empty(page_list)) {
> +		struct ceph_fscache_req *req = ceph_fsreq_alloc();
>
> -	/* build page vector */
> -	nr_pages = calc_pages_for(0, len);
> -	pages = kmalloc_array(nr_pages, sizeof(*pages), GFP_KERNEL);
> -	if (!pages) {
> -		ret = -ENOMEM;
> -		goto out_put;
> -	}
> -	for (i = 0; i < nr_pages; ++i) {
> -		page = list_entry(page_list->prev, struct page, lru);
> -		BUG_ON(PageLocked(page));
> -		list_del(&page->lru);
> -
> -		dout("start_read %p adding %p idx %lu\n", inode, page,
> -		     page->index);
> -		if (add_to_page_cache_lru(page, &inode->i_data, page->index,
> -					  GFP_KERNEL)) {
> -			put_page(page);
> -			dout("start_read %p add_to_page_cache failed %p\n",
> -			     inode, page);
> -			nr_pages = i;
> -			if (nr_pages > 0) {
> -				len = nr_pages << PAGE_SHIFT;
> -				osd_req_op_extent_update(req, 0, len);
> -				break;
> -			}
> -			goto out_pages;
> +		if (!req) {
> +			ret = -ENOMEM;
>  			break;
>  		}
> -		pages[i] = page;
> -	}
> -	osd_req_op_extent_osd_data_pages(req, 0, pages, len, 0, false, false);
> -	req->r_callback = finish_read;
> -	req->r_inode = inode;
> -
> -	dout("start_read %p starting %p %lld~%lld\n", inode, req, off, len);
> -	ret = ceph_osdc_start_request(osdc, req, false);
> -	if (ret < 0)
> -		goto out_pages;
> -	ceph_osdc_put_request(req);
> -
> -	/* After adding locked pages to page cache, the inode holds cache cap.
> -	 * So we can drop our cap refs. */
> -	if (got)
> -		ceph_put_cap_refs(ci, got);
> -
> -	return nr_pages;
> +		fscache_init_io_request(&req->fscache_req, cookie, &ceph_readpage_fsreq_ops);
> +		req->fscache_req.mapping = inode->i_mapping;
>
> -out_pages:
> -	for (i = 0; i < nr_pages; ++i) {
> -		unlock_page(pages[i]);
> +		ret = fscache_read_helper_page_list(&req->fscache_req, page_list, max);
> +		ceph_fsreq_put(&req->fscache_req);
>  	}
> -	ceph_put_page_vector(pages, nr_pages, false);
> -out_put:
> -	ceph_osdc_put_request(req);
>  out:
> +	/* After adding locked pages to page cache, the inode holds Fc refs. We can drop ours. */
>  	if (got)
>  		ceph_put_cap_refs(ci, got);
> -	return ret;
> -}
>
> -
> -/*
> - * Read multiple pages.  Leave pages we don't read + unlock in page_list;
> - * the caller (VM) cleans them up.
> - */
> -static int ceph_readpages(struct file *file, struct address_space *mapping,
> -			  struct list_head *page_list, unsigned nr_pages)
> -{
> -	struct inode *inode = file_inode(file);
> -	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
> -	struct ceph_file_info *fi = file->private_data;
> -	struct ceph_rw_context *rw_ctx;
> -	int rc = 0;
> -	int max = 0;
> -
> -	if (ceph_inode(inode)->i_inline_version != CEPH_INLINE_NONE)
> -		return -EINVAL;
> -
> -	rw_ctx = ceph_find_rw_context(fi);
> -	max = fsc->mount_options->rsize >> PAGE_SHIFT;
> -	dout("readpages %p file %p ctx %p nr_pages %d max %d\n",
> -	     inode, file, rw_ctx, nr_pages, max);
> -	while (!list_empty(page_list)) {
> -		rc = start_read(inode, rw_ctx, page_list, max);
> -		if (rc < 0)
> -			goto out;
> -	}
> -out:
> -	dout("readpages %p file %p ret %d\n", inode, file, rc);
> -	return rc;
> +	dout("readpages %p file %p ret %d\n", inode, file, ret);
> +	return ret;
>  }
>
>  struct ceph_writeback_ctl
> --
> 2.26.2
>
>
> --
> Linux-cachefs mailing list
> Linux-cachefs@xxxxxxxxxx
> https://www.redhat.com/mailman/listinfo/linux-cachefs
>
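
To make the 1:1 constraint I mentioned above concrete, here is a rough
sketch of the shape I think an rsize-limited issue_op() needs to take.
To be clear, none of these names are real fscache or NFS symbols -- the
struct, my_issue_op(), my_send_read_rpc() and so on are made up for
illustration.  The point is only that however many rsize-sized RPCs one
issue_op() call fans out into, the single io_done() is gated behind a
refcount so it fires exactly once, after the last RPC completes:

/*
 * Illustrative sketch only -- not the real fscache or NFS code.
 * Invariant shown: one issue_op() call produces exactly one io_done()
 * call, no matter how many rsize-limited RPCs it is split into.
 */
#include <linux/atomic.h>
#include <linux/kernel.h>
#include <linux/types.h>

struct my_read_request {
	loff_t		start;		/* offset of the whole helper request */
	size_t		len;		/* total length handed to issue_op() */
	atomic_t	nr_outstanding;	/* rsize-sized RPCs still in flight */
	void		(*io_done)(struct my_read_request *);
};

/* stand-in for sending one READ RPC; it must call @done exactly once */
void my_send_read_rpc(struct my_read_request *req, loff_t pos, size_t len,
		      void (*done)(struct my_read_request *));

/* per-RPC completion: only the final one may signal the read helper */
static void my_subread_done(struct my_read_request *req)
{
	if (atomic_dec_and_test(&req->nr_outstanding))
		req->io_done(req);
}

/* hypothetical issue_op(): split the request at rsize, complete once */
static void my_issue_op(struct my_read_request *req, size_t rsize)
{
	loff_t pos = req->start;
	size_t remaining = req->len;

	/* account for every RPC up front so an early completion can't
	 * fire io_done() while later RPCs are still being sent */
	atomic_set(&req->nr_outstanding, DIV_ROUND_UP(req->len, rsize));

	while (remaining) {
		size_t chunk = min_t(size_t, remaining, rsize);

		my_send_read_rpc(req, pos, chunk, my_subread_done);
		pos += chunk;
		remaining -= chunk;
	}
}

With something along those lines, rsize < readahead_size just means more
RPCs per helper request, rather than multiple io_done() calls racing to
unlock the same pages in fscache_read_done().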