On Tue, 2022-03-08 at 23:29 +0000, David Howells wrote: > Split fs/netfs/read_helper.c into two pieces, one to deal with buffered > writes and one to deal with the I/O mechanism. > I think you mean buffered reads here? > Changes > ======= > ver #2) > - Add kdoc reference to new file. > > Signed-off-by: David Howells <dhowells@xxxxxxxxxx> > cc: linux-cachefs@xxxxxxxxxx > > Link: https://lore.kernel.org/r/164623005586.3564931.6149556072728481767.stgit@xxxxxxxxxxxxxxxxxxxxxx/ # v1 > --- > > fs/netfs/Makefile | 1 > fs/netfs/buffered_read.c | 428 ++++++++++++++++++++++++++++++++++++++++++++++ > fs/netfs/io.c | 418 --------------------------------------------- > 3 files changed, 429 insertions(+), 418 deletions(-) > create mode 100644 fs/netfs/buffered_read.c > > diff --git a/fs/netfs/Makefile b/fs/netfs/Makefile > index 51ece4f7bc77..88b904532bc7 100644 > --- a/fs/netfs/Makefile > +++ b/fs/netfs/Makefile > @@ -1,6 +1,7 @@ > # SPDX-License-Identifier: GPL-2.0 > > netfs-y := \ > + buffered_read.o \ > io.o \ > objects.o > > diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c > new file mode 100644 > index 000000000000..09ba7097a970 > --- /dev/null > +++ b/fs/netfs/buffered_read.c > @@ -0,0 +1,428 @@ > +// SPDX-License-Identifier: GPL-2.0-or-later > +/* Network filesystem high-level buffered read support. > + * > + * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved. > + * Written by David Howells (dhowells@xxxxxxxxxx) > + */ > + > +#include <linux/export.h> > +#include <linux/task_io_accounting_ops.h> > +#include "internal.h" > + > +/* > + * Unlock the folios in a read operation. We need to set PG_fscache on any > + * folios we're going to write back before we unlock them. > + */ > +void netfs_rreq_unlock_folios(struct netfs_io_request *rreq) > +{ > + struct netfs_io_subrequest *subreq; > + struct folio *folio; > + unsigned int iopos, account = 0; > + pgoff_t start_page = rreq->start / PAGE_SIZE; > + pgoff_t last_page = ((rreq->start + rreq->len) / PAGE_SIZE) - 1; > + bool subreq_failed = false; > + > + XA_STATE(xas, &rreq->mapping->i_pages, start_page); > + > + if (test_bit(NETFS_RREQ_FAILED, &rreq->flags)) { > + __clear_bit(NETFS_RREQ_COPY_TO_CACHE, &rreq->flags); > + list_for_each_entry(subreq, &rreq->subrequests, rreq_link) { > + __clear_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags); > + } > + } > + > + /* Walk through the pagecache and the I/O request lists simultaneously. > + * We may have a mixture of cached and uncached sections and we only > + * really want to write out the uncached sections. This is slightly > + * complicated by the possibility that we might have huge pages with a > + * mixture inside. > + */ > + subreq = list_first_entry(&rreq->subrequests, > + struct netfs_io_subrequest, rreq_link); > + iopos = 0; > + subreq_failed = (subreq->error < 0); > + > + trace_netfs_rreq(rreq, netfs_rreq_trace_unlock); > + > + rcu_read_lock(); > + xas_for_each(&xas, folio, last_page) { > + unsigned int pgpos = (folio_index(folio) - start_page) * PAGE_SIZE; > + unsigned int pgend = pgpos + folio_size(folio); > + bool pg_failed = false; > + > + for (;;) { > + if (!subreq) { > + pg_failed = true; > + break; > + } > + if (test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags)) > + folio_start_fscache(folio); > + pg_failed |= subreq_failed; > + if (pgend < iopos + subreq->len) > + break; > + > + account += subreq->transferred; > + iopos += subreq->len; > + if (!list_is_last(&subreq->rreq_link, &rreq->subrequests)) { > + subreq = list_next_entry(subreq, rreq_link); > + subreq_failed = (subreq->error < 0); > + } else { > + subreq = NULL; > + subreq_failed = false; > + } > + if (pgend == iopos) > + break; > + } > + > + if (!pg_failed) { > + flush_dcache_folio(folio); > + folio_mark_uptodate(folio); > + } > + > + if (!test_bit(NETFS_RREQ_DONT_UNLOCK_FOLIOS, &rreq->flags)) { > + if (folio_index(folio) == rreq->no_unlock_folio && > + test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags)) > + _debug("no unlock"); > + else > + folio_unlock(folio); > + } > + } > + rcu_read_unlock(); > + > + task_io_account_read(account); > + if (rreq->netfs_ops->done) > + rreq->netfs_ops->done(rreq); > +} > + > +static void netfs_cache_expand_readahead(struct netfs_io_request *rreq, > + loff_t *_start, size_t *_len, loff_t i_size) > +{ > + struct netfs_cache_resources *cres = &rreq->cache_resources; > + > + if (cres->ops && cres->ops->expand_readahead) > + cres->ops->expand_readahead(cres, _start, _len, i_size); > +} > + > +static void netfs_rreq_expand(struct netfs_io_request *rreq, > + struct readahead_control *ractl) > +{ > + /* Give the cache a chance to change the request parameters. The > + * resultant request must contain the original region. > + */ > + netfs_cache_expand_readahead(rreq, &rreq->start, &rreq->len, rreq->i_size); > + > + /* Give the netfs a chance to change the request parameters. The > + * resultant request must contain the original region. > + */ > + if (rreq->netfs_ops->expand_readahead) > + rreq->netfs_ops->expand_readahead(rreq); > + > + /* Expand the request if the cache wants it to start earlier. Note > + * that the expansion may get further extended if the VM wishes to > + * insert THPs and the preferred start and/or end wind up in the middle > + * of THPs. > + * > + * If this is the case, however, the THP size should be an integer > + * multiple of the cache granule size, so we get a whole number of > + * granules to deal with. > + */ > + if (rreq->start != readahead_pos(ractl) || > + rreq->len != readahead_length(ractl)) { > + readahead_expand(ractl, rreq->start, rreq->len); > + rreq->start = readahead_pos(ractl); > + rreq->len = readahead_length(ractl); > + > + trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl), > + netfs_read_trace_expanded); > + } > +} > + > +/** > + * netfs_readahead - Helper to manage a read request > + * @ractl: The description of the readahead request > + * > + * Fulfil a readahead request by drawing data from the cache if possible, or > + * the netfs if not. Space beyond the EOF is zero-filled. Multiple I/O > + * requests from different sources will get munged together. If necessary, the > + * readahead window can be expanded in either direction to a more convenient > + * alighment for RPC efficiency or to make storage in the cache feasible. > + * > + * The calling netfs must initialise a netfs context contiguous to the vfs > + * inode before calling this. > + * > + * This is usable whether or not caching is enabled. > + */ > +void netfs_readahead(struct readahead_control *ractl) > +{ > + struct netfs_io_request *rreq; > + struct netfs_i_context *ctx = netfs_i_context(ractl->mapping->host); > + int ret; > + > + _enter("%lx,%x", readahead_index(ractl), readahead_count(ractl)); > + > + if (readahead_count(ractl) == 0) > + return; > + > + rreq = netfs_alloc_request(ractl->mapping, ractl->file, > + readahead_pos(ractl), > + readahead_length(ractl), > + NETFS_READAHEAD); > + if (IS_ERR(rreq)) > + return; > + > + if (ctx->ops->begin_cache_operation) { > + ret = ctx->ops->begin_cache_operation(rreq); > + if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS) > + goto cleanup_free; > + } > + > + netfs_stat(&netfs_n_rh_readahead); > + trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl), > + netfs_read_trace_readahead); > + > + netfs_rreq_expand(rreq, ractl); > + > + /* Drop the refs on the folios here rather than in the cache or > + * filesystem. The locks will be dropped in netfs_rreq_unlock(). > + */ > + while (readahead_folio(ractl)) > + ; > + > + netfs_begin_read(rreq, false); > + return; > + > +cleanup_free: > + netfs_put_request(rreq, false, netfs_rreq_trace_put_failed); > + return; > +} > +EXPORT_SYMBOL(netfs_readahead); > + > +/** > + * netfs_readpage - Helper to manage a readpage request > + * @file: The file to read from > + * @subpage: A subpage of the folio to read > + * > + * Fulfil a readpage request by drawing data from the cache if possible, or the > + * netfs if not. Space beyond the EOF is zero-filled. Multiple I/O requests > + * from different sources will get munged together. > + * > + * The calling netfs must initialise a netfs context contiguous to the vfs > + * inode before calling this. > + * > + * This is usable whether or not caching is enabled. > + */ > +int netfs_readpage(struct file *file, struct page *subpage) > +{ > + struct folio *folio = page_folio(subpage); > + struct address_space *mapping = folio_file_mapping(folio); > + struct netfs_io_request *rreq; > + struct netfs_i_context *ctx = netfs_i_context(mapping->host); > + int ret; > + > + _enter("%lx", folio_index(folio)); > + > + rreq = netfs_alloc_request(mapping, file, > + folio_file_pos(folio), folio_size(folio), > + NETFS_READPAGE); > + if (IS_ERR(rreq)) { > + ret = PTR_ERR(rreq); > + goto alloc_error; > + } > + > + if (ctx->ops->begin_cache_operation) { > + ret = ctx->ops->begin_cache_operation(rreq); > + if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS) > + goto discard; > + } > + > + netfs_stat(&netfs_n_rh_readpage); > + trace_netfs_read(rreq, rreq->start, rreq->len, netfs_read_trace_readpage); > + return netfs_begin_read(rreq, true); > + > +discard: > + netfs_put_request(rreq, false, netfs_rreq_trace_put_discard); > +alloc_error: > + folio_unlock(folio); > + return ret; > +} > +EXPORT_SYMBOL(netfs_readpage); > + > +/* > + * Prepare a folio for writing without reading first > + * @folio: The folio being prepared > + * @pos: starting position for the write > + * @len: length of write > + * @always_fill: T if the folio should always be completely filled/cleared > + * > + * In some cases, write_begin doesn't need to read at all: > + * - full folio write > + * - write that lies in a folio that is completely beyond EOF > + * - write that covers the folio from start to EOF or beyond it > + * > + * If any of these criteria are met, then zero out the unwritten parts > + * of the folio and return true. Otherwise, return false. > + */ > +static bool netfs_skip_folio_read(struct folio *folio, loff_t pos, size_t len, > + bool always_fill) > +{ > + struct inode *inode = folio_inode(folio); > + loff_t i_size = i_size_read(inode); > + size_t offset = offset_in_folio(folio, pos); > + size_t plen = folio_size(folio); > + > + if (unlikely(always_fill)) { > + if (pos - offset + len <= i_size) > + return false; /* Page entirely before EOF */ > + zero_user_segment(&folio->page, 0, plen); > + folio_mark_uptodate(folio); > + return true; > + } > + > + /* Full folio write */ > + if (offset == 0 && len >= plen) > + return true; > + > + /* Page entirely beyond the end of the file */ > + if (pos - offset >= i_size) > + goto zero_out; > + > + /* Write that covers from the start of the folio to EOF or beyond */ > + if (offset == 0 && (pos + len) >= i_size) > + goto zero_out; > + > + return false; > +zero_out: > + zero_user_segments(&folio->page, 0, offset, offset + len, len); > + return true; > +} > + > +/** > + * netfs_write_begin - Helper to prepare for writing > + * @file: The file to read from > + * @mapping: The mapping to read from > + * @pos: File position at which the write will begin > + * @len: The length of the write (may extend beyond the end of the folio chosen) > + * @aop_flags: AOP_* flags > + * @_folio: Where to put the resultant folio > + * @_fsdata: Place for the netfs to store a cookie > + * > + * Pre-read data for a write-begin request by drawing data from the cache if > + * possible, or the netfs if not. Space beyond the EOF is zero-filled. > + * Multiple I/O requests from different sources will get munged together. If > + * necessary, the readahead window can be expanded in either direction to a > + * more convenient alighment for RPC efficiency or to make storage in the cache > + * feasible. > + * > + * The calling netfs must provide a table of operations, only one of which, > + * issue_op, is mandatory. > + * > + * The check_write_begin() operation can be provided to check for and flush > + * conflicting writes once the folio is grabbed and locked. It is passed a > + * pointer to the fsdata cookie that gets returned to the VM to be passed to > + * write_end. It is permitted to sleep. It should return 0 if the request > + * should go ahead; unlock the folio and return -EAGAIN to cause the folio to > + * be regot; or return an error. > + * > + * The calling netfs must initialise a netfs context contiguous to the vfs > + * inode before calling this. > + * > + * This is usable whether or not caching is enabled. > + */ > +int netfs_write_begin(struct file *file, struct address_space *mapping, > + loff_t pos, unsigned int len, unsigned int aop_flags, > + struct folio **_folio, void **_fsdata) > +{ > + struct netfs_io_request *rreq; > + struct netfs_i_context *ctx = netfs_i_context(file_inode(file )); > + struct folio *folio; > + unsigned int fgp_flags; > + pgoff_t index = pos >> PAGE_SHIFT; > + int ret; > + > + DEFINE_READAHEAD(ractl, file, NULL, mapping, index); > + > +retry: > + fgp_flags = FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE; > + if (aop_flags & AOP_FLAG_NOFS) > + fgp_flags |= FGP_NOFS; > + folio = __filemap_get_folio(mapping, index, fgp_flags, > + mapping_gfp_mask(mapping)); > + if (!folio) > + return -ENOMEM; > + > + if (ctx->ops->check_write_begin) { > + /* Allow the netfs (eg. ceph) to flush conflicts. */ > + ret = ctx->ops->check_write_begin(file, pos, len, folio, _fsdata); > + if (ret < 0) { > + trace_netfs_failure(NULL, NULL, ret, netfs_fail_check_write_begin); > + if (ret == -EAGAIN) > + goto retry; > + goto error; > + } > + } > + > + if (folio_test_uptodate(folio)) > + goto have_folio; > + > + /* If the page is beyond the EOF, we want to clear it - unless it's > + * within the cache granule containing the EOF, in which case we need > + * to preload the granule. > + */ > + if (!netfs_is_cache_enabled(ctx) && > + netfs_skip_folio_read(folio, pos, len, false)) { > + netfs_stat(&netfs_n_rh_write_zskip); > + goto have_folio_no_wait; > + } > + > + rreq = netfs_alloc_request(mapping, file, > + folio_file_pos(folio), folio_size(folio), > + NETFS_READ_FOR_WRITE); > + if (IS_ERR(rreq)) { > + ret = PTR_ERR(rreq); > + goto error; > + } > + rreq->no_unlock_folio = folio_index(folio); > + __set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags); > + > + if (ctx->ops->begin_cache_operation) { > + ret = ctx->ops->begin_cache_operation(rreq); > + if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS) > + goto error_put; > + } > + > + netfs_stat(&netfs_n_rh_write_begin); > + trace_netfs_read(rreq, pos, len, netfs_read_trace_write_begin); > + > + /* Expand the request to meet caching requirements and download > + * preferences. > + */ > + ractl._nr_pages = folio_nr_pages(folio); > + netfs_rreq_expand(rreq, &ractl); > + > + /* We hold the folio locks, so we can drop the references */ > + folio_get(folio); > + while (readahead_folio(&ractl)) > + ; > + > + ret = netfs_begin_read(rreq, true); > + if (ret < 0) > + goto error; > + > +have_folio: > + ret = folio_wait_fscache_killable(folio); > + if (ret < 0) > + goto error; > +have_folio_no_wait: > + *_folio = folio; > + _leave(" = 0"); > + return 0; > + > +error_put: > + netfs_put_request(rreq, false, netfs_rreq_trace_put_failed); > +error: > + folio_unlock(folio); > + folio_put(folio); > + _leave(" = %d", ret); > + return ret; > +} > +EXPORT_SYMBOL(netfs_write_begin); > diff --git a/fs/netfs/io.c b/fs/netfs/io.c > index 058a534ba917..1fe9706c58a5 100644 > --- a/fs/netfs/io.c > +++ b/fs/netfs/io.c > @@ -246,91 +246,6 @@ static void netfs_rreq_write_to_cache(struct netfs_io_request *rreq) > BUG(); > } > > -/* > - * Unlock the folios in a read operation. We need to set PG_fscache on any > - * folios we're going to write back before we unlock them. > - */ > -void netfs_rreq_unlock_folios(struct netfs_io_request *rreq) > -{ > - struct netfs_io_subrequest *subreq; > - struct folio *folio; > - unsigned int iopos, account = 0; > - pgoff_t start_page = rreq->start / PAGE_SIZE; > - pgoff_t last_page = ((rreq->start + rreq->len) / PAGE_SIZE) - 1; > - bool subreq_failed = false; > - > - XA_STATE(xas, &rreq->mapping->i_pages, start_page); > - > - if (test_bit(NETFS_RREQ_FAILED, &rreq->flags)) { > - __clear_bit(NETFS_RREQ_COPY_TO_CACHE, &rreq->flags); > - list_for_each_entry(subreq, &rreq->subrequests, rreq_link) { > - __clear_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags); > - } > - } > - > - /* Walk through the pagecache and the I/O request lists simultaneously. > - * We may have a mixture of cached and uncached sections and we only > - * really want to write out the uncached sections. This is slightly > - * complicated by the possibility that we might have huge pages with a > - * mixture inside. > - */ > - subreq = list_first_entry(&rreq->subrequests, > - struct netfs_io_subrequest, rreq_link); > - iopos = 0; > - subreq_failed = (subreq->error < 0); > - > - trace_netfs_rreq(rreq, netfs_rreq_trace_unlock); > - > - rcu_read_lock(); > - xas_for_each(&xas, folio, last_page) { > - unsigned int pgpos = (folio_index(folio) - start_page) * PAGE_SIZE; > - unsigned int pgend = pgpos + folio_size(folio); > - bool pg_failed = false; > - > - for (;;) { > - if (!subreq) { > - pg_failed = true; > - break; > - } > - if (test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags)) > - folio_start_fscache(folio); > - pg_failed |= subreq_failed; > - if (pgend < iopos + subreq->len) > - break; > - > - account += subreq->transferred; > - iopos += subreq->len; > - if (!list_is_last(&subreq->rreq_link, &rreq->subrequests)) { > - subreq = list_next_entry(subreq, rreq_link); > - subreq_failed = (subreq->error < 0); > - } else { > - subreq = NULL; > - subreq_failed = false; > - } > - if (pgend == iopos) > - break; > - } > - > - if (!pg_failed) { > - flush_dcache_folio(folio); > - folio_mark_uptodate(folio); > - } > - > - if (!test_bit(NETFS_RREQ_DONT_UNLOCK_FOLIOS, &rreq->flags)) { > - if (folio_index(folio) == rreq->no_unlock_folio && > - test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags)) > - _debug("no unlock"); > - else > - folio_unlock(folio); > - } > - } > - rcu_read_unlock(); > - > - task_io_account_read(account); > - if (rreq->netfs_ops->done) > - rreq->netfs_ops->done(rreq); > -} > - > /* > * Handle a short read. > */ > @@ -750,336 +665,3 @@ int netfs_begin_read(struct netfs_io_request *rreq, bool sync) > } > return ret; > } > - > -static void netfs_cache_expand_readahead(struct netfs_io_request *rreq, > - loff_t *_start, size_t *_len, loff_t i_size) > -{ > - struct netfs_cache_resources *cres = &rreq->cache_resources; > - > - if (cres->ops && cres->ops->expand_readahead) > - cres->ops->expand_readahead(cres, _start, _len, i_size); > -} > - > -static void netfs_rreq_expand(struct netfs_io_request *rreq, > - struct readahead_control *ractl) > -{ > - /* Give the cache a chance to change the request parameters. The > - * resultant request must contain the original region. > - */ > - netfs_cache_expand_readahead(rreq, &rreq->start, &rreq->len, rreq->i_size); > - > - /* Give the netfs a chance to change the request parameters. The > - * resultant request must contain the original region. > - */ > - if (rreq->netfs_ops->expand_readahead) > - rreq->netfs_ops->expand_readahead(rreq); > - > - /* Expand the request if the cache wants it to start earlier. Note > - * that the expansion may get further extended if the VM wishes to > - * insert THPs and the preferred start and/or end wind up in the middle > - * of THPs. > - * > - * If this is the case, however, the THP size should be an integer > - * multiple of the cache granule size, so we get a whole number of > - * granules to deal with. > - */ > - if (rreq->start != readahead_pos(ractl) || > - rreq->len != readahead_length(ractl)) { > - readahead_expand(ractl, rreq->start, rreq->len); > - rreq->start = readahead_pos(ractl); > - rreq->len = readahead_length(ractl); > - > - trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl), > - netfs_read_trace_expanded); > - } > -} > - > -/** > - * netfs_readahead - Helper to manage a read request > - * @ractl: The description of the readahead request > - * > - * Fulfil a readahead request by drawing data from the cache if possible, or > - * the netfs if not. Space beyond the EOF is zero-filled. Multiple I/O > - * requests from different sources will get munged together. If necessary, the > - * readahead window can be expanded in either direction to a more convenient > - * alighment for RPC efficiency or to make storage in the cache feasible. > - * > - * The calling netfs must initialise a netfs context contiguous to the vfs > - * inode before calling this. > - * > - * This is usable whether or not caching is enabled. > - */ > -void netfs_readahead(struct readahead_control *ractl) > -{ > - struct netfs_io_request *rreq; > - struct netfs_i_context *ctx = netfs_i_context(ractl->mapping->host); > - int ret; > - > - _enter("%lx,%x", readahead_index(ractl), readahead_count(ractl)); > - > - if (readahead_count(ractl) == 0) > - return; > - > - rreq = netfs_alloc_request(ractl->mapping, ractl->file, > - readahead_pos(ractl), > - readahead_length(ractl), > - NETFS_READAHEAD); > - if (IS_ERR(rreq)) > - return; > - > - if (ctx->ops->begin_cache_operation) { > - ret = ctx->ops->begin_cache_operation(rreq); > - if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS) > - goto cleanup_free; > - } > - > - netfs_stat(&netfs_n_rh_readahead); > - trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl), > - netfs_read_trace_readahead); > - > - netfs_rreq_expand(rreq, ractl); > - > - /* Drop the refs on the folios here rather than in the cache or > - * filesystem. The locks will be dropped in netfs_rreq_unlock(). > - */ > - while (readahead_folio(ractl)) > - ; > - > - netfs_begin_read(rreq, false); > - return; > - > -cleanup_free: > - netfs_put_request(rreq, false, netfs_rreq_trace_put_failed); > - return; > -} > -EXPORT_SYMBOL(netfs_readahead); > - > -/** > - * netfs_readpage - Helper to manage a readpage request > - * @file: The file to read from > - * @subpage: A subpage of the folio to read > - * > - * Fulfil a readpage request by drawing data from the cache if possible, or the > - * netfs if not. Space beyond the EOF is zero-filled. Multiple I/O requests > - * from different sources will get munged together. > - * > - * The calling netfs must initialise a netfs context contiguous to the vfs > - * inode before calling this. > - * > - * This is usable whether or not caching is enabled. > - */ > -int netfs_readpage(struct file *file, struct page *subpage) > -{ > - struct folio *folio = page_folio(subpage); > - struct address_space *mapping = folio->mapping; > - struct netfs_io_request *rreq; > - struct netfs_i_context *ctx = netfs_i_context(mapping->host); > - int ret; > - > - _enter("%lx", folio_index(folio)); > - > - rreq = netfs_alloc_request(mapping, file, > - folio_file_pos(folio), folio_size(folio), > - NETFS_READPAGE); > - if (IS_ERR(rreq)) { > - ret = PTR_ERR(rreq); > - goto alloc_error; > - } > - > - if (ctx->ops->begin_cache_operation) { > - ret = ctx->ops->begin_cache_operation(rreq); > - if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS) > - goto discard; > - } > - > - netfs_stat(&netfs_n_rh_readpage); > - trace_netfs_read(rreq, rreq->start, rreq->len, netfs_read_trace_readpage); > - return netfs_begin_read(rreq, true); > - > -discard: > - netfs_put_request(rreq, false, netfs_rreq_trace_put_discard); > -alloc_error: > - folio_unlock(folio); > - return ret; > -} > -EXPORT_SYMBOL(netfs_readpage); > - > -/* > - * Prepare a folio for writing without reading first > - * @folio: The folio being prepared > - * @pos: starting position for the write > - * @len: length of write > - * @always_fill: T if the folio should always be completely filled/cleared > - * > - * In some cases, write_begin doesn't need to read at all: > - * - full folio write > - * - write that lies in a folio that is completely beyond EOF > - * - write that covers the folio from start to EOF or beyond it > - * > - * If any of these criteria are met, then zero out the unwritten parts > - * of the folio and return true. Otherwise, return false. > - */ > -static bool netfs_skip_folio_read(struct folio *folio, loff_t pos, size_t len, > - bool always_fill) > -{ > - struct inode *inode = folio_inode(folio); > - loff_t i_size = i_size_read(inode); > - size_t offset = offset_in_folio(folio, pos); > - size_t plen = folio_size(folio); > - > - if (unlikely(always_fill)) { > - if (pos - offset + len <= i_size) > - return false; /* Page entirely before EOF */ > - zero_user_segment(&folio->page, 0, plen); > - folio_mark_uptodate(folio); > - return true; > - } > - > - /* Full folio write */ > - if (offset == 0 && len >= plen) > - return true; > - > - /* Page entirely beyond the end of the file */ > - if (pos - offset >= i_size) > - goto zero_out; > - > - /* Write that covers from the start of the folio to EOF or beyond */ > - if (offset == 0 && (pos + len) >= i_size) > - goto zero_out; > - > - return false; > -zero_out: > - zero_user_segments(&folio->page, 0, offset, offset + len, len); > - return true; > -} > - > -/** > - * netfs_write_begin - Helper to prepare for writing > - * @file: The file to read from > - * @mapping: The mapping to read from > - * @pos: File position at which the write will begin > - * @len: The length of the write (may extend beyond the end of the folio chosen) > - * @aop_flags: AOP_* flags > - * @_folio: Where to put the resultant folio > - * @_fsdata: Place for the netfs to store a cookie > - * > - * Pre-read data for a write-begin request by drawing data from the cache if > - * possible, or the netfs if not. Space beyond the EOF is zero-filled. > - * Multiple I/O requests from different sources will get munged together. If > - * necessary, the readahead window can be expanded in either direction to a > - * more convenient alighment for RPC efficiency or to make storage in the cache > - * feasible. > - * > - * The calling netfs must provide a table of operations, only one of which, > - * issue_op, is mandatory. > - * > - * The check_write_begin() operation can be provided to check for and flush > - * conflicting writes once the folio is grabbed and locked. It is passed a > - * pointer to the fsdata cookie that gets returned to the VM to be passed to > - * write_end. It is permitted to sleep. It should return 0 if the request > - * should go ahead; unlock the folio and return -EAGAIN to cause the folio to > - * be regot; or return an error. > - * > - * The calling netfs must initialise a netfs context contiguous to the vfs > - * inode before calling this. > - * > - * This is usable whether or not caching is enabled. > - */ > -int netfs_write_begin(struct file *file, struct address_space *mapping, > - loff_t pos, unsigned int len, unsigned int aop_flags, > - struct folio **_folio, void **_fsdata) > -{ > - struct netfs_io_request *rreq; > - struct netfs_i_context *ctx = netfs_i_context(file_inode(file )); > - struct folio *folio; > - unsigned int fgp_flags; > - pgoff_t index = pos >> PAGE_SHIFT; > - int ret; > - > - DEFINE_READAHEAD(ractl, file, NULL, mapping, index); > - > -retry: > - fgp_flags = FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE; > - if (aop_flags & AOP_FLAG_NOFS) > - fgp_flags |= FGP_NOFS; > - folio = __filemap_get_folio(mapping, index, fgp_flags, > - mapping_gfp_mask(mapping)); > - if (!folio) > - return -ENOMEM; > - > - if (ctx->ops->check_write_begin) { > - /* Allow the netfs (eg. ceph) to flush conflicts. */ > - ret = ctx->ops->check_write_begin(file, pos, len, folio, _fsdata); > - if (ret < 0) { > - trace_netfs_failure(NULL, NULL, ret, netfs_fail_check_write_begin); > - if (ret == -EAGAIN) > - goto retry; > - goto error; > - } > - } > - > - if (folio_test_uptodate(folio)) > - goto have_folio; > - > - /* If the page is beyond the EOF, we want to clear it - unless it's > - * within the cache granule containing the EOF, in which case we need > - * to preload the granule. > - */ > - if (!netfs_is_cache_enabled(ctx) && > - netfs_skip_folio_read(folio, pos, len, false)) { > - netfs_stat(&netfs_n_rh_write_zskip); > - goto have_folio_no_wait; > - } > - > - rreq = netfs_alloc_request(mapping, file, > - folio_file_pos(folio), folio_size(folio), > - NETFS_READ_FOR_WRITE); > - if (IS_ERR(rreq)) { > - ret = PTR_ERR(rreq); > - goto error; > - } > - rreq->no_unlock_folio = folio_index(folio); > - __set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags); > - > - if (ctx->ops->begin_cache_operation) { > - ret = ctx->ops->begin_cache_operation(rreq); > - if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS) > - goto error_put; > - } > - > - netfs_stat(&netfs_n_rh_write_begin); > - trace_netfs_read(rreq, pos, len, netfs_read_trace_write_begin); > - > - /* Expand the request to meet caching requirements and download > - * preferences. > - */ > - ractl._nr_pages = folio_nr_pages(folio); > - netfs_rreq_expand(rreq, &ractl); > - > - /* We hold the folio locks, so we can drop the references */ > - folio_get(folio); > - while (readahead_folio(&ractl)) > - ; > - > - ret = netfs_begin_read(rreq, true); > - if (ret < 0) > - goto error; > - > -have_folio: > - ret = folio_wait_fscache_killable(folio); > - if (ret < 0) > - goto error; > -have_folio_no_wait: > - *_folio = folio; > - _leave(" = 0"); > - return 0; > - > -error_put: > - netfs_put_request(rreq, false, netfs_rreq_trace_put_failed); > -error: > - folio_unlock(folio); > - folio_put(folio); > - _leave(" = %d", ret); > - return ret; > -} > -EXPORT_SYMBOL(netfs_write_begin); > > Patch itself is fine though. Reviewed-by: Jeff Layton <jlayton@xxxxxxxxxx> -- Linux-cachefs mailing list Linux-cachefs@xxxxxxxxxx https://listman.redhat.com/mailman/listinfo/linux-cachefs