David Howells <dhowells@xxxxxxxxxx> wrote:

> And then CIFS.
...
>
> Base + write_cache_pages():
>  WRITE: bw=457MiB/s (479MB/s), 114MiB/s-114MiB/s (120MB/s-120MB/s)
>  WRITE: bw=449MiB/s (471MB/s), 112MiB/s-113MiB/s (118MB/s-118MB/s)
>  WRITE: bw=459MiB/s (482MB/s), 115MiB/s-115MiB/s (120MB/s-121MB/s)

Here's my patch to convert cifs to use write_cache_pages().

David
---
 fs/cifs/file.c | 400 ++++++++++++++++-----------------------------------------
 1 file changed, 115 insertions(+), 285 deletions(-)

diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 3d304d4a54d6..04e2466609d9 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -2613,140 +2613,35 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
 	return rc;
 }
 
-/*
- * Extend the region to be written back to include subsequent contiguously
- * dirty pages if possible, but don't sleep while doing so.
- */
-static void cifs_extend_writeback(struct address_space *mapping,
-				  long *_count,
-				  loff_t start,
-				  int max_pages,
-				  size_t max_len,
-				  unsigned int *_len)
-{
-	struct folio_batch batch;
-	struct folio *folio;
-	unsigned int psize, nr_pages;
-	size_t len = *_len;
-	pgoff_t index = (start + len) / PAGE_SIZE;
-	bool stop = true;
-	unsigned int i;
-	XA_STATE(xas, &mapping->i_pages, index);
-
-	folio_batch_init(&batch);
-
-	do {
-		/* Firstly, we gather up a batch of contiguous dirty pages
-		 * under the RCU read lock - but we can't clear the dirty flags
-		 * there if any of those pages are mapped.
-		 */
-		rcu_read_lock();
-
-		xas_for_each(&xas, folio, ULONG_MAX) {
-			stop = true;
-			if (xas_retry(&xas, folio))
-				continue;
-			if (xa_is_value(folio))
-				break;
-			if (folio_index(folio) != index)
-				break;
-			if (!folio_try_get_rcu(folio)) {
-				xas_reset(&xas);
-				continue;
-			}
-			nr_pages = folio_nr_pages(folio);
-			if (nr_pages > max_pages)
-				break;
-
-			/* Has the page moved or been split? */
-			if (unlikely(folio != xas_reload(&xas))) {
-				folio_put(folio);
-				break;
-			}
-
-			if (!folio_trylock(folio)) {
-				folio_put(folio);
-				break;
-			}
-			if (!folio_test_dirty(folio) || folio_test_writeback(folio)) {
-				folio_unlock(folio);
-				folio_put(folio);
-				break;
-			}
-
-			psize = folio_size(folio);
-			len += psize;
-			stop = false;
-			if (max_pages <= 0 || len >= max_len || *_count <= 0)
-				stop = true;
-
-			index += nr_pages;
-			if (!folio_batch_add(&batch, folio))
-				break;
-			if (stop)
-				break;
-		}
-
-		if (!stop)
-			xas_pause(&xas);
-		rcu_read_unlock();
-
-		/* Now, if we obtained any pages, we can shift them to being
-		 * writable and mark them for caching.
-		 */
-		if (!folio_batch_count(&batch))
-			break;
-
-		for (i = 0; i < folio_batch_count(&batch); i++) {
-			folio = batch.folios[i];
-			/* The folio should be locked, dirty and not undergoing
-			 * writeback from the loop above.
-			 */
-			if (!folio_clear_dirty_for_io(folio))
-				WARN_ON(1);
-			if (folio_start_writeback(folio))
-				WARN_ON(1);
-
-			*_count -= folio_nr_pages(folio);
-			folio_unlock(folio);
-		}
-
-		folio_batch_release(&batch);
-		cond_resched();
-	} while (!stop);
-
-	*_len = len;
-}
+struct cifs_writepages_context {
+	struct cifs_writedata *wdata;
+	struct TCP_Server_Info *server;
+	struct cifs_credits credits;
+	unsigned long long start;
+	size_t len;
+	size_t wsize;
+	unsigned int xid;
+	bool begun;
+	bool caching;
+};
 
 /*
- * Write back the locked page and any subsequent non-locked dirty pages.
+ * Set up a writeback op.
  */
-static ssize_t cifs_write_back_from_locked_folio(struct address_space *mapping,
-						 struct writeback_control *wbc,
-						 struct folio *folio,
-						 loff_t start, loff_t end)
+static int cifs_writeback_begin(struct address_space *mapping,
+				struct writeback_control *wbc,
+				unsigned long long start,
+				struct cifs_writepages_context *ctx)
 {
 	struct inode *inode = mapping->host;
 	struct TCP_Server_Info *server;
 	struct cifs_writedata *wdata;
 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
-	struct cifs_credits credits_on_stack;
-	struct cifs_credits *credits = &credits_on_stack;
 	struct cifsFileInfo *cfile = NULL;
-	unsigned int xid, wsize, len;
-	loff_t i_size = i_size_read(inode);
-	size_t max_len;
-	long count = wbc->nr_to_write;
+	unsigned int wsize, len;
 	int rc;
 
-	/* The folio should be locked, dirty and not undergoing writeback. */
-	if (folio_start_writeback(folio))
-		WARN_ON(1);
-
-	count -= folio_nr_pages(folio);
-	len = folio_size(folio);
-
-	xid = get_xid();
+	ctx->xid = get_xid();
 	server = cifs_pick_channel(cifs_sb_master_tcon(cifs_sb)->ses);
 
 	rc = cifs_get_writable_file(CIFS_I(inode), FIND_WR_ANY, &cfile);
@@ -2756,7 +2651,7 @@ static ssize_t cifs_write_back_from_locked_folio(struct address_space *mapping,
 	}
 
 	rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
-					   &wsize, credits);
+					   &wsize, &ctx->credits);
 	if (rc != 0)
 		goto err_close;
 
@@ -2767,56 +2662,60 @@ static ssize_t cifs_write_back_from_locked_folio(struct address_space *mapping,
 	}
 
 	wdata->sync_mode = wbc->sync_mode;
-	wdata->offset = folio_pos(folio);
+	wdata->offset = start;
 	wdata->pid = cfile->pid;
-	wdata->credits = credits_on_stack;
+	wdata->credits = ctx->credits;
 	wdata->cfile = cfile;
 	wdata->server = server;
-	cfile = NULL;
-
-	/* Find all consecutive lockable dirty pages, stopping when we find a
-	 * page that is not immediately lockable, is not dirty or is missing,
-	 * or we reach the end of the range.
-	 */
-	if (start < i_size) {
-		/* Trim the write to the EOF; the extra data is ignored. Also
-		 * put an upper limit on the size of a single storedata op.
-		 */
-		max_len = wsize;
-		max_len = min_t(unsigned long long, max_len, end - start + 1);
-		max_len = min_t(unsigned long long, max_len, i_size - start);
-
-		if (len < max_len) {
-			int max_pages = INT_MAX;
-
-#ifdef CONFIG_CIFS_SMB_DIRECT
-			if (server->smbd_conn)
-				max_pages = server->smbd_conn->max_frmr_depth;
-#endif
-			max_pages -= folio_nr_pages(folio);
+	ctx->wsize = wsize;
+	ctx->server = server;
+	ctx->wdata = wdata;
+	ctx->begun = true;
+	return 0;
 
-			if (max_pages > 0)
-				cifs_extend_writeback(mapping, &count, start,
-						      max_pages, max_len, &len);
-		}
-		len = min_t(loff_t, len, max_len);
+err_uncredit:
+	add_credits_and_wake_if(server, &ctx->credits, 0);
+err_close:
+	if (cfile)
+		cifsFileInfo_put(cfile);
+err_xid:
+	free_xid(ctx->xid);
+	if (is_retryable_error(rc)) {
+		cifs_pages_write_redirty(inode, start, len);
+	} else {
+		cifs_pages_write_failed(inode, start, len);
+		mapping_set_error(mapping, rc);
 	}
+	/* Indication to update ctime and mtime as close is deferred */
+	set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
+	return rc;
+}
 
-	wdata->bytes = len;
+/*
+ * Flush a block of pages to the server and the cache.
+ */
+static int cifs_writepages_submit(struct address_space *mapping,
+				  struct writeback_control *wbc,
+				  struct cifs_writepages_context *ctx)
+{
+	struct TCP_Server_Info *server = ctx->server;
+	struct cifs_writedata *wdata = ctx->wdata;
+	unsigned long long i_size = i_size_read(mapping->host);
+	int rc;
 
-	/* We now have a contiguous set of dirty pages, each with writeback
-	 * set; the first page is still locked at this point, but all the rest
-	 * have been unlocked.
+	/* We now have a contiguous set of dirty pages, each with
+	 * writeback set.
 	 */
-	folio_unlock(folio);
-
-	if (start < i_size) {
-		iov_iter_xarray(&wdata->iter, ITER_SOURCE, &mapping->i_pages,
-				start, len);
+	if (ctx->start < i_size) {
+		if (ctx->len > i_size - ctx->start)
+			ctx->len = i_size - ctx->start;
+		wdata->bytes = ctx->len;
+		iov_iter_xarray(&wdata->iter, ITER_SOURCE,
+				&mapping->i_pages, ctx->start, wdata->bytes);
 
 		rc = adjust_credits(wdata->server, &wdata->credits, wdata->bytes);
 		if (rc)
-			goto err_wdata;
+			goto err;
 
 		if (wdata->cfile->invalidHandle)
 			rc = -EAGAIN;
@@ -2827,133 +2726,79 @@ static ssize_t cifs_write_back_from_locked_folio(struct address_space *mapping,
 			kref_put(&wdata->refcount, cifs_writedata_release);
 			goto err_close;
 		}
+
 	} else {
 		/* The dirty region was entirely beyond the EOF. */
-		cifs_pages_written_back(inode, start, len);
+		cifs_pages_written_back(mapping->host, ctx->start, ctx->len);
 		rc = 0;
 	}
 
-err_wdata:
+err:
 	kref_put(&wdata->refcount, cifs_writedata_release);
-err_uncredit:
-	add_credits_and_wake_if(server, credits, 0);
+	add_credits_and_wake_if(server, &ctx->credits, 0);
 err_close:
-	if (cfile)
-		cifsFileInfo_put(cfile);
-err_xid:
-	free_xid(xid);
+	free_xid(ctx->xid);
 	if (rc == 0) {
-		wbc->nr_to_write = count;
-		rc = len;
+		rc = 0;
 	} else if (is_retryable_error(rc)) {
-		cifs_pages_write_redirty(inode, start, len);
+		cifs_pages_write_redirty(mapping->host, ctx->start, ctx->len);
 	} else {
-		cifs_pages_write_failed(inode, start, len);
+		cifs_pages_write_failed(mapping->host, ctx->start, ctx->len);
 		mapping_set_error(mapping, rc);
 	}
+
 	/* Indication to update ctime and mtime as close is deferred */
-	set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
+	set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(mapping->host)->flags);
+	ctx->wdata = NULL;
+	ctx->begun = false;
 	return rc;
 }
 
 /*
- * write a region of pages back to the server
+ * Add a page to the set and flush when large enough.
  */
-static int cifs_writepages_region(struct address_space *mapping,
-				  struct writeback_control *wbc,
-				  loff_t start, loff_t end, loff_t *_next)
+static int cifs_writepages_add_folio(struct folio *folio,
+				     struct writeback_control *wbc, void *data)
 {
-	struct folio_batch fbatch;
-	int skips = 0;
-
-	folio_batch_init(&fbatch);
-	do {
-		int nr;
-		pgoff_t index = start / PAGE_SIZE;
-
-		nr = filemap_get_folios_tag(mapping, &index, end / PAGE_SIZE,
-					    PAGECACHE_TAG_DIRTY, &fbatch);
-		if (!nr)
-			break;
-
-		for (int i = 0; i < nr; i++) {
-			ssize_t ret;
-			struct folio *folio = fbatch.folios[i];
-
-redo_folio:
-			start = folio_pos(folio); /* May regress with THPs */
-
-			/* At this point we hold neither the i_pages lock nor the
-			 * page lock: the page may be truncated or invalidated
-			 * (changing page->mapping to NULL), or even swizzled
-			 * back from swapper_space to tmpfs file mapping
-			 */
-			if (wbc->sync_mode != WB_SYNC_NONE) {
-				ret = folio_lock_killable(folio);
-				if (ret < 0)
-					goto write_error;
-			} else {
-				if (!folio_trylock(folio))
-					goto skip_write;
-			}
-
-			if (folio_mapping(folio) != mapping ||
-			    !folio_test_dirty(folio)) {
-				start += folio_size(folio);
-				folio_unlock(folio);
-				continue;
-			}
-
-			if (folio_test_writeback(folio) ||
-			    folio_test_fscache(folio)) {
-				folio_unlock(folio);
-				if (wbc->sync_mode == WB_SYNC_NONE)
-					goto skip_write;
-
-				folio_wait_writeback(folio);
-#ifdef CONFIG_CIFS_FSCACHE
-				folio_wait_fscache(folio);
-#endif
-				goto redo_folio;
-			}
-
-			if (!folio_clear_dirty_for_io(folio))
-				/* We hold the page lock - it should've been dirty. */
-				WARN_ON(1);
+	struct cifs_writepages_context *ctx = data;
+	unsigned long long i_size = i_size_read(folio->mapping->host);
+	unsigned long long pos = folio_pos(folio);
+	size_t size = folio_size(folio);
+	int ret;
 
-			ret = cifs_write_back_from_locked_folio(mapping, wbc, folio, start, end);
-			if (ret < 0)
-				goto write_error;
+	if (pos < i_size && size > i_size - pos)
+		size = i_size - pos;
 
-			start += ret;
-			continue;
-
-write_error:
-			folio_batch_release(&fbatch);
-			*_next = start;
+	if (ctx->begun) {
+		if (pos == ctx->start + ctx->len &&
+		    ctx->len + size <= ctx->wsize)
+			goto add;
+		ret = cifs_writepages_submit(folio->mapping, wbc, ctx);
+		if (ret < 0) {
+			ctx->begun = false;
 			return ret;
+		}
+	}
 
-skip_write:
-			/*
-			 * Too many skipped writes, or need to reschedule?
-			 * Treat it as a write error without an error code.
-			 */
-			if (skips >= 5 || need_resched()) {
-				ret = 0;
-				goto write_error;
-			}
+	ret = cifs_writeback_begin(folio->mapping, wbc, pos, ctx);
+	if (ret < 0)
+		return ret;
 
-			/* Otherwise, just skip that folio and go on to the next */
-			skips++;
-			start += folio_size(folio);
-			continue;
-		}
+	ctx->start = folio_pos(folio);
+	ctx->len = 0;
+add:
+	ctx->len += folio_size(folio);
 
-		folio_batch_release(&fbatch);
-		cond_resched();
-	} while (wbc->nr_to_write > 0);
+	folio_wait_fscache(folio);
+	folio_start_writeback(folio);
+	folio_unlock(folio);
 
-	*_next = start;
+	if (ctx->len >= ctx->wsize) {
+		ret = cifs_writepages_submit(folio->mapping, wbc, ctx);
+		if (ret < 0)
+			return ret;
+		ctx->begun = false;
+	}
 	return 0;
 }
 
@@ -2963,7 +2808,7 @@ static int cifs_writepages_region(struct address_space *mapping,
 static int cifs_writepages(struct address_space *mapping,
 			   struct writeback_control *wbc)
 {
-	loff_t start, next;
+	struct cifs_writepages_context ctx = {};
 	int ret;
 
 	/* We have to be careful as we can end up racing with setattr()
@@ -2971,26 +2816,11 @@ static int cifs_writepages(struct address_space *mapping,
 	 * to prevent it.
 	 */
-	if (wbc->range_cyclic) {
-		start = mapping->writeback_index * PAGE_SIZE;
-		ret = cifs_writepages_region(mapping, wbc, start, LLONG_MAX, &next);
-		if (ret == 0) {
-			mapping->writeback_index = next / PAGE_SIZE;
-			if (start > 0 && wbc->nr_to_write > 0) {
-				ret = cifs_writepages_region(mapping, wbc, 0,
-							     start, &next);
-				if (ret == 0)
-					mapping->writeback_index =
-						next / PAGE_SIZE;
-			}
-		}
-	} else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) {
-		ret = cifs_writepages_region(mapping, wbc, 0, LLONG_MAX, &next);
-		if (wbc->nr_to_write > 0 && ret == 0)
-			mapping->writeback_index = next / PAGE_SIZE;
-	} else {
-		ret = cifs_writepages_region(mapping, wbc,
-					     wbc->range_start, wbc->range_end, &next);
+	ret = write_cache_pages(mapping, wbc, cifs_writepages_add_folio, &ctx);
+	if (ret >= 0 && ctx.begun) {
+		ret = cifs_writepages_submit(mapping, wbc, &ctx);
+		if (ret < 0)
+			return ret;
 	}
 
 	return ret;
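
For anyone skimming the callback logic rather than the diff: the add_folio
callback above is just an accumulate-then-flush state machine keyed on
contiguity and wsize.  A rough user-space model of that shape follows; it is
purely illustrative - the names (run, run_add, run_submit), the page-granular
units and the sample input are made up and are not part of the patch.

/* Toy model of the accumulate-then-flush shape of
 * cifs_writepages_add_folio(); not kernel code.
 */
#include <stdbool.h>
#include <stdio.h>

#define WSIZE 4			/* pretend wsize is four pages */

struct run {
	unsigned long long start;	/* first page in the current run */
	unsigned long long len;		/* pages accumulated so far */
	bool begun;
};

/* Stand-in for cifs_writepages_submit(): write out the current run. */
static void run_submit(struct run *r)
{
	printf("submit pages %llu-%llu\n", r->start, r->start + r->len - 1);
	r->begun = false;
}

/* Stand-in for cifs_writepages_add_folio(): extend the run if the new
 * page is contiguous and still fits, otherwise flush and start afresh.
 */
static void run_add(struct run *r, unsigned long long pos)
{
	if (r->begun) {
		if (pos == r->start + r->len && r->len + 1 <= WSIZE) {
			r->len++;
			goto maybe_flush;
		}
		run_submit(r);
	}

	r->start = pos;
	r->len = 1;
	r->begun = true;

maybe_flush:
	if (r->len >= WSIZE)
		run_submit(r);
}

int main(void)
{
	/* Dirty pages in the order write_cache_pages() would offer them. */
	unsigned long long dirty[] = { 0, 1, 2, 3, 4, 7, 8, 9 };
	struct run r = { .begun = false };
	unsigned int i;

	for (i = 0; i < sizeof(dirty) / sizeof(dirty[0]); i++)
		run_add(&r, dirty[i]);

	if (r.begun)	/* same idea as the ctx.begun check in cifs_writepages() */
		run_submit(&r);
	return 0;
}

Fed the dirty pages {0,1,2,3,4,7,8,9} with a four-page wsize, this flushes
0-3, then 4, then 7-9 - the same grouping the real callback would hand to
cifs_writepages_submit().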