Make some adjustments to the handling of netfs groups so that ceph can
use them for snap contexts:

 - Move netfs_get_group(), netfs_put_group() and netfs_put_group_many()
   to linux/netfs.h so that ceph can build its snap context on netfs
   groups (see sketch #1 after the "---" break below).

 - Move netfs_set_group() and __netfs_set_group() to linux/netfs.h so
   that ceph_dirty_folio() can call them from inside the locked section
   in which it finds the snap context to attach (sketch #2).

 - Provide a netfs_writepages_group() that takes a group as a parameter,
   attach the group to the write request and make netfs_free_request()
   drop the ref on it.  netfs_writepages() then becomes a wrapper that
   passes in a NULL group (sketch #3).

 - In netfs_perform_write(), only consider a folio to have a conflicting
   group if the folio's group pointer isn't NULL or if the folio is
   dirty (a clean folio with no group attached can simply be given the
   new group).

 - In netfs_perform_write(), interject a small 10ms sleep after every 16
   attempts to flush a folio within a single call.

Signed-off-by: David Howells <dhowells@xxxxxxxxxx>
cc: Jeff Layton <jlayton@xxxxxxxxxx>
cc: Viacheslav Dubeyko <slava@xxxxxxxxxxx>
cc: Alex Markuze <amarkuze@xxxxxxxxxx>
cc: Ilya Dryomov <idryomov@xxxxxxxxx>
cc: ceph-devel@xxxxxxxxxxxxxxx
cc: linux-fsdevel@xxxxxxxxxxxxxxx
---
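
[The three sketches below are illustrative only and not part of the patch;
all my_fs_* names are hypothetical.]

Sketch #1: how a filesystem might embed struct netfs_group in its own
grouping structure so that netfs_get_group()/netfs_put_group() manage its
lifetime, the way ceph intends to build its snap context on netfs groups.
Only struct netfs_group and its ->ref/->free members are real netfs API.

struct my_fs_snap_group {
	struct netfs_group	group;		/* refcount + free method */
	u64			snap_id;	/* fs-specific payload */
};

static void my_fs_free_group(struct netfs_group *netfs_group)
{
	kfree(container_of(netfs_group, struct my_fs_snap_group, group));
}

static struct my_fs_snap_group *my_fs_alloc_group(u64 snap_id)
{
	struct my_fs_snap_group *sg;

	sg = kzalloc(sizeof(*sg), GFP_KERNEL);
	if (!sg)
		return NULL;
	refcount_set(&sg->group.ref, 1);	/* caller's initial ref */
	sg->group.free = my_fs_free_group;	/* run on final netfs_put_group() */
	sg->snap_id = snap_id;
	return sg;
}
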
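Sketch #2: a hypothetical ->dirty_folio() attaching the active group from
inside the locked section in which it was found - the reason
netfs_set_group() needs to be visible outside fs/netfs/.  The inode
container and its fields are assumptions, and current_group is assumed to
always be set.

struct my_fs_inode {
	struct netfs_inode	netfs;		/* must be first */
	spinlock_t		group_lock;
	struct my_fs_snap_group	*current_group;	/* ref held by the inode */
};

static inline struct my_fs_inode *MY_FS_I(struct inode *inode)
{
	return container_of(inode, struct my_fs_inode, netfs.inode);
}

static bool my_fs_dirty_folio(struct address_space *mapping, struct folio *folio)
{
	struct my_fs_inode *mi = MY_FS_I(mapping->host);

	/* Attach under the same lock that guards current_group so that the
	 * group can't change between look-up and attachment.
	 */
	spin_lock(&mi->group_lock);
	netfs_set_group(folio, &mi->current_group->group);
	spin_unlock(&mi->group_lock);

	return filemap_dirty_folio(mapping, folio);
}
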
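Sketch #3: a hypothetical ->writepages() flushing just the folios that
belong to one group.  netfs_writepages_group() takes its own ref on the
group (dropped when the request is freed in netfs_free_request()), so the
caller still puts the ref it obtained.

static int my_fs_writepages(struct address_space *mapping,
			    struct writeback_control *wbc)
{
	struct my_fs_inode *mi = MY_FS_I(mapping->host);
	struct netfs_group *group;
	int ret;

	/* Pin the group to be flushed before starting writeback. */
	spin_lock(&mi->group_lock);
	group = netfs_get_group(&mi->current_group->group);
	spin_unlock(&mi->group_lock);

	ret = netfs_writepages_group(mapping, wbc, group);
	netfs_put_group(group);
	return ret;
}
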
 fs/netfs/buffered_write.c | 25 ++++-------------
 fs/netfs/internal.h       | 32 ---------------------
 fs/netfs/objects.c        |  1 +
 fs/netfs/write_issue.c    | 38 +++++++++++++++++++++----
 include/linux/netfs.h     | 59 +++++++++++++++++++++++++++++++++++++++
 5 files changed, 98 insertions(+), 57 deletions(-)

diff --git a/fs/netfs/buffered_write.c b/fs/netfs/buffered_write.c
index 0245449b93e3..12ddbe9bc78b 100644
--- a/fs/netfs/buffered_write.c
+++ b/fs/netfs/buffered_write.c
@@ -11,26 +11,9 @@
 #include <linux/pagemap.h>
 #include <linux/slab.h>
 #include <linux/pagevec.h>
+#include <linux/delay.h>
 #include "internal.h"
 
-static void __netfs_set_group(struct folio *folio, struct netfs_group *netfs_group)
-{
-	if (netfs_group)
-		folio_attach_private(folio, netfs_get_group(netfs_group));
-}
-
-static void netfs_set_group(struct folio *folio, struct netfs_group *netfs_group)
-{
-	void *priv = folio_get_private(folio);
-
-	if (unlikely(priv != netfs_group)) {
-		if (netfs_group && (!priv || priv == NETFS_FOLIO_COPY_TO_CACHE))
-			folio_attach_private(folio, netfs_get_group(netfs_group));
-		else if (!netfs_group && priv == NETFS_FOLIO_COPY_TO_CACHE)
-			folio_detach_private(folio);
-	}
-}
-
 /*
  * Grab a folio for writing and lock it.  Attempt to allocate as large a folio
  * as possible to hold as much of the remaining length as possible in one go.
@@ -113,6 +96,7 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
 	};
 	struct netfs_io_request *wreq = NULL;
 	struct folio *folio = NULL, *writethrough = NULL;
+	unsigned int flush_counter = 0;
 	unsigned int bdp_flags = (iocb->ki_flags & IOCB_NOWAIT) ? BDP_ASYNC : 0;
 	ssize_t written = 0, ret, ret2;
 	loff_t i_size, pos = iocb->ki_pos;
@@ -208,7 +192,8 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
 
 		group = netfs_folio_group(folio);
 		if (unlikely(group != netfs_group) &&
-		    group != NETFS_FOLIO_COPY_TO_CACHE)
+		    group != NETFS_FOLIO_COPY_TO_CACHE &&
+		    (group || folio_test_dirty(folio)))
 			goto flush_content;
 
 		if (folio_test_uptodate(folio)) {
@@ -341,6 +326,8 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
 		trace_netfs_folio(folio, netfs_flush_content);
 		folio_unlock(folio);
 		folio_put(folio);
+		if ((++flush_counter & 0xf) == 0xf)
+			msleep(10);
 		ret = filemap_write_and_wait_range(mapping, fpos, fpos + flen - 1);
 		if (ret < 0)
 			goto error_folio_unlock;
diff --git a/fs/netfs/internal.h b/fs/netfs/internal.h
index eebb4f0f660e..2a6123c4da35 100644
--- a/fs/netfs/internal.h
+++ b/fs/netfs/internal.h
@@ -261,38 +261,6 @@ static inline bool netfs_is_cache_enabled(struct netfs_inode *ctx)
 #endif
 }
 
-/*
- * Get a ref on a netfs group attached to a dirty page (e.g. a ceph snap).
- */
-static inline struct netfs_group *netfs_get_group(struct netfs_group *netfs_group)
-{
-	if (netfs_group && netfs_group != NETFS_FOLIO_COPY_TO_CACHE)
-		refcount_inc(&netfs_group->ref);
-	return netfs_group;
-}
-
-/*
- * Dispose of a netfs group attached to a dirty page (e.g. a ceph snap).
- */
-static inline void netfs_put_group(struct netfs_group *netfs_group)
-{
-	if (netfs_group &&
-	    netfs_group != NETFS_FOLIO_COPY_TO_CACHE &&
-	    refcount_dec_and_test(&netfs_group->ref))
-		netfs_group->free(netfs_group);
-}
-
-/*
- * Dispose of a netfs group attached to a dirty page (e.g. a ceph snap).
- */
-static inline void netfs_put_group_many(struct netfs_group *netfs_group, int nr)
-{
-	if (netfs_group &&
-	    netfs_group != NETFS_FOLIO_COPY_TO_CACHE &&
-	    refcount_sub_and_test(nr, &netfs_group->ref))
-		netfs_group->free(netfs_group);
-}
-
 /*
  * Check to see if a buffer aligns with the crypto block size.  If it doesn't
  * the crypto layer is going to copy all the data - in which case relying on
diff --git a/fs/netfs/objects.c b/fs/netfs/objects.c
index 52d6fce70837..7fdbaa5c5cab 100644
--- a/fs/netfs/objects.c
+++ b/fs/netfs/objects.c
@@ -153,6 +153,7 @@ static void netfs_free_request(struct work_struct *work)
 		kvfree(rreq->direct_bv);
 	}
 
+	netfs_put_group(rreq->group);
 	rolling_buffer_clear(&rreq->buffer);
 	rolling_buffer_clear(&rreq->bounce);
 	if (test_bit(NETFS_RREQ_PUT_RMW_TAIL, &rreq->flags))
diff --git a/fs/netfs/write_issue.c b/fs/netfs/write_issue.c
index 93601033ba08..3921fcf4f859 100644
--- a/fs/netfs/write_issue.c
+++ b/fs/netfs/write_issue.c
@@ -418,7 +418,7 @@ static int netfs_write_folio(struct netfs_io_request *wreq,
 		netfs_issue_write(wreq, upload);
 	} else if (fgroup != wreq->group) {
 		/* We can't write this page to the server yet. */
-		kdebug("wrong group");
+		kdebug("wrong group %px != %px", fgroup, wreq->group);
 		folio_redirty_for_writepage(wbc, folio);
 		folio_unlock(folio);
 		netfs_issue_write(wreq, upload);
@@ -593,11 +593,19 @@ static void netfs_end_issue_write(struct netfs_io_request *wreq)
 		netfs_wake_write_collector(wreq, false);
 }
 
-/*
- * Write some of the pending data back to the server
+/**
+ * netfs_writepages_group - Flush data from the pagecache for a file
+ * @mapping: The file to flush from
+ * @wbc: Details of what should be flushed
+ * @group: The write grouping to flush (or NULL)
+ *
+ * Start asynchronous write back operations to flush dirty data belonging to a
+ * particular group in a file's pagecache back to the server and to the local
+ * cache.
  */
-int netfs_writepages(struct address_space *mapping,
-		     struct writeback_control *wbc)
+int netfs_writepages_group(struct address_space *mapping,
+			   struct writeback_control *wbc,
+			   struct netfs_group *group)
 {
 	struct netfs_inode *ictx = netfs_inode(mapping->host);
 	struct netfs_io_request *wreq = NULL;
@@ -618,12 +626,15 @@ int netfs_writepages(struct address_space *mapping,
 	if (!folio)
 		goto out;
 
-	wreq = netfs_create_write_req(mapping, NULL, folio_pos(folio), NETFS_WRITEBACK);
+	wreq = netfs_create_write_req(mapping, NULL, folio_pos(folio),
+				      NETFS_WRITEBACK);
 	if (IS_ERR(wreq)) {
 		error = PTR_ERR(wreq);
 		goto couldnt_start;
 	}
 
+	wreq->group = netfs_get_group(group);
+
 	trace_netfs_write(wreq, netfs_write_trace_writeback);
 	netfs_stat(&netfs_n_wh_writepages);
 
@@ -659,6 +670,21 @@ int netfs_writepages(struct address_space *mapping,
 	_leave(" = %d", error);
 	return error;
 }
+EXPORT_SYMBOL(netfs_writepages_group);
+
+/**
+ * netfs_writepages - Flush data from the pagecache for a file
+ * @mapping: The file to flush from
+ * @wbc: Details of what should be flushed
+ *
+ * Start asynchronous write back operations to flush dirty data in a file's
+ * pagecache back to the server and to the local cache.
+ */
+int netfs_writepages(struct address_space *mapping,
+		     struct writeback_control *wbc)
+{
+	return netfs_writepages_group(mapping, wbc, NULL);
+}
 EXPORT_SYMBOL(netfs_writepages);
 
 /*
diff --git a/include/linux/netfs.h b/include/linux/netfs.h
index a67297de8a20..69052ac47ab1 100644
--- a/include/linux/netfs.h
+++ b/include/linux/netfs.h
@@ -457,6 +457,9 @@ int netfs_read_folio(struct file *, struct folio *);
 int netfs_write_begin(struct netfs_inode *, struct file *,
 		      struct address_space *, loff_t pos, unsigned int len,
 		      struct folio **, void **fsdata);
+int netfs_writepages_group(struct address_space *mapping,
+			   struct writeback_control *wbc,
+			   struct netfs_group *group);
 int netfs_writepages(struct address_space *mapping,
 		     struct writeback_control *wbc);
 bool netfs_dirty_folio(struct address_space *mapping, struct folio *folio);
@@ -597,4 +600,60 @@ static inline void netfs_wait_for_outstanding_io(struct inode *inode)
 	wait_var_event(&ictx->io_count, atomic_read(&ictx->io_count) == 0);
 }
 
+/*
+ * Get a ref on a netfs group attached to a dirty page (e.g. a ceph snap).
+ */
+static inline struct netfs_group *netfs_get_group(struct netfs_group *netfs_group)
+{
+	if (netfs_group && netfs_group != NETFS_FOLIO_COPY_TO_CACHE)
+		refcount_inc(&netfs_group->ref);
+	return netfs_group;
+}
+
+/*
+ * Dispose of a netfs group attached to a dirty page (e.g. a ceph snap).
+ */
+static inline void netfs_put_group(struct netfs_group *netfs_group)
+{
+	if (netfs_group &&
+	    netfs_group != NETFS_FOLIO_COPY_TO_CACHE &&
+	    refcount_dec_and_test(&netfs_group->ref))
+		netfs_group->free(netfs_group);
+}
+
+/*
+ * Dispose of a netfs group attached to a dirty page (e.g. a ceph snap).
+ */
+static inline void netfs_put_group_many(struct netfs_group *netfs_group, int nr)
+{
+	if (netfs_group &&
+	    netfs_group != NETFS_FOLIO_COPY_TO_CACHE &&
+	    refcount_sub_and_test(nr, &netfs_group->ref))
+		netfs_group->free(netfs_group);
+}
+
+/*
+ * Set the group pointer directly on a folio.
+ */
+static inline void __netfs_set_group(struct folio *folio, struct netfs_group *netfs_group)
+{
+	if (netfs_group)
+		folio_attach_private(folio, netfs_get_group(netfs_group));
+}
+
+/*
+ * Set the group pointer on a folio or the folio info record.
+ */
+static inline void netfs_set_group(struct folio *folio, struct netfs_group *netfs_group)
+{
+	void *priv = folio_get_private(folio);
+
+	if (unlikely(priv != netfs_group)) {
+		if (netfs_group && (!priv || priv == NETFS_FOLIO_COPY_TO_CACHE))
+			folio_attach_private(folio, netfs_get_group(netfs_group));
+		else if (!netfs_group && priv == NETFS_FOLIO_COPY_TO_CACHE)
+			folio_detach_private(folio);
+	}
+}
+
 #endif /* _LINUX_NETFS_H */