On Fri, 22 Mar 2019 at 00:01, Andreas Gruenbacher <agruenba@xxxxxxxxxx> wrote: > On Thu, 21 Mar 2019 at 22:43, Dave Chinner <david@xxxxxxxxxxxxx> wrote: > > The problem is calling balance_dirty_pages() inside the > > ->iomap_begin/->iomap_end calls and not that it is called by the > > iomap infrastructure itself, right? > > > > Is so, I'd prefer to see this in iomap_apply() after the call to > > ops->iomap_end because iomap_file_buffered_write() can iterate and > > call iomap_apply() multiple times. This would keep the balancing to > > a per-iomap granularity, rather than a per-syscall granularity. > > > > i.e. if we do write(2GB), we want more than one balancing call > > during that syscall, so it would be up to the filesystem to a) limit > > the size of write mappings to something smaller (e.g. 1024 pages) > > so that there are still frequent balancing calls for large writes. > > Hmm. The looping across multiple mappings isn't done in iomap_apply > but in iomap_file_buffered_write, so the balancing could go into > iomap_apply or iomap_file_buffered_write, but can't go further up the > stack. Given that, iomap_file_buffered_write seems the better place, > but this is still quite horrible. Here's a more reasonable version of my first patch, with a cleaned up and hopefully fixed gfs2 part. In addition, this checks for IOMAP_F_UNBALANCED in iomap_dirty_actor, the actor for iomap_file_dirty. We don't use iomap_file_dirty in gfs2, but we should probably allow skipping the dirty page balancing there as well. 
Thanks, Andreas --- fs/gfs2/bmap.c | 64 +++++++++++++++++++++++++++++++++---------- fs/iomap.c | 6 ++-- include/linux/iomap.h | 1 + 3 files changed, 55 insertions(+), 16 deletions(-) diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 02b2646d84b3a..628d66d07fc6c 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -974,6 +974,19 @@ static void gfs2_iomap_journaled_page_done(struct inode *inode, loff_t pos, gfs2_page_add_databufs(ip, page, offset_in_page(pos), copied); } +static void gfs2_write_end(struct inode *inode, struct buffer_head *dibh) +{ + struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_trans *tr = current->journal_info; + + gfs2_ordered_add_inode(ip); + + if (tr->tr_num_buf_new) + __mark_inode_dirty(inode, I_DIRTY_DATASYNC); + else + gfs2_trans_add_meta(ip->i_gl, dibh); +} + static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos, loff_t length, unsigned flags, struct iomap *iomap, @@ -996,6 +1009,25 @@ static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos, if (ret) goto out_unlock; + if (gfs2_is_stuffed(ip) || gfs2_is_jdata(ip) || inode == sdp->sd_rindex) { + int max_pages; + u64 max_length; + + iomap->flags |= IOMAP_F_UNBALANCED; + + /* + * Limit the write size: this ensures that write throttling + * will kick in fast enough even when we don't call + * balance_dirty_pages_ratelimited for each page written. 
+ */ + max_pages = current->nr_dirtied_pause - current->nr_dirtied; + if (max_pages < 8) + max_pages = 8; + max_length = (u64)max_pages << PAGE_SHIFT; + if (iomap->length > max_length) + iomap->length = max_length; + } + alloc_required = unstuff || iomap->type == IOMAP_HOLE; if (alloc_required || gfs2_is_jdata(ip)) @@ -1052,6 +1084,11 @@ static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos, } if (!gfs2_is_stuffed(ip) && gfs2_is_jdata(ip)) iomap->page_done = gfs2_iomap_journaled_page_done; + + if (!(iomap->flags & IOMAP_F_UNBALANCED)) { + gfs2_write_end(inode, mp->mp_bh[0]); + gfs2_trans_end(sdp); + } return 0; out_trans_end: @@ -1103,30 +1140,29 @@ static int gfs2_iomap_end(struct inode *inode, loff_t pos, loff_t length, ssize_t written, unsigned flags, struct iomap *iomap) { struct gfs2_inode *ip = GFS2_I(inode); - struct gfs2_sbd *sdp = GFS2_SB(inode); - struct gfs2_trans *tr = current->journal_info; struct buffer_head *dibh = iomap->private; if ((flags & (IOMAP_WRITE | IOMAP_DIRECT)) != IOMAP_WRITE) goto out; - if (iomap->type != IOMAP_INLINE) { - gfs2_ordered_add_inode(ip); + if (current->journal_info) { + struct gfs2_sbd *sdp = GFS2_SB(inode); - if (tr->tr_num_buf_new) - __mark_inode_dirty(inode, I_DIRTY_DATASYNC); - else - gfs2_trans_add_meta(ip->i_gl, dibh); - } + if (iomap->type != IOMAP_INLINE) + gfs2_write_end(inode, dibh); - if (inode == sdp->sd_rindex) { - adjust_fs_space(inode); - sdp->sd_rindex_uptodate = 0; - } + if (inode == sdp->sd_rindex) { + adjust_fs_space(inode); + sdp->sd_rindex_uptodate = 0; + } - gfs2_trans_end(sdp); + gfs2_trans_end(sdp); + } gfs2_inplace_release(ip); + if (iomap->flags & IOMAP_F_UNBALANCED) + balance_dirty_pages_ratelimited(inode->i_mapping); + if (length != written && (iomap->flags & IOMAP_F_NEW)) { /* Deallocate blocks that were just allocated. 
*/ loff_t blockmask = i_blocksize(inode) - 1; diff --git a/fs/iomap.c b/fs/iomap.c index 97cb9d486a7da..5f950fee0834f 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -863,7 +863,8 @@ iomap_write_actor(struct inode *inode, loff_t pos, loff_t length, void *data, written += copied; length -= copied; - balance_dirty_pages_ratelimited(inode->i_mapping); + if (!(iomap->flags & IOMAP_F_UNBALANCED)) + balance_dirty_pages_ratelimited(inode->i_mapping); } while (iov_iter_count(i) && length); return written ? written : status; @@ -945,7 +946,8 @@ iomap_dirty_actor(struct inode *inode, loff_t pos, loff_t length, void *data, written += status; length -= status; - balance_dirty_pages_ratelimited(inode->i_mapping); + if (!(iomap->flags & IOMAP_F_UNBALANCED)) + balance_dirty_pages_ratelimited(inode->i_mapping); } while (length); return written; diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 0fefb5455bdaf..e9a04e76a3217 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -35,6 +35,7 @@ struct vm_fault; #define IOMAP_F_NEW 0x01 /* blocks have been newly allocated */ #define IOMAP_F_DIRTY 0x02 /* uncommitted metadata */ #define IOMAP_F_BUFFER_HEAD 0x04 /* file system requires buffer heads */ +#define IOMAP_F_UNBALANCED 0x08 /* don't balance dirty pages */ /* * Flags that only need to be reported for IOMAP_REPORT requests: -- 2.20.1