On Mon, 24 Jun 2019 at 08:55, Christoph Hellwig <hch@xxxxxx> wrote: > At least for xfs we don't need the mark_inode_dirty at all. Can you > solve your gfs2 requirements on top of something like the patch below? > Note that in general it seems like you should try to only update the > on-disk inode size in writeback completion anyway, otherwise you can > have a stale i_size update before the data was actually written. > > > diff --git a/fs/iomap.c b/fs/iomap.c > index c98107a6bf81..fcf2cbd39114 100644 > --- a/fs/iomap.c > +++ b/fs/iomap.c > @@ -785,6 +785,7 @@ iomap_write_end(struct inode *inode, loff_t pos, unsigned len, > unsigned copied, struct page *page, struct iomap *iomap) > { > const struct iomap_page_ops *page_ops = iomap->page_ops; > + loff_t old_size = inode->i_size; > int ret; > > if (iomap->type == IOMAP_INLINE) { > @@ -796,7 +797,12 @@ iomap_write_end(struct inode *inode, loff_t pos, unsigned len, > ret = __iomap_write_end(inode, pos, len, copied, page, iomap); > } > > - __generic_write_end(inode, pos, ret, page); > + if (pos + ret > inode->i_size) > + i_size_write(inode, pos + ret); > + unlock_page(page); > + > + if (old_size < pos) > + pagecache_isize_extended(inode, old_size, pos); > if (page_ops && page_ops->page_done) > page_ops->page_done(inode, pos, copied, page, iomap); > put_page(page); That would work, but I don't like how this leaves us with a vfs function that updates i_size without bothering to dirty the inode very much. How about if we move the __generic_write_end call into the page_done callback and leave special handling to the filesystem code if needed instead? The below patch seems to work for gfs2. Thanks, Andreas --- fs/gfs2/bmap.c | 42 ++++++++++++++++++++++++++++++++++++------ fs/gfs2/incore.h | 1 + fs/iomap.c | 5 +++-- 3 files changed, 40 insertions(+), 8 deletions(-) diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 93ea1d529aa3..7569770e6871 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -991,10 +991,13 @@ static void gfs2_write_unlock(struct inode *inode) static int gfs2_iomap_page_prepare(struct inode *inode, loff_t pos, unsigned len, struct iomap *iomap) { - unsigned int blockmask = i_blocksize(inode) - 1; + struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); - unsigned int blocks; + unsigned int blockmask, blocks; + if (!(gfs2_is_stuffed(ip) || gfs2_is_jdata(ip))) + return 0; + blockmask = i_blocksize(inode) - 1; blocks = ((pos & blockmask) + len + blockmask) >> inode->i_blkbits; return gfs2_trans_begin(sdp, RES_DINODE + blocks, 0); } @@ -1005,10 +1008,33 @@ static void gfs2_iomap_page_done(struct inode *inode, loff_t pos, { struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_sbd *sdp = GFS2_SB(inode); + loff_t old_size; + + if (!page) + goto out; - if (page && !gfs2_is_stuffed(ip)) + /* + * Avoid calling __generic_write_end here to prevent mark_inode_dirty + * from being called for each page: it's relatively expensive on gfs2, + * so we defer that to gfs2_iomap_end. + */ + old_size = inode->i_size; + if (pos + copied > old_size) { + i_size_write(inode, pos + copied); + set_bit(GIF_SIZE_CHANGED, &ip->i_flags); + } + + unlock_page(page); + + if (old_size < pos) + pagecache_isize_extended(inode, old_size, pos); + + if (gfs2_is_jdata(ip) && !gfs2_is_stuffed(ip)) gfs2_page_add_databufs(ip, page, offset_in_page(pos), copied); - gfs2_trans_end(sdp); + +out: + if (current->journal_info) + gfs2_trans_end(sdp); } static const struct iomap_page_ops gfs2_iomap_page_ops = { @@ -1106,8 +1132,7 @@ static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos, gfs2_trans_end(sdp); } - if (gfs2_is_stuffed(ip) || gfs2_is_jdata(ip)) - iomap->page_ops = &gfs2_iomap_page_ops; + iomap->page_ops = &gfs2_iomap_page_ops; return 0; out_trans_end: @@ -1160,6 +1185,11 @@ static int gfs2_iomap_end(struct inode *inode, loff_t pos, loff_t length, if ((flags & (IOMAP_WRITE | IOMAP_DIRECT)) != IOMAP_WRITE) goto out; + if (test_bit(GIF_SIZE_CHANGED, &ip->i_flags)) { + clear_bit(GIF_SIZE_CHANGED, &ip->i_flags); + mark_inode_dirty(inode); + } + if (!gfs2_is_stuffed(ip)) gfs2_ordered_add_inode(ip); diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index c9af93ac6c73..9f620807b396 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -396,6 +396,7 @@ enum { GIF_ORDERED = 4, GIF_FREE_VFS_INODE = 5, GIF_GLOP_PENDING = 6, + GIF_SIZE_CHANGED = 7, }; struct gfs2_inode { diff --git a/fs/iomap.c b/fs/iomap.c index 12654c2e78f8..b5c761827966 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -788,9 +788,10 @@ iomap_write_end(struct inode *inode, loff_t pos, unsigned len, ret = __iomap_write_end(inode, pos, len, copied, page, iomap); } - __generic_write_end(inode, pos, ret, page); if (page_ops && page_ops->page_done) - page_ops->page_done(inode, pos, copied, page, iomap); + page_ops->page_done(inode, pos, ret, page, iomap); + else + __generic_write_end(inode, pos, ret, page); put_page(page); if (ret < len) -- 2.20.1