On Tue, Apr 30, 2019 at 12:09:34AM +0200, Andreas Gruenbacher wrote:
> Since commit 64bc06bb32ee ("gfs2: iomap buffered write support"), gfs2 is doing
> buffered writes by starting a transaction in iomap_begin, writing a range of
> pages, and ending that transaction in iomap_end. This approach suffers from
> two problems:
>
> (1) Any allocations necessary for the write are done in iomap_begin, so when
> the data aren't journaled, there is no need for keeping the transaction open
> until iomap_end.
>
> (2) Transactions keep the gfs2 log flush lock held. When
> iomap_file_buffered_write calls balance_dirty_pages, this can end up calling
> gfs2_write_inode, which will try to flush the log. This requires taking the
> log flush lock which is already held, resulting in a deadlock.

/me wonders how holding the log flush lock doesn't seriously limit
performance, but gfs2 isn't my fight so I'll set that aside and assume
that a patch S-o-B'd by both maintainers is ok. :)

How should we merge this patch #5? It doesn't touch fs/iomap.c itself,
so do you want me to pull it into the iomap branch along with the
previous four patches? That would be fine with me (and easier than a
multi-tree merge mess)...

--D

>
> Fix both of these issues by not keeping transactions open from iomap_begin to
> iomap_end. Instead, start a small transaction in page_prepare and end it in
> page_done when necessary.
> > Reported-by: Edwin Török <edvin.torok@xxxxxxxxxx> > Fixes: 64bc06bb32ee ("gfs2: iomap buffered write support") > Signed-off-by: Andreas Gruenbacher <agruenba@xxxxxxxxxx> > Signed-off-by: Bob Peterson <rpeterso@xxxxxxxxxx> > --- > fs/gfs2/aops.c | 14 +++++--- > fs/gfs2/bmap.c | 88 +++++++++++++++++++++++++++----------------------- > 2 files changed, 58 insertions(+), 44 deletions(-) > > diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c > index 05dd78f4b2b3..6210d4429d84 100644 > --- a/fs/gfs2/aops.c > +++ b/fs/gfs2/aops.c > @@ -649,7 +649,7 @@ static int gfs2_readpages(struct file *file, struct address_space *mapping, > */ > void adjust_fs_space(struct inode *inode) > { > - struct gfs2_sbd *sdp = inode->i_sb->s_fs_info; > + struct gfs2_sbd *sdp = GFS2_SB(inode); > struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode); > struct gfs2_inode *l_ip = GFS2_I(sdp->sd_sc_inode); > struct gfs2_statfs_change_host *m_sc = &sdp->sd_statfs_master; > @@ -657,10 +657,13 @@ void adjust_fs_space(struct inode *inode) > struct buffer_head *m_bh, *l_bh; > u64 fs_total, new_free; > > + if (gfs2_trans_begin(sdp, 2 * RES_STATFS, 0) != 0) > + return; > + > /* Total up the file system space, according to the latest rindex. 
*/ > fs_total = gfs2_ri_total(sdp); > if (gfs2_meta_inode_buffer(m_ip, &m_bh) != 0) > - return; > + goto out; > > spin_lock(&sdp->sd_statfs_spin); > gfs2_statfs_change_in(m_sc, m_bh->b_data + > @@ -675,11 +678,14 @@ void adjust_fs_space(struct inode *inode) > gfs2_statfs_change(sdp, new_free, new_free, 0); > > if (gfs2_meta_inode_buffer(l_ip, &l_bh) != 0) > - goto out; > + goto out2; > update_statfs(sdp, m_bh, l_bh); > brelse(l_bh); > -out: > +out2: > brelse(m_bh); > +out: > + sdp->sd_rindex_uptodate = 0; > + gfs2_trans_end(sdp); > } > > /** > diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c > index aa014725f84a..27c82f4aaf32 100644 > --- a/fs/gfs2/bmap.c > +++ b/fs/gfs2/bmap.c > @@ -991,17 +991,28 @@ static void gfs2_write_unlock(struct inode *inode) > gfs2_glock_dq_uninit(&ip->i_gh); > } > > +static int gfs2_iomap_page_prepare(struct inode *inode, loff_t pos, > + unsigned len, struct iomap *iomap) > +{ > + struct gfs2_sbd *sdp = GFS2_SB(inode); > + > + return gfs2_trans_begin(sdp, RES_DINODE + (len >> inode->i_blkbits), 0); > +} > + > static void gfs2_iomap_page_done(struct inode *inode, loff_t pos, > unsigned copied, struct page *page, > struct iomap *iomap) > { > struct gfs2_inode *ip = GFS2_I(inode); > + struct gfs2_sbd *sdp = GFS2_SB(inode); > > - if (page) > + if (page && !gfs2_is_stuffed(ip)) > gfs2_page_add_databufs(ip, page, offset_in_page(pos), copied); > + gfs2_trans_end(sdp); > } > > static const struct iomap_page_ops gfs2_iomap_page_ops = { > + .page_prepare = gfs2_iomap_page_prepare, > .page_done = gfs2_iomap_page_done, > }; > > @@ -1057,31 +1068,45 @@ static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos, > if (alloc_required) > rblocks += gfs2_rg_blocks(ip, data_blocks + ind_blocks); > > - ret = gfs2_trans_begin(sdp, rblocks, iomap->length >> inode->i_blkbits); > - if (ret) > - goto out_trans_fail; > + if (unstuff || iomap->type == IOMAP_HOLE) { > + struct gfs2_trans *tr; > > - if (unstuff) { > - ret = gfs2_unstuff_dinode(ip, NULL); > + ret 
= gfs2_trans_begin(sdp, rblocks, > + iomap->length >> inode->i_blkbits); > if (ret) > - goto out_trans_end; > - release_metapath(mp); > - ret = gfs2_iomap_get(inode, iomap->offset, iomap->length, > - flags, iomap, mp); > - if (ret) > - goto out_trans_end; > - } > + goto out_trans_fail; > > - if (iomap->type == IOMAP_HOLE) { > - ret = gfs2_iomap_alloc(inode, iomap, flags, mp); > - if (ret) { > - gfs2_trans_end(sdp); > - gfs2_inplace_release(ip); > - punch_hole(ip, iomap->offset, iomap->length); > - goto out_qunlock; > + if (unstuff) { > + ret = gfs2_unstuff_dinode(ip, NULL); > + if (ret) > + goto out_trans_end; > + release_metapath(mp); > + ret = gfs2_iomap_get(inode, iomap->offset, > + iomap->length, flags, iomap, mp); > + if (ret) > + goto out_trans_end; > + } > + > + if (iomap->type == IOMAP_HOLE) { > + ret = gfs2_iomap_alloc(inode, iomap, flags, mp); > + if (ret) { > + gfs2_trans_end(sdp); > + gfs2_inplace_release(ip); > + punch_hole(ip, iomap->offset, iomap->length); > + goto out_qunlock; > + } > } > + > + tr = current->journal_info; > + if (tr->tr_num_buf_new) > + __mark_inode_dirty(inode, I_DIRTY_DATASYNC); > + else > + gfs2_trans_add_meta(ip->i_gl, mp->mp_bh[0]); > + > + gfs2_trans_end(sdp); > } > - if (!gfs2_is_stuffed(ip) && gfs2_is_jdata(ip)) > + > + if (gfs2_is_stuffed(ip) || gfs2_is_jdata(ip)) > iomap->page_ops = &gfs2_iomap_page_ops; > return 0; > > @@ -1121,10 +1146,6 @@ static int gfs2_iomap_begin(struct inode *inode, loff_t pos, loff_t length, > iomap->type != IOMAP_MAPPED) > ret = -ENOTBLK; > } > - if (!ret) { > - get_bh(mp.mp_bh[0]); > - iomap->private = mp.mp_bh[0]; > - } > release_metapath(&mp); > trace_gfs2_iomap_end(ip, iomap, ret); > return ret; > @@ -1135,27 +1156,16 @@ static int gfs2_iomap_end(struct inode *inode, loff_t pos, loff_t length, > { > struct gfs2_inode *ip = GFS2_I(inode); > struct gfs2_sbd *sdp = GFS2_SB(inode); > - struct gfs2_trans *tr = current->journal_info; > - struct buffer_head *dibh = iomap->private; > > if ((flags & 
(IOMAP_WRITE | IOMAP_DIRECT)) != IOMAP_WRITE) > goto out; > > - if (iomap->type != IOMAP_INLINE) { > + if (!gfs2_is_stuffed(ip)) > gfs2_ordered_add_inode(ip); > > - if (tr->tr_num_buf_new) > - __mark_inode_dirty(inode, I_DIRTY_DATASYNC); > - else > - gfs2_trans_add_meta(ip->i_gl, dibh); > - } > - > - if (inode == sdp->sd_rindex) { > + if (inode == sdp->sd_rindex) > adjust_fs_space(inode); > - sdp->sd_rindex_uptodate = 0; > - } > > - gfs2_trans_end(sdp); > gfs2_inplace_release(ip); > > if (length != written && (iomap->flags & IOMAP_F_NEW)) { > @@ -1175,8 +1185,6 @@ static int gfs2_iomap_end(struct inode *inode, loff_t pos, loff_t length, > gfs2_write_unlock(inode); > > out: > - if (dibh) > - brelse(dibh); > return 0; > } > > -- > 2.20.1 >