Re: gfs2 iomap dealock, IOMAP_F_UNBALANCED

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Fri, 22 Mar 2019 at 00:01, Andreas Gruenbacher <agruenba@xxxxxxxxxx> wrote:
> On Thu, 21 Mar 2019 at 22:43, Dave Chinner <david@xxxxxxxxxxxxx> wrote:
> > The problem is calling balance_dirty_pages() inside the
> > ->iomap_begin/->iomap_end calls and not that it is called by the
> > iomap infrastructure itself, right?
> >
> > Is so, I'd prefer to see this in iomap_apply() after the call to
> > ops->iomap_end because iomap_file_buffered_write() can iterate and
> > call iomap_apply() multiple times. This would keep the balancing to
> > a per-iomap granularity, rather than a per-syscall granularity.
> >
> > i.e. if we do write(2GB), we want more than one balancing call
> > during that syscall, so it would be up to the filesystem to a) limit
> > the size of write mappings to something smaller (e.g. 1024 pages)
> > so that there are still frequent balancing calls for large writes.
>
> Hmm. The looping across multiple mappings isn't done in iomap_apply
> but in iomap_file_buffered_write, so the balancing could go into
> iomap_apply or iomap_file_buffered_write, but can't go further up the
> stack. Given that, iomap_file_buffered_write seems the better place,
> but this is still quite horrible.

Here's a more reasonable version of my first patch, with a cleaned up
and hopefully fixed gfs2 part.

In addition, this checks for IOMAP_F_UNBALANCED in iomap_dirty_actor,
the actor for iomap_file_dirty.  We don't use iomap_file_dirty in gfs2,
but we should probably allowing to skip the dirty page balancing there
as well.

Thanks,
Andreas
---
 fs/gfs2/bmap.c        | 64 +++++++++++++++++++++++++++++++++----------
 fs/iomap.c            |  6 ++--
 include/linux/iomap.h |  1 +
 3 files changed, 55 insertions(+), 16 deletions(-)

diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 02b2646d84b3a..628d66d07fc6c 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -974,6 +974,19 @@ static void gfs2_iomap_journaled_page_done(struct inode *inode, loff_t pos,
 	gfs2_page_add_databufs(ip, page, offset_in_page(pos), copied);
 }
 
+static void gfs2_write_end(struct inode *inode, struct buffer_head *dibh)
+{
+	struct gfs2_inode *ip = GFS2_I(inode);
+	struct gfs2_trans *tr = current->journal_info;
+
+	gfs2_ordered_add_inode(ip);
+
+	if (tr->tr_num_buf_new)
+		__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
+	else
+		gfs2_trans_add_meta(ip->i_gl, dibh);
+}
+
 static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos,
 				  loff_t length, unsigned flags,
 				  struct iomap *iomap,
@@ -996,6 +1009,25 @@ static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos,
 	if (ret)
 		goto out_unlock;
 
+	if (gfs2_is_stuffed(ip) || gfs2_is_jdata(ip) || inode == sdp->sd_rindex) {
+		int max_pages;
+		u64 max_length;
+
+		iomap->flags |= IOMAP_F_UNBALANCED;
+
+		/*
+		 * Limit the write size: this ensures that write throttling
+		 * will kick in fast enough even when we don't call
+		 * balance_dirty_pages_ratelimited for each page written.
+		 */
+		max_pages = current->nr_dirtied_pause - current->nr_dirtied;
+		if (max_pages < 8)
+			max_pages = 8;
+		max_length = (u64)max_pages << PAGE_SHIFT;
+		if (iomap->length > max_length)
+			iomap->length = max_length;
+	}
+
 	alloc_required = unstuff || iomap->type == IOMAP_HOLE;
 
 	if (alloc_required || gfs2_is_jdata(ip))
@@ -1052,6 +1084,11 @@ static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos,
 	}
 	if (!gfs2_is_stuffed(ip) && gfs2_is_jdata(ip))
 		iomap->page_done = gfs2_iomap_journaled_page_done;
+
+	if (!(iomap->flags & IOMAP_F_UNBALANCED)) {
+		gfs2_write_end(inode, mp->mp_bh[0]);
+		gfs2_trans_end(sdp);
+	}
 	return 0;
 
 out_trans_end:
@@ -1103,30 +1140,29 @@ static int gfs2_iomap_end(struct inode *inode, loff_t pos, loff_t length,
 			  ssize_t written, unsigned flags, struct iomap *iomap)
 {
 	struct gfs2_inode *ip = GFS2_I(inode);
-	struct gfs2_sbd *sdp = GFS2_SB(inode);
-	struct gfs2_trans *tr = current->journal_info;
 	struct buffer_head *dibh = iomap->private;
 
 	if ((flags & (IOMAP_WRITE | IOMAP_DIRECT)) != IOMAP_WRITE)
 		goto out;
 
-	if (iomap->type != IOMAP_INLINE) {
-		gfs2_ordered_add_inode(ip);
+	if (current->journal_info) {
+		struct gfs2_sbd *sdp = GFS2_SB(inode);
 
-		if (tr->tr_num_buf_new)
-			__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
-		else
-			gfs2_trans_add_meta(ip->i_gl, dibh);
-	}
+		if (iomap->type != IOMAP_INLINE)
+			gfs2_write_end(inode, dibh);
 
-	if (inode == sdp->sd_rindex) {
-		adjust_fs_space(inode);
-		sdp->sd_rindex_uptodate = 0;
-	}
+		if (inode == sdp->sd_rindex) {
+			adjust_fs_space(inode);
+			sdp->sd_rindex_uptodate = 0;
+		}
 
-	gfs2_trans_end(sdp);
+		gfs2_trans_end(sdp);
+	}
 	gfs2_inplace_release(ip);
 
+	if (iomap->flags & IOMAP_F_UNBALANCED)
+		balance_dirty_pages_ratelimited(inode->i_mapping);
+
 	if (length != written && (iomap->flags & IOMAP_F_NEW)) {
 		/* Deallocate blocks that were just allocated. */
 		loff_t blockmask = i_blocksize(inode) - 1;
diff --git a/fs/iomap.c b/fs/iomap.c
index 97cb9d486a7da..5f950fee0834f 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -863,7 +863,8 @@ iomap_write_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
 		written += copied;
 		length -= copied;
 
-		balance_dirty_pages_ratelimited(inode->i_mapping);
+		if (!(iomap->flags & IOMAP_F_UNBALANCED))
+			balance_dirty_pages_ratelimited(inode->i_mapping);
 	} while (iov_iter_count(i) && length);
 
 	return written ? written : status;
@@ -945,7 +946,8 @@ iomap_dirty_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
 		written += status;
 		length -= status;
 
-		balance_dirty_pages_ratelimited(inode->i_mapping);
+		if (!(iomap->flags & IOMAP_F_UNBALANCED))
+			balance_dirty_pages_ratelimited(inode->i_mapping);
 	} while (length);
 
 	return written;
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 0fefb5455bdaf..e9a04e76a3217 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -35,6 +35,7 @@ struct vm_fault;
 #define IOMAP_F_NEW		0x01	/* blocks have been newly allocated */
 #define IOMAP_F_DIRTY		0x02	/* uncommitted metadata */
 #define IOMAP_F_BUFFER_HEAD	0x04	/* file system requires buffer heads */
+#define IOMAP_F_UNBALANCED	0x08	/* don't balance dirty pages */
 
 /*
  * Flags that only need to be reported for IOMAP_REPORT requests:
-- 
2.20.1




[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]

  Powered by Linux