[PATCH 60/76] xfs: implement CoW for directio writes

For O_DIRECT writes to shared blocks, we have to CoW them just like
we would with buffered writes.  For writes that are not block-aligned,
just bounce them to the page cache.

For block-aligned writes, however, we can do better than that.  Use the
same mechanisms that we employ for buffered CoW: set up a delalloc
reservation, allocate all the blocks at once, issue the writes against
the new blocks, and use the same ioend functions to remap the blocks
after the write completes.  This should be fairly performant.
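
Not part of the patch itself, but for illustration, here is a minimal
userspace sketch of the case the new code handles: a block-aligned
O_DIRECT write to a file whose blocks are shared with another file.
The 4k block size is an assumption, and the sharing is assumed to have
been set up beforehand (e.g. with "cp --reflink=always" on an XFS
filesystem with reflink enabled).  An unaligned offset or length would
instead take the buffered fallback described above.

#define _GNU_SOURCE	/* for O_DIRECT */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

int main(int argc, char **argv)
{
	const size_t blksz = 4096;	/* assumed fs block size */
	void *buf;
	int fd;

	if (argc != 2) {
		fprintf(stderr, "usage: %s <reflinked-file>\n", argv[0]);
		return 1;
	}

	fd = open(argv[1], O_WRONLY | O_DIRECT);
	if (fd < 0) {
		perror("open");
		return 1;
	}

	/* O_DIRECT buffers must be suitably aligned. */
	if (posix_memalign(&buf, blksz, blksz)) {
		fprintf(stderr, "posix_memalign failed\n");
		return 1;
	}
	memset(buf, 'X', blksz);

	/*
	 * Offset and length are both block-aligned, so with this patch
	 * the write stays in the direct I/O path and CoWs the shared
	 * blocks.  An unaligned write would bounce to the page cache
	 * instead.
	 */
	if (pwrite(fd, buf, blksz, 0) != (ssize_t)blksz) {
		perror("pwrite");
		return 1;
	}

	free(buf);
	close(fd);
	return 0;
}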

Signed-off-by: Darrick J. Wong <darrick.wong@xxxxxxxxxx>
---
 fs/xfs/xfs_aops.c    |   63 +++++++++++++++++++++++++---
 fs/xfs/xfs_file.c    |   12 ++++-
 fs/xfs/xfs_reflink.c |  114 ++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/xfs/xfs_reflink.h |    5 ++
 4 files changed, 186 insertions(+), 8 deletions(-)


diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 8101d6a..4b77d07 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -1339,7 +1339,8 @@ xfs_map_direct(
 	struct buffer_head	*bh_result,
 	struct xfs_bmbt_irec	*imap,
 	xfs_off_t		offset,
-	bool			dax_fault)
+	bool			dax_fault,
+	bool			is_cow)
 {
 	struct xfs_ioend	*ioend;
 	xfs_off_t		size = bh_result->b_size;
@@ -1368,20 +1369,23 @@ xfs_map_direct(
 
 		if (type == XFS_IO_UNWRITTEN && type != ioend->io_type)
 			ioend->io_type = XFS_IO_UNWRITTEN;
+		if (is_cow)
+			ioend->io_flags |= XFS_IOEND_COW;
 
 		trace_xfs_gbmap_direct_update(XFS_I(inode), ioend->io_offset,
 					      ioend->io_size, ioend->io_type,
 					      imap);
-	} else if (type == XFS_IO_UNWRITTEN ||
+	} else if (type == XFS_IO_UNWRITTEN || is_cow ||
 		   offset + size > i_size_read(inode) ||
 		   offset + size < 0) {
 		ioend = xfs_alloc_ioend(inode, type);
 		ioend->io_offset = offset;
 		ioend->io_size = size;
+		if (is_cow)
+			ioend->io_flags |= XFS_IOEND_COW;
 
 		bh_result->b_private = ioend;
 		set_buffer_defer_completion(bh_result);
-
 		trace_xfs_gbmap_direct_new(XFS_I(inode), offset, size, type,
 					   imap);
 	} else {
@@ -1449,6 +1453,8 @@ __xfs_get_blocks(
 	xfs_off_t		offset;
 	ssize_t			size;
 	int			new = 0;
+	bool			is_cow = false;
+	bool			need_alloc = false;
 
 	if (XFS_FORCED_SHUTDOWN(mp))
 		return -EIO;
@@ -1480,8 +1486,15 @@ __xfs_get_blocks(
 	end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size);
 	offset_fsb = XFS_B_TO_FSBT(mp, offset);
 
-	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb,
-				&imap, &nimaps, XFS_BMAPI_ENTIRE);
+	if (create && direct)
+		is_cow = xfs_reflink_is_cow_pending(ip, offset);
+	if (is_cow)
+		error = xfs_reflink_find_cow_mapping(ip, offset, &imap,
+						     &need_alloc);
+	else
+		error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb,
+					&imap, &nimaps, XFS_BMAPI_ENTIRE);
+	ASSERT(!need_alloc);
 	if (error)
 		goto out_unlock;
 
@@ -1553,13 +1566,33 @@ __xfs_get_blocks(
 	if (imap.br_startblock != HOLESTARTBLOCK &&
 	    imap.br_startblock != DELAYSTARTBLOCK &&
 	    (create || !ISUNWRITTEN(&imap))) {
+		if (create && direct && !is_cow) {
+			bool shared;
+
+			error = xfs_reflink_irec_is_shared(ip, &imap, &shared);
+			if (error)
+				return error;
+			/*
+			 * Are we doing a DIO write to a shared block?  In
+			 * the ideal world we at least would fork full blocks,
+			 * but for now just fall back to buffered mode.  Yuck.
+			 * Use -EREMCHG ("remote address changed") to signal
+			 * this, since in general XFS doesn't do this sort of
+			 * fallback.
+			 */
+			if (shared) {
+				trace_xfs_reflink_bounce_dio_write(ip, &imap);
+				return -EREMCHG;
+			}
+		}
+
 		xfs_map_buffer(inode, bh_result, &imap, offset);
 		if (ISUNWRITTEN(&imap))
 			set_buffer_unwritten(bh_result);
 		/* direct IO needs special help */
 		if (create && direct)
 			xfs_map_direct(inode, bh_result, &imap, offset,
-				       dax_fault);
+				       dax_fault, is_cow);
 	}
 
 	/*
@@ -1738,6 +1771,24 @@ xfs_vm_do_dio(
 	int			flags)
 {
 	struct block_device	*bdev;
+	loff_t			end;
+	loff_t			block_mask;
+	int			error;
+
+	/* If this is a block-aligned directio CoW, remap immediately. */
+	end = offset + iov_iter_count(iter);
+	block_mask = (1 << inode->i_blkbits) - 1;
+	if (xfs_is_reflink_inode(XFS_I(inode)) && iov_iter_rw(iter) == WRITE &&
+	    !(offset & block_mask) && !(end & block_mask)) {
+		error = xfs_reflink_reserve_cow_range(XFS_I(inode), offset,
+				iov_iter_count(iter));
+		if (error)
+			return error;
+		error = xfs_reflink_allocate_cow_range(XFS_I(inode), offset,
+				iov_iter_count(iter));
+		if (error)
+			return error;
+	}
 
 	if (IS_DAX(inode))
 		return dax_do_io(iocb, inode, iter, offset,
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 0fbcb38..31b002e 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -892,10 +892,18 @@ xfs_file_write_iter(
 	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
 		return -EIO;
 
-	if ((iocb->ki_flags & IOCB_DIRECT) || IS_DAX(inode))
+	/*
+	 * Allow DIO to fall back to buffered *only* in the case that we're
+	 * doing a reflink CoW.
+	 */
+	if ((iocb->ki_flags & IOCB_DIRECT) || IS_DAX(inode)) {
 		ret = xfs_file_dio_aio_write(iocb, from);
-	else
+		if (ret == -EREMCHG)
+			goto buffered;
+	} else {
+buffered:
 		ret = xfs_file_buffered_aio_write(iocb, from);
+	}
 
 	if (ret > 0) {
 		ssize_t err;
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index 9c1c262..8594bc4 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -134,6 +134,56 @@ xfs_trim_extent(
 	}
 }
 
+/**
+ * xfs_reflink_irec_is_shared() -- Are any of the blocks in this mapping
+ *				   shared?
+ *
+ * @ip: XFS inode object
+ * @irec: the fileoff:fsblock mapping that we might fork
+ * @shared: set to true if the mapping is shared.
+ */
+int
+xfs_reflink_irec_is_shared(
+	struct xfs_inode	*ip,
+	struct xfs_bmbt_irec	*irec,
+	bool			*shared)
+{
+	xfs_agnumber_t		agno;
+	xfs_agblock_t		agbno;
+	xfs_extlen_t		aglen;
+	xfs_agblock_t		fbno;
+	xfs_extlen_t		flen;
+	int			error = 0;
+
+	/* Holes, unwritten, and delalloc extents cannot be shared */
+	if (!xfs_is_reflink_inode(ip) ||
+	    ISUNWRITTEN(irec) ||
+	    irec->br_startblock == HOLESTARTBLOCK ||
+	    irec->br_startblock == DELAYSTARTBLOCK) {
+		*shared = false;
+		return 0;
+	}
+
+	trace_xfs_reflink_irec_is_shared(ip, irec);
+
+	agno = XFS_FSB_TO_AGNO(ip->i_mount, irec->br_startblock);
+	agbno = XFS_FSB_TO_AGBNO(ip->i_mount, irec->br_startblock);
+	aglen = irec->br_blockcount;
+
+	/* Are there any shared blocks here? */
+	error = xfs_refcount_find_shared(ip->i_mount, agno, agbno,
+			aglen, &fbno, &flen, false);
+	if (error)
+		return error;
+	if (flen == 0) {
+		*shared = false;
+		return 0;
+	}
+
+	*shared = true;
+	return 0;
+}
+
 /* Find the shared ranges under an irec, and set up delalloc extents. */
 STATIC int
 xfs_reflink_reserve_cow_extent(
@@ -251,6 +301,70 @@ xfs_reflink_reserve_cow_range(
 }
 
 /**
+ * xfs_reflink_allocate_cow_range() -- Allocate blocks to satisfy a copy on
+ *				       write operation.
+ * @ip: XFS inode.
+ * @pos: file offset to start CoWing.
+ * @len: number of bytes to CoW.
+ */
+int
+xfs_reflink_allocate_cow_range(
+	struct xfs_inode	*ip,
+	xfs_off_t		pos,
+	xfs_off_t		len)
+{
+	struct xfs_ifork	*ifp;
+	struct xfs_bmbt_rec_host	*gotp;
+	struct xfs_bmbt_irec	imap;
+	int			error = 0;
+	xfs_fileoff_t		start_lblk;
+	xfs_fileoff_t		end_lblk;
+	xfs_extnum_t		idx;
+
+	if (!xfs_is_reflink_inode(ip))
+		return 0;
+
+	trace_xfs_reflink_allocate_cow_range(ip, len, pos, 0);
+
+	start_lblk = XFS_B_TO_FSBT(ip->i_mount, pos);
+	end_lblk = XFS_B_TO_FSB(ip->i_mount, pos + len);
+	ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK);
+	xfs_ilock(ip, XFS_ILOCK_EXCL);
+
+	gotp = xfs_iext_bno_to_ext(ifp, start_lblk, &idx);
+	while (gotp) {
+		xfs_bmbt_get_all(gotp, &imap);
+
+		if (imap.br_startoff >= end_lblk)
+			break;
+		if (!isnullstartblock(imap.br_startblock))
+			goto advloop;
+		xfs_trim_extent(&imap, start_lblk, end_lblk - start_lblk);
+		trace_xfs_reflink_allocate_cow_extent(ip, &imap);
+
+		xfs_iunlock(ip, XFS_ILOCK_EXCL);
+		error = xfs_iomap_write_allocate(ip, XFS_COW_FORK,
+				XFS_FSB_TO_B(ip->i_mount, imap.br_startoff +
+						imap.br_blockcount - 1), &imap);
+		xfs_ilock(ip, XFS_ILOCK_EXCL);
+		if (error)
+			break;
+advloop:
+		/* Roll on... */
+		idx++;
+		if (idx >= ifp->if_bytes / sizeof(xfs_bmbt_rec_t))
+			break;
+		gotp = xfs_iext_get_ext(ifp, idx);
+	}
+
+	xfs_iunlock(ip, XFS_ILOCK_EXCL);
+
+	if (error)
+		trace_xfs_reflink_allocate_cow_range_error(ip, error, _RET_IP_);
+	return error;
+}
+
+/**
  * xfs_reflink_is_cow_pending() -- Determine if CoW is pending for a given
  *				   file and offset.
  *
diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h
index 8ec1ebb..d356c00 100644
--- a/fs/xfs/xfs_reflink.h
+++ b/fs/xfs/xfs_reflink.h
@@ -18,8 +18,13 @@
 #ifndef __XFS_REFLINK_H
 #define __XFS_REFLINK_H 1
 
+extern int xfs_reflink_irec_is_shared(struct xfs_inode *ip,
+		struct xfs_bmbt_irec *imap, bool *shared);
+
 extern int xfs_reflink_reserve_cow_range(struct xfs_inode *ip, xfs_off_t pos,
 		xfs_off_t len);
+extern int xfs_reflink_allocate_cow_range(struct xfs_inode *ip, xfs_off_t pos,
+		xfs_off_t len);
 extern bool xfs_reflink_is_cow_pending(struct xfs_inode *ip, xfs_off_t offset);
 extern int xfs_reflink_find_cow_mapping(struct xfs_inode *ip, xfs_off_t offset,
 		struct xfs_bmbt_irec *imap, bool *need_alloc);
