[PATCH 2/4] [RFC] xfs: limit speculative prealloc size on sparse files

Dave Chinner <david@xxxxxxxxxxxxx> · Mon, 21 Jan 2013 23:53:53 +1100

From: Dave Chinner <dchinner@xxxxxxxxxx>

This is an RFC that follow sup from a conversion Eric and I had on
IRC.  The idea is to prevent EOF speculative preallocation from
triggering larger allocations on IO patterns of
truncate--to-zero-seek-write-seek-write-....  which results in
non-sparse files for large files. This, unfortunately, is the way cp
behaves when copying sparse files, and it results in sub-optimal
destination file layouts.

What this code does is that it looks at the current extent over the
new EOF location, and if it is a hole it turns off preallocation
altogether. To avoid the next write from doing a large prealloc, it
takes the size of subsequent preallocations from the current size of
the existing EOF extent. IOWs, if you leave a hole in the file, it
resets preallocation behaviour to the same as if it was a zero size
file.

I haven't fully tested this, so I'm not sure if it works exactly
like I think it should, but I wanted to get this out there to get
more eyes on it...

Example new behaviour:

$ xfs_io -f -c "pwrite 0 31m" \
            -c "pwrite 33m 1m" \
            -c "pwrite 128m 1m" \
            -c "fiemap -v" /mnt/scratch/blah
wrote 32505856/32505856 bytes at offset 0
31 MiB, 7936 ops; 0.0000 sec (1.608 GiB/sec and 421432.7439 ops/sec)
wrote 1048576/1048576 bytes at offset 34603008
1 MiB, 256 ops; 0.0000 sec (1.462 GiB/sec and 383233.5329 ops/sec)
wrote 1048576/1048576 bytes at offset 134217728
1 MiB, 256 ops; 0.0000 sec (1.719 GiB/sec and 450704.2254 ops/sec)
/mnt/scratch/blah:
 EXT: FILE-OFFSET      BLOCK-RANGE      TOTAL FLAGS
   0: [0..65535]:      96..65631        65536   0x0
   1: [65536..67583]:  hole              2048
   2: [67584..69631]:  67680..69727      2048   0x0
   3: [69632..262143]: hole             192512
   4: [262144..264191]: 262240..264287    2048   0x1

Signed-off-by: Dave Chinner <dchinner@xxxxxxxxxx>
---
 fs/xfs/xfs_iomap.c |   65 ++++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 56 insertions(+), 9 deletions(-)

diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index add06b4..3587772 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -311,6 +311,53 @@ xfs_iomap_eof_want_preallocate(
 }
 
 /*
+ * Determine the initial size of the preallocation. It will be bound by the
+ * current file size, but when we are extending sparse files the current file
+ * size is not a good metric to use. Hence we need to look up the extent that
+ * ends at the current EOF and use the result to determine the preallocation
+ * size.
+ *
+ * If the extent is a hole, then preallocation is essentially disabled.
+ * Otherwise we take the size of the data extent as the basis for the
+ * preallocation size. If the size of the extent is greater than half the
+ * maximum extent length, then use the file size as the basis. This ensures that
+ * for large files the preallocation size always extends to MAXEXTLEN rather
+ * than falling short due to things like stripe unit/width alignment of real
+ * extents.
+ */
+STATIC int
+xfs_iomap_eof_prealloc_initial_size(
+	struct xfs_mount	*mp,
+	struct xfs_inode	*ip,
+	xfs_bmbt_irec_t		*imap,
+	int			nimaps)
+{
+	xfs_fileoff_t   start_fsb;
+	xfs_fsblock_t	firstblock;
+	int		imaps = 1;
+	int		error;
+
+	ASSERT(nimaps >= imaps);
+
+	/* if we are using a specific prealloc size, return now */
+	if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)
+		return 0;
+
+	start_fsb = XFS_B_TO_FSBT(mp, XFS_ISIZE(ip));
+	error = xfs_bmapi_read(ip, start_fsb, 1, imap, &imaps,
+			       XFS_BMAPI_ENTIRE);
+	if (error)
+		return 0;
+
+	ASSERT(imaps == 1);
+	if (imap[0].br_startblock == HOLESTARTBLOCK)
+		return 0;
+	if (imap[0].br_blockcount <= (MAXEXTLEN >> 1))
+		return imap[0].br_blockcount;
+	return XFS_B_TO_FSB(mp, XFS_ISIZE(ip));
+}
+
+/*
  * If we don't have a user specified preallocation size, dynamically increase
  * the preallocation size as the size of the file grows. Cap the maximum size
  * at a single extent or less if the filesystem is near full. The closer the
@@ -319,20 +366,17 @@ xfs_iomap_eof_want_preallocate(
 STATIC xfs_fsblock_t
 xfs_iomap_prealloc_size(
 	struct xfs_mount	*mp,
-	struct xfs_inode	*ip)
+	struct xfs_inode	*ip,
+	xfs_bmbt_irec_t		*imap,
+	int			nimaps)
 {
 	xfs_fsblock_t		alloc_blocks = 0;
 
-	if (!(mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)) {
+	alloc_blocks = xfs_iomap_eof_prealloc_initial_size(mp, ip, imap, nimaps);
+	if (alloc_blocks > 0) {
 		int shift = 0;
 		int64_t freesp;
 
-		/*
-		 * rounddown_pow_of_two() returns an undefined result
-		 * if we pass in alloc_blocks = 0. Hence the "+ 1" to
-		 * ensure we always pass in a non-zero value.
-		 */
-		alloc_blocks = XFS_B_TO_FSB(mp, XFS_ISIZE(ip)) + 1;
 		alloc_blocks = XFS_FILEOFF_MIN(MAXEXTLEN,
 					rounddown_pow_of_two(alloc_blocks));
 
@@ -398,7 +442,10 @@ xfs_iomap_write_delay(
 
 retry:
 	if (prealloc) {
-		xfs_fsblock_t	alloc_blocks = xfs_iomap_prealloc_size(mp, ip);
+		xfs_fsblock_t	alloc_blocks;
+
+		alloc_blocks = xfs_iomap_prealloc_size(mp, ip, imap,
+						       XFS_WRITE_IMAPS);
 
 		aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1));
 		ioalign = XFS_B_TO_FSBT(mp, aligned_offset);
-- 
1.7.10

_______________________________________________
xfs mailing list
xfs@xxxxxxxxxxx
http://oss.sgi.com/mailman/listinfo/xfs