[PATCH 4/4] populate: punch files after writing to fragment free space properly

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Darrick J. Wong <darrick.wong@xxxxxxxxxx>

The filesystem population code frequently allocates a large file and
punches out every other block ("swiss-cheese files") in an attempt to
cause the creation of a lot of metadata to fill out the btrees.  This
pattern, however, has a subtle bug if the writes to the swiss-cheese
file are not allocated in batches and we're trying to fragment the free
space records in order to achieve a certain metadata btree shape.

This is exactly what happens on a DAX filesystem, since we no longer
have the page cache to stage delalloc writes.  Each xfs_io pwrite call
to the multi-megabyte swiss-chese file turns into multiple 4k pwrites,
which means that file data blocks are allocated 4k at a time.  This can
be fatal to our goal of fragmenting the free space btrees because the
allocator sees a 4k allocation request and uses 4k blocks from the
fragmented parts of the free space to satisfy the "small" request.  When
this happens, the XFS populate function cannot fill out the free space
btree to sufficient height and tests fail.

(In regular delalloc mode we'd cache all those small write() in memory
and try for a single large allocation, which we'd generally get.)

To fix this, we need to force the filesystem to allocate all blocks
before freeing any blocks.  Split the creation of swiss-cheese files
into two parts: (a) writing data to the file to force allocation, and
(b) punching the holes to fragment free space.  It's a little hokey for
helpers to be modifying variables in the caller's scope, but there's not
really a better way to do that in bash.

This bug affects only XFS but we convert the one ext4 usage anyway.

Signed-off-by: Darrick J. Wong <darrick.wong@xxxxxxxxxx>
---
 common/populate |   54 ++++++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 42 insertions(+), 12 deletions(-)


diff --git a/common/populate b/common/populate
index 7403dec3..4aab0274 100644
--- a/common/populate
+++ b/common/populate
@@ -120,6 +120,32 @@ _populate_xfs_qmount_option()
 	fi
 }
 
+# Set up a file that we'll later use to fragment metadata and free space.
+# Our strategy here is to force the fs to allocate large contiguous extents
+# to a file and then punch every other block to force the fs to suffer the
+# worst allocation outcome possible.
+#
+# NOTE: In order to prevent *subsequent* allocations from using the holes we
+# punch, we must store the relevant filenames for later.  This function
+# deliberately adds each file name to the @punch_files array, which must be
+# declared by the caller and will be picked up by __force_fragmentation.
+function __setup_fragmentation() {
+	local sz="$1"
+	local fname="$2"
+
+	$XFS_IO_PROG -f -c "pwrite -S 0x62 -W -b 1m 0 $sz" "${fname}"
+	punch_files+=("${fname}")
+}
+
+# Actually punch holes in the file.  Call this /after/ you're done calling
+# __setup_fragmentation.
+__force_fragmentation() {
+	for file in "${punch_files[@]}"; do
+		./src/punch-alternating "${file}"
+	done
+	punch_files=()
+}
+
 # Populate an XFS on the scratch device with (we hope) all known
 # types of metadata block
 _scratch_xfs_populate() {
@@ -132,6 +158,8 @@ _scratch_xfs_populate() {
 		esac
 	done
 
+	local punch_files=()
+
 	_populate_xfs_qmount_option
 	_scratch_mount
 	blksz="$(stat -f -c '%s' "${SCRATCH_MNT}")"
@@ -161,8 +189,7 @@ _scratch_xfs_populate() {
 	# - FMT_BTREE
 	echo "+ btree extents file"
 	nr="$((blksz * 2 / 16))"
-	$XFS_IO_PROG -f -c "pwrite -S 0x62 0 $((blksz * nr))" "${SCRATCH_MNT}/S_IFREG.FMT_BTREE"
-	./src/punch-alternating "${SCRATCH_MNT}/S_IFREG.FMT_BTREE"
+	__setup_fragmentation $((blksz * nr)) "${SCRATCH_MNT}/S_IFREG.FMT_BTREE"
 
 	# Directories
 	# - INLINE
@@ -257,8 +284,7 @@ _scratch_xfs_populate() {
 	# Free space btree
 	echo "+ freesp btree"
 	nr="$((blksz * 2 / 8))"
-	$XFS_IO_PROG -f -c "pwrite -S 0x62 0 $((blksz * nr))" "${SCRATCH_MNT}/BNOBT"
-	./src/punch-alternating "${SCRATCH_MNT}/BNOBT"
+	__setup_fragmentation $((blksz * nr)) "${SCRATCH_MNT}/BNOBT"
 
 	# Inode btree
 	echo "+ inobt btree"
@@ -280,8 +306,7 @@ _scratch_xfs_populate() {
 	if [ $is_rmapbt -gt 0 ]; then
 		echo "+ rmapbt btree"
 		nr="$((blksz * 2 / 24))"
-		$XFS_IO_PROG -f -c "pwrite -S 0x62 0 $((blksz * nr))" "${SCRATCH_MNT}/RMAPBT"
-		./src/punch-alternating "${SCRATCH_MNT}/RMAPBT"
+		__setup_fragmentation $((blksz * nr)) "${SCRATCH_MNT}/RMAPBT"
 	fi
 
 	# Realtime Reverse-mapping btree
@@ -289,8 +314,7 @@ _scratch_xfs_populate() {
 	if [ $is_rmapbt -gt 0 ] && [ $is_rt -gt 0 ]; then
 		echo "+ rtrmapbt btree"
 		nr="$((blksz * 2 / 32))"
-		$XFS_IO_PROG -f -R -c "pwrite -S 0x62 0 $((blksz * nr))" "${SCRATCH_MNT}/RTRMAPBT"
-		./src/punch-alternating "${SCRATCH_MNT}/RTRMAPBT"
+		__setup_fragmentation $((blksz * nr)) "${SCRATCH_MNT}/RTRMAPBT"
 	fi
 
 	# Reference-count btree
@@ -298,15 +322,17 @@ _scratch_xfs_populate() {
 	if [ $is_reflink -gt 0 ]; then
 		echo "+ reflink btree"
 		nr="$((blksz * 2 / 12))"
-		$XFS_IO_PROG -f -c "pwrite -S 0x62 0 $((blksz * nr))" "${SCRATCH_MNT}/REFCOUNTBT"
+		__setup_fragmentation $((blksz * nr)) "${SCRATCH_MNT}/REFCOUNTBT"
 		cp --reflink=always "${SCRATCH_MNT}/REFCOUNTBT" "${SCRATCH_MNT}/REFCOUNTBT2"
-		./src/punch-alternating "${SCRATCH_MNT}/REFCOUNTBT"
 	fi
 
 	# Copy some real files (xfs tests, I guess...)
 	echo "+ real files"
 	test $fill -ne 0 && __populate_fill_fs "${SCRATCH_MNT}" 5
 
+	# Make sure we get all the fragmentation we asked for
+	__force_fragmentation
+
 	umount "${SCRATCH_MNT}"
 }
 
@@ -322,6 +348,8 @@ _scratch_ext4_populate() {
 		esac
 	done
 
+	local punch_files=()
+
 	_scratch_mount
 	blksz="$(stat -f -c '%s' "${SCRATCH_MNT}")"
 	dblksz="${blksz}"
@@ -342,8 +370,7 @@ _scratch_ext4_populate() {
 	# - FMT_ETREE
 	echo "+ extent tree file"
 	nr="$((blksz * 2 / 12))"
-	$XFS_IO_PROG -f -c "pwrite -S 0x62 0 $((blksz * nr))" "${SCRATCH_MNT}/S_IFREG.FMT_ETREE"
-	./src/punch-alternating "${SCRATCH_MNT}/S_IFREG.FMT_ETREE"
+	__setup_fragmentation $((blksz * nr)) "${SCRATCH_MNT}/S_IFREG.FMT_ETREE"
 
 	# Directories
 	# - INLINE
@@ -406,6 +433,9 @@ _scratch_ext4_populate() {
 	echo "+ real files"
 	test $fill -ne 0 && __populate_fill_fs "${SCRATCH_MNT}" 5
 
+	# Make sure we get all the fragmentation we asked for
+	__force_fragmentation
+
 	umount "${SCRATCH_MNT}"
 }
 




[Index of Archives]     [XFS Filesystem Development (older mail)]     [Linux Filesystem Development]     [Linux Audio Users]     [Yosemite Trails]     [Linux Kernel]     [Linux RAID]     [Linux SCSI]


  Powered by Linux