Some questions about the freelist allocator in XFS

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



I am trying to investigate how freelist allocator in xfs interacts
with freespace B+Tree allocator.
First I prepared a patch against
linux-source/fs/xfs/libxfs/xfs_alloc.c to print debugging messages
(The kernel version used is linux-3.10.0-327.22.2.el7).
Then, I wrote a simple utility to make TONS of
holes in a filesystem by calling fallocate() to punch holes in a file
that is almost as large as the volume size.

I created an XFS filesystem image by the following steps:
1. fallocate -l 80G /mnt/disk2/xfs
2. mkfs.xfs -f -d agcount=1 /mnt/disk2/xfs

Then I created a large file by fallocate:
fallocate -l 85823746048 /mnt/test/abc

which left only 4 blocks available in the volume finally:
/dev/loop0      20961280 20961276         4 100% /mnt/test

The result of xfs_bmap against /mnt/test/abc:
/mnt/test/abc:
 EXT: FILE-OFFSET      BLOCK-RANGE      AG AG-OFFSET              TOTAL FLAGS
   0: [0..167624503]:  83000..167707503  0 (83000..167707503) 167624504 10000

After that, I used the hole-punching utility above to create holes on
the files, and captured the output of kmsg.

When reading the log output, I realised that there is no B+Tree split
triggered by xfs_alloc_fix_freelist() when calling xfs_free_extent().
Isn't B+Tree split possible in by-size B+Tree even when truncating a
longer freespace record to shorter one? But what I found in the log is
only a few tree shrinks... And when reading the source code of
freespace allocator I found that a B+Tree growth in this case is
impossible at least...

Attachment: kernel-log
Description: Binary data

#define _GNU_SOURCE
#define _FILE_OFFSET_BITS 64

#include <unistd.h>
#include <linux/falloc.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <errno.h>

static void usage(int argc, char **argv)
{
	(void)argc;
	printf("Usage: %s <Path>\n", argv[0]);
	exit(EXIT_FAILURE);
}

int main(int argc, char **argv)
{
	int fd, ret;
	off_t i, blk;
	const int blk_size = 4096;
	struct stat file_stat;

	if (argc != 2)
		usage(argc, argv);

	fd = open(argv[1], O_RDWR);
	if (fd < 0) {
		perror("open");
		return EXIT_FAILURE;
	}

	ret = fstat(fd, &file_stat);
	if (ret < 0) {
		perror("stat");
		close(fd);
		return EXIT_FAILURE;
	}
	blk = (file_stat.st_size + blk_size - 1) / blk_size;
	for (i = 0; i < blk; i += 10) {
		printf("i: %llu\n", i);
		ret = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, i * blk_size, blk_size * 6);
		if (ret < 0) {
			perror("fallcate");
			close(fd);
			return EXIT_FAILURE;
		}
	}

	fsync(fd);
	close(fd);
	return EXIT_SUCCESS;
}
--- linux-3.10.0-327.22.2.el7.a/fs/xfs/libxfs/xfs_alloc.c	2016-07-07 03:57:19.366512544 +0800
+++ linux-3.10.0-327.22.2.el7.b/fs/xfs/libxfs/xfs_alloc.c	2016-07-07 09:30:32.178766480 +0800
@@ -309,7 +309,8 @@ xfs_alloc_fixup_trees(
 	xfs_extlen_t	flen,		/* length of free extent */
 	xfs_agblock_t	rbno,		/* starting block of returned extent */
 	xfs_extlen_t	rlen,		/* length of returned extent */
-	int		flags)		/* flags, XFSA_FIXUP_... */
+	int		flags,		/* flags, XFSA_FIXUP_... */
+	int		isfl)
 {
 	int		error;		/* error code */
 	int		i;		/* operation results */
@@ -376,15 +377,27 @@ xfs_alloc_fixup_trees(
 		nfbno1 = rbno + rlen;
 		nflen1 = flen - rlen;
 		nfbno2 = NULLAGBLOCK;
+		if (isfl)
+			xfs_warn(mp,
+				"Case 1 Inserting: nfbno1: %d, nflen1: %d, nfbno2: %d, nflen2: %d, fbno: %d, flen: %d, rbno: %d, rlen: %d",
+				nfbno1, nflen1, nfbno2, nflen2, fbno, flen, rbno, rlen);
 	} else if (rbno + rlen == fbno + flen) {
 		nfbno1 = fbno;
 		nflen1 = flen - rlen;
 		nfbno2 = NULLAGBLOCK;
+		if (isfl)
+			xfs_warn(mp,
+				"Case 2 Inserting: nfbno1: %d, nflen1: %d, nfbno2: %d, nflen2: %d, fbno: %d, flen: %d, rbno: %d, rlen: %d",
+				nfbno1, nflen1, nfbno2, nflen2, fbno, flen, rbno, rlen);
 	} else {
 		nfbno1 = fbno;
 		nflen1 = rbno - fbno;
 		nfbno2 = rbno + rlen;
 		nflen2 = (fbno + flen) - nfbno2;
+		if (isfl)
+			xfs_warn(mp,
+				"Case 3 Inserting: nfbno1: %d, nflen1: %d, nfbno2: %d, nflen2: %d, fbno: %d, flen: %d, rbno: %d, rlen: %d",
+				nfbno1, nflen1, nfbno2, nflen2, fbno, flen, rbno, rlen);
 	}
 	/*
 	 * Delete the entry from the by-size btree.
@@ -396,19 +409,31 @@ xfs_alloc_fixup_trees(
 	 * Add new by-size btree entry(s).
 	 */
 	if (nfbno1 != NULLAGBLOCK) {
+		struct xfs_btree_block	*cntblock;
 		if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i)))
 			return error;
+		cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_bufs[0]);
 		XFS_WANT_CORRUPTED_RETURN(mp, i == 0);
+		xfs_warn(mp,
+	"B+Tree before insert: isfl: %d, bb_numrec: %d, addr: %llu", isfl, xfs_btree_get_numrecs(cntblock), XFS_BUF_ADDR(cnt_cur->bc_bufs[0]));
 		if ((error = xfs_btree_insert(cnt_cur, &i)))
 			return error;
+		xfs_warn(mp,
+	"B+Tree after insert: isfl: %d, bb_numrec: %d, addr: %llu", isfl, xfs_btree_get_numrecs(cntblock), XFS_BUF_ADDR(cnt_cur->bc_bufs[0]));
 		XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
 	}
 	if (nfbno2 != NULLAGBLOCK) {
+		struct xfs_btree_block	*cntblock;
+		cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_bufs[0]);
 		if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i)))
 			return error;
 		XFS_WANT_CORRUPTED_RETURN(mp, i == 0);
+		xfs_warn(mp,
+	"B+Tree before insert: isfl: %d, bb_numrec: %d, addr: %llu", isfl, xfs_btree_get_numrecs(cntblock), XFS_BUF_ADDR(cnt_cur->bc_bufs[0]));
 		if ((error = xfs_btree_insert(cnt_cur, &i)))
 			return error;
+		xfs_warn(mp,
+	"B+Tree after insert: isfl: %d, bb_numrec: %d, addr: %llu", isfl, xfs_btree_get_numrecs(cntblock), XFS_BUF_ADDR(cnt_cur->bc_bufs[0]));
 		XFS_WANT_CORRUPTED_RETURN(mp, i == 1);
 	}
 	/*
@@ -730,7 +755,7 @@ xfs_alloc_ag_vextent_exact(
 	ASSERT(args->agbno + args->len <=
 		be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length));
 	error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, args->agbno,
-				      args->len, XFSA_FIXUP_BNO_OK);
+				      args->len, XFSA_FIXUP_BNO_OK, args->isfl);
 	if (error) {
 		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR);
 		goto error0;
@@ -1028,7 +1053,7 @@ restart:
 		 * Fix up the btree entries.
 		 */
 		if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno,
-				ltlen, bnew, blen, XFSA_FIXUP_CNT_OK)))
+				ltlen, bnew, blen, XFSA_FIXUP_CNT_OK, args->isfl)))
 			goto error0;
 		xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
 		xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR);
@@ -1219,7 +1244,7 @@ restart:
 	args->agbno = ltnew;
 
 	if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen,
-			ltnew, rlen, XFSA_FIXUP_BNO_OK)))
+			ltnew, rlen, XFSA_FIXUP_BNO_OK, args->isfl)))
 		goto error0;
 
 	if (j)
@@ -1420,7 +1445,7 @@ restart:
 	bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp,
 		args->agno, XFS_BTNUM_BNO);
 	if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen,
-			rbno, rlen, XFSA_FIXUP_CNT_OK)))
+			rbno, rlen, XFSA_FIXUP_CNT_OK, args->isfl)))
 		goto error0;
 	xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR);
 	xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
_______________________________________________
xfs mailing list
xfs@xxxxxxxxxxx
http://oss.sgi.com/mailman/listinfo/xfs

[Index of Archives]     [Linux XFS Devel]     [Linux Filesystem Development]     [Filesystem Testing]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux