I am trying to investigate how freelist allocator in xfs interacts with freespace B+Tree allocator. First I prepared a patch against linux-source/fs/xfs/libxfs/xfs_alloc.c to print debugging messages (The kernel version used is linux-3.10.0-327.22.2.el7). Then, I wrote a simple utility to make TONS of holes in a filesystem by calling fallocate() to punch holes in a file that is almost as large as the volume size. I created an XFS filesystem image by the following steps: 1. fallocate -l 80G /mnt/disk2/xfs 2. mkfs.xfs -f -d agcount=1 /mnt/disk2/xfs Then I created a large file by fallocate: fallocate -l 85823746048 /mnt/test/abc which left only 4 blocks available in the volume finally: /dev/loop0 20961280 20961276 4 100% /mnt/test The result of xfs_bmap against /mnt/test/abc: /mnt/test/abc: EXT: FILE-OFFSET BLOCK-RANGE AG AG-OFFSET TOTAL FLAGS 0: [0..167624503]: 83000..167707503 0 (83000..167707503) 167624504 10000 After that, I used the hole-punching utility above to create holes on the files, and captured the output of kmsg. When reading the log output, I realised that there is no B+Tree split triggered by xfs_alloc_fix_freelist() when calling xfs_free_extent(). Isn't B+Tree split possible in by-size B+Tree even when truncating a longer freespace record to shorter one? But what I found in the log is only a few tree shrinks... And when reading the source code of freespace allocator I found that a B+Tree growth in this case is impossible at least...
Attachment:
kernel-log
Description: Binary data
#define _GNU_SOURCE #define _FILE_OFFSET_BITS 64 #include <unistd.h> #include <linux/falloc.h> #include <fcntl.h> #include <stdio.h> #include <stdlib.h> #include <sys/stat.h> #include <errno.h> static void usage(int argc, char **argv) { (void)argc; printf("Usage: %s <Path>\n", argv[0]); exit(EXIT_FAILURE); } int main(int argc, char **argv) { int fd, ret; off_t i, blk; const int blk_size = 4096; struct stat file_stat; if (argc != 2) usage(argc, argv); fd = open(argv[1], O_RDWR); if (fd < 0) { perror("open"); return EXIT_FAILURE; } ret = fstat(fd, &file_stat); if (ret < 0) { perror("stat"); close(fd); return EXIT_FAILURE; } blk = (file_stat.st_size + blk_size - 1) / blk_size; for (i = 0; i < blk; i += 10) { printf("i: %llu\n", i); ret = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, i * blk_size, blk_size * 6); if (ret < 0) { perror("fallcate"); close(fd); return EXIT_FAILURE; } } fsync(fd); close(fd); return EXIT_SUCCESS; }
--- linux-3.10.0-327.22.2.el7.a/fs/xfs/libxfs/xfs_alloc.c 2016-07-07 03:57:19.366512544 +0800 +++ linux-3.10.0-327.22.2.el7.b/fs/xfs/libxfs/xfs_alloc.c 2016-07-07 09:30:32.178766480 +0800 @@ -309,7 +309,8 @@ xfs_alloc_fixup_trees( xfs_extlen_t flen, /* length of free extent */ xfs_agblock_t rbno, /* starting block of returned extent */ xfs_extlen_t rlen, /* length of returned extent */ - int flags) /* flags, XFSA_FIXUP_... */ + int flags, /* flags, XFSA_FIXUP_... */ + int isfl) { int error; /* error code */ int i; /* operation results */ @@ -376,15 +377,27 @@ xfs_alloc_fixup_trees( nfbno1 = rbno + rlen; nflen1 = flen - rlen; nfbno2 = NULLAGBLOCK; + if (isfl) + xfs_warn(mp, + "Case 1 Inserting: nfbno1: %d, nflen1: %d, nfbno2: %d, nflen2: %d, fbno: %d, flen: %d, rbno: %d, rlen: %d", + nfbno1, nflen1, nfbno2, nflen2, fbno, flen, rbno, rlen); } else if (rbno + rlen == fbno + flen) { nfbno1 = fbno; nflen1 = flen - rlen; nfbno2 = NULLAGBLOCK; + if (isfl) + xfs_warn(mp, + "Case 2 Inserting: nfbno1: %d, nflen1: %d, nfbno2: %d, nflen2: %d, fbno: %d, flen: %d, rbno: %d, rlen: %d", + nfbno1, nflen1, nfbno2, nflen2, fbno, flen, rbno, rlen); } else { nfbno1 = fbno; nflen1 = rbno - fbno; nfbno2 = rbno + rlen; nflen2 = (fbno + flen) - nfbno2; + if (isfl) + xfs_warn(mp, + "Case 3 Inserting: nfbno1: %d, nflen1: %d, nfbno2: %d, nflen2: %d, fbno: %d, flen: %d, rbno: %d, rlen: %d", + nfbno1, nflen1, nfbno2, nflen2, fbno, flen, rbno, rlen); } /* * Delete the entry from the by-size btree. @@ -396,19 +409,31 @@ xfs_alloc_fixup_trees( * Add new by-size btree entry(s). */ if (nfbno1 != NULLAGBLOCK) { + struct xfs_btree_block *cntblock; if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno1, nflen1, &i))) return error; + cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_bufs[0]); XFS_WANT_CORRUPTED_RETURN(mp, i == 0); + xfs_warn(mp, + "B+Tree before insert: isfl: %d, bb_numrec: %d, addr: %llu", isfl, xfs_btree_get_numrecs(cntblock), XFS_BUF_ADDR(cnt_cur->bc_bufs[0])); if ((error = xfs_btree_insert(cnt_cur, &i))) return error; + xfs_warn(mp, + "B+Tree after insert: isfl: %d, bb_numrec: %d, addr: %llu", isfl, xfs_btree_get_numrecs(cntblock), XFS_BUF_ADDR(cnt_cur->bc_bufs[0])); XFS_WANT_CORRUPTED_RETURN(mp, i == 1); } if (nfbno2 != NULLAGBLOCK) { + struct xfs_btree_block *cntblock; + cntblock = XFS_BUF_TO_BLOCK(cnt_cur->bc_bufs[0]); if ((error = xfs_alloc_lookup_eq(cnt_cur, nfbno2, nflen2, &i))) return error; XFS_WANT_CORRUPTED_RETURN(mp, i == 0); + xfs_warn(mp, + "B+Tree before insert: isfl: %d, bb_numrec: %d, addr: %llu", isfl, xfs_btree_get_numrecs(cntblock), XFS_BUF_ADDR(cnt_cur->bc_bufs[0])); if ((error = xfs_btree_insert(cnt_cur, &i))) return error; + xfs_warn(mp, + "B+Tree after insert: isfl: %d, bb_numrec: %d, addr: %llu", isfl, xfs_btree_get_numrecs(cntblock), XFS_BUF_ADDR(cnt_cur->bc_bufs[0])); XFS_WANT_CORRUPTED_RETURN(mp, i == 1); } /* @@ -730,7 +755,7 @@ xfs_alloc_ag_vextent_exact( ASSERT(args->agbno + args->len <= be32_to_cpu(XFS_BUF_TO_AGF(args->agbp)->agf_length)); error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, args->agbno, - args->len, XFSA_FIXUP_BNO_OK); + args->len, XFSA_FIXUP_BNO_OK, args->isfl); if (error) { xfs_btree_del_cursor(cnt_cur, XFS_BTREE_ERROR); goto error0; @@ -1028,7 +1053,7 @@ restart: * Fix up the btree entries. */ if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, - ltlen, bnew, blen, XFSA_FIXUP_CNT_OK))) + ltlen, bnew, blen, XFSA_FIXUP_CNT_OK, args->isfl))) goto error0; xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); xfs_btree_del_cursor(bno_cur_lt, XFS_BTREE_NOERROR); @@ -1219,7 +1244,7 @@ restart: args->agbno = ltnew; if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur_lt, ltbno, ltlen, - ltnew, rlen, XFSA_FIXUP_BNO_OK))) + ltnew, rlen, XFSA_FIXUP_BNO_OK, args->isfl))) goto error0; if (j) @@ -1420,7 +1445,7 @@ restart: bno_cur = xfs_allocbt_init_cursor(args->mp, args->tp, args->agbp, args->agno, XFS_BTNUM_BNO); if ((error = xfs_alloc_fixup_trees(cnt_cur, bno_cur, fbno, flen, - rbno, rlen, XFSA_FIXUP_CNT_OK))) + rbno, rlen, XFSA_FIXUP_CNT_OK, args->isfl))) goto error0; xfs_btree_del_cursor(cnt_cur, XFS_BTREE_NOERROR); xfs_btree_del_cursor(bno_cur, XFS_BTREE_NOERROR);
_______________________________________________ xfs mailing list xfs@xxxxxxxxxxx http://oss.sgi.com/mailman/listinfo/xfs