From: Dave Chinner <dchinner@xxxxxxxxxx>

Currently the size of the speculative preallocation during delayed
allocation is fixed by either the allocsize mount option or a default
size. We are seeing a lot of cases where we need to recommend using
the allocsize mount option to prevent fragmentation when buffered
writes land in the same AG.

Rather than using a fixed preallocation size by default (up to 64k),
make it dynamic by exponentially increasing it on each subsequent
preallocation. This will result in the preallocation size increasing
as the file grows, so for streaming writes we are much more likely
to get large preallocations exactly when we need them to reduce
fragmentation. It should also remove the need for the allocsize
mount option for most workloads involving concurrent streaming
writes.

Signed-off-by: Dave Chinner <dchinner@xxxxxxxxxx>
---
 fs/xfs/xfs_inode.h |    1 +
 fs/xfs/xfs_iomap.c |   39 +++++++++++++++++++++++++++++++++++++--
 2 files changed, 38 insertions(+), 2 deletions(-)

diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 39f8c78..1594190 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -248,6 +248,7 @@ typedef struct xfs_inode {
 	mrlock_t		i_iolock;	/* inode IO lock */
 	struct completion	i_flush;	/* inode flush completion q */
 	atomic_t		i_pincount;	/* inode pin count */
+	unsigned int		i_last_prealloc; /* last EOF prealloc size */
 	wait_queue_head_t	i_ipin_wait;	/* inode pinning wait queue */
 	spinlock_t		i_flags_lock;	/* inode i_flags lock */
 	/* Miscellaneous state. */
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 2057614..b2e4782 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -389,6 +389,9 @@ error_out:
  * If the caller is doing a write at the end of the file, then extend the
  * allocation out to the file system's write iosize. We clean up any extra
  * space left over when the file is closed in xfs_inactive().
+ *
+ * If we find we already have delalloc preallocation out to alloc_blocks
+ * beyond EOF, don't do more preallocation as it is not needed.
  */
 STATIC int
 xfs_iomap_eof_want_preallocate(
@@ -405,6 +408,7 @@ xfs_iomap_eof_want_preallocate(
 	xfs_filblks_t	count_fsb;
 	xfs_fsblock_t	firstblock;
 	int		n, error, imaps;
+	int		found_delalloc = 0;
 
 	*prealloc = 0;
 	if ((offset + count) <= ip->i_size)
 		return 0;
@@ -427,11 +431,25 @@ xfs_iomap_eof_want_preallocate(
 			if ((imap[n].br_startblock != HOLESTARTBLOCK) &&
 			    (imap[n].br_startblock != DELAYSTARTBLOCK))
 				return 0;
+
 			start_fsb += imap[n].br_blockcount;
 			count_fsb -= imap[n].br_blockcount;
+
+			/* count delalloc blocks beyond EOF */
+			if (imap[n].br_startblock == DELAYSTARTBLOCK)
+				found_delalloc += imap[n].br_blockcount;
 		}
 	}
-	*prealloc = 1;
+	if (!found_delalloc) {
+		/* haven't got any prealloc, so need some */
+		*prealloc = 1;
+	} else if (found_delalloc <= count_fsb) {
+		/* almost run out of prealloc */
+		*prealloc = 1;
+	} else {
+		/* still lots of prealloc left */
+		*prealloc = 0;
+	}
 	return 0;
 }
@@ -469,6 +487,7 @@ xfs_iomap_write_delay(
 
 	extsz = xfs_get_extsz_hint(ip);
 	offset_fsb = XFS_B_TO_FSBT(mp, offset);
+
 	error = xfs_iomap_eof_want_preallocate(mp, ip, offset, count, ioflag,
 				imap, XFS_WRITE_IMAPS, &prealloc);
 	if (error)
@@ -476,9 +495,25 @@
 
 retry:
 	if (prealloc) {
+		xfs_fileoff_t	alloc_blocks = 0;
+
+		/*
+		 * If we don't have a user-specified preallocation size, dynamically
+		 * increase the preallocation size as we do more preallocation.
+		 * Cap the maximum size at a single extent.
+		 */
+		if (!(mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)) {
+			alloc_blocks = XFS_FILEOFF_MIN(MAXEXTLEN,
+						(ip->i_last_prealloc * 4));
+		}
+		if (alloc_blocks == 0)
+			alloc_blocks = mp->m_writeio_blocks;
+		ip->i_last_prealloc = alloc_blocks;
+
 		aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1));
 		ioalign = XFS_B_TO_FSBT(mp, aligned_offset);
-		last_fsb = ioalign + mp->m_writeio_blocks;
+		last_fsb = ioalign + alloc_blocks;
+		printk("ino %lld, ioalign 0x%llx, alloc_blocks 0x%llx\n",
+			ip->i_ino, ioalign, alloc_blocks);
 	} else {
 		last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
 	}
-- 
1.7.1

_______________________________________________
xfs mailing list
xfs@xxxxxxxxxxx
http://oss.sgi.com/mailman/listinfo/xfs
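
To make the growth pattern concrete, here is a minimal userspace sketch
of the sizing logic in the xfs_iomap_write_delay() hunk above. The
constants are stand-ins rather than the kernel definitions: MAXEXTLEN
here mirrors the XFS maximum extent length of 2^21 - 1 filesystem
blocks, and the 16-block starting value assumes the default 64k write
iosize on a 4k block size filesystem.

#include <stdio.h>

/* stand-ins for the kernel definitions (assumed values) */
#define MAXEXTLEN	((1UL << 21) - 1)	/* max extent length, in fsblocks */
#define WRITEIO_BLOCKS	16UL			/* 64k default iosize / 4k blocks */

#define MIN(a, b)	((a) < (b) ? (a) : (b))

int main(void)
{
	unsigned long last_prealloc = 0;	/* models ip->i_last_prealloc */
	int i;

	for (i = 1; i <= 10; i++) {
		/* quadruple the previous prealloc, capped at one extent */
		unsigned long alloc_blocks = MIN(MAXEXTLEN, last_prealloc * 4);

		if (alloc_blocks == 0)
			alloc_blocks = WRITEIO_BLOCKS;	/* first preallocation */
		last_prealloc = alloc_blocks;
		printf("prealloc %2d: %7lu blocks\n", i, alloc_blocks);
	}
	return 0;
}

Starting from the 16-block default, the preallocation size runs 16, 64,
256, 1024, ... and reaches the MAXEXTLEN cap on the tenth EOF
preallocation, which is where the large preallocations for streaming
writes come from.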