From: Dave Chinner <dchinner@xxxxxxxxxx> If we do unaligned direct IO to an unwritten extent, the direct IO code does sub-block zeroing for us. However, we need to serialise the sub-block zeroing as we cannot have another outstanding IO to the same block that is being zeroed at the same time. If we do so, then we'll have two direct IOs to the same block that zero different portions of the block and we'll end up with zeros where we should have data. Serialise such IOs in the get_blocks callback when we know that we are mapping an unaligned direct IO. This initial implementation is the sledge-hammer approach - we can probably be finer grained than this to avoid excessive serialisation when all IO is unaligned to a file. Signed-off-by: Dave Chinner <dchinner@xxxxxxxxxx> --- fs/xfs/linux-2.6/xfs_aops.c | 47 +++++++++++++++++++++++++++++++++--------- 1 files changed, 37 insertions(+), 10 deletions(-) diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c index 8abbf05..5682490 100644 --- a/fs/xfs/linux-2.6/xfs_aops.c +++ b/fs/xfs/linux-2.6/xfs_aops.c @@ -1286,15 +1286,17 @@ __xfs_get_blocks( struct inode *inode, sector_t iblock, struct buffer_head *bh_result, - int create, + int flags, int direct) { - int flags = create ? BMAPI_WRITE : BMAPI_READ; + int create = flags & GET_BLOCKS_CREATE; + int bflags = create ? 
BMAPI_WRITE : BMAPI_READ; struct xfs_bmbt_irec imap; xfs_off_t offset; ssize_t size; - int nimap = 1; - int new = 0; + int nimap; + int new; + int iolock_changed = 0; int error; offset = (xfs_off_t)iblock << inode->i_blkbits; @@ -1305,15 +1307,40 @@ __xfs_get_blocks( return 0; if (direct && create) - flags |= BMAPI_DIRECT; + bflags |= BMAPI_DIRECT; - error = xfs_iomap(XFS_I(inode), offset, size, flags, &imap, &nimap, +remap: + memset(&imap, 0, sizeof(imap)); + nimap = 1; + new = 0; + error = xfs_iomap(XFS_I(inode), offset, size, bflags, &imap, &nimap, &new); if (error) return -error; if (nimap == 0) return 0; + /* + * If this is an unwritten extent and we are doing unaligned direct IO + * to it, we need to serialise the sub-block zeroing across multiple + * concurrent IOs. As a brute-force way of doing that, promote the + * IOLOCK we hold to exclusive to prevent new IO from being issued, + * wait for all the current IO to drain (and potentially convert the + * unwritten extents) and remap the extent once all the IO has drained. + * Then we can demote the IOLOCK back to shared and proceed with the + * IO. 
+ */ + if (direct && create && ISUNWRITTEN(&imap) && + (flags & GET_BLOCKS_UNALIGNED) && !iolock_changed) { + xfs_iunlock(XFS_I(inode), XFS_IOLOCK_SHARED); + xfs_ilock(XFS_I(inode), XFS_IOLOCK_EXCL); + xfs_ioend_wait(XFS_I(inode)); + iolock_changed = 1; + goto remap; + } + if (iolock_changed) + xfs_ilock_demote(XFS_I(inode), XFS_IOLOCK_EXCL); + if (imap.br_startblock != HOLESTARTBLOCK && imap.br_startblock != DELAYSTARTBLOCK) { /* @@ -1386,9 +1413,9 @@ xfs_get_blocks( struct inode *inode, sector_t iblock, struct buffer_head *bh_result, - int create) + int flags) { - return __xfs_get_blocks(inode, iblock, bh_result, create, 0); + return __xfs_get_blocks(inode, iblock, bh_result, flags, 0); } STATIC int @@ -1396,9 +1423,9 @@ xfs_get_blocks_direct( struct inode *inode, sector_t iblock, struct buffer_head *bh_result, - int create) + int flags) { - return __xfs_get_blocks(inode, iblock, bh_result, create, 1); + return __xfs_get_blocks(inode, iblock, bh_result, flags, 1); } STATIC void -- 1.7.1 _______________________________________________ xfs mailing list xfs@xxxxxxxxxxx http://oss.sgi.com/mailman/listinfo/xfs