This changes ext3 to skip the orphaned inode while growing the file with DIO. It also creates a version of ext3_get_block that starts and ends a transaction. By starting and ending the transaction inside get_block, this is able to avoid lock inversion problems when the DIO code tries to take page locks inside blockdev_direct_IO (transaction locks must always happen after page locks). Signed-off-by: Chris Mason <chris.mason@xxxxxxxxxx> diff -r f84d3216430d -r 218de24978fc fs/ext3/inode.c --- a/fs/ext3/inode.c Wed Nov 01 10:24:03 2006 -0500 +++ b/fs/ext3/inode.c Wed Nov 01 10:24:05 2006 -0500 @@ -1608,6 +1608,30 @@ static int ext3_releasepage(struct page return journal_try_to_free_buffers(journal, page, wait); } +static int ext3_get_block_direct_IO(struct inode *inode, sector_t iblock, + struct buffer_head *bh_result, int create) +{ + int ret = 0; + handle_t *handle = ext3_journal_start(inode, DIO_CREDITS); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + goto out; + } + ret = ext3_get_block(inode, iblock, bh_result, create); + /* + * Reacquire the handle: ext3_get_block() can restart the transaction + */ + handle = journal_current_handle(); + if (handle) { + int err; + err = ext3_journal_stop(handle); + if (!ret) + ret = err; + } +out: + return ret; +} + /* * If the O_DIRECT write will extend the file then add this inode to the * orphan list. 
So recovery will truncate it back to the original size @@ -1620,67 +1644,11 @@ static ssize_t ext3_direct_IO(int rw, st const struct iovec *iov, loff_t offset, unsigned long nr_segs) { - struct file *file = iocb->ki_filp; - struct inode *inode = file->f_mapping->host; - struct ext3_inode_info *ei = EXT3_I(inode); - handle_t *handle = NULL; - ssize_t ret; - int orphan = 0; - size_t count = iov_length(iov, nr_segs); - - if (rw == WRITE) { - loff_t final_size = offset + count; - - handle = ext3_journal_start(inode, DIO_CREDITS); - if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - goto out; - } - if (final_size > inode->i_size) { - ret = ext3_orphan_add(handle, inode); - if (ret) - goto out_stop; - orphan = 1; - ei->i_disksize = inode->i_size; - } - } - - ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, - offset, nr_segs, - ext3_get_block, NULL); - - /* - * Reacquire the handle: ext3_get_block() can restart the transaction - */ - handle = journal_current_handle(); - -out_stop: - if (handle) { - int err; - - if (orphan && inode->i_nlink) - ext3_orphan_del(handle, inode); - if (orphan && ret > 0) { - loff_t end = offset + ret; - if (end > inode->i_size) { - ei->i_disksize = end; - i_size_write(inode, end); - /* - * We're going to return a positive `ret' - * here due to non-zero-length I/O, so there's - * no way of reporting error returns from - * ext3_mark_inode_dirty() to userspace. So - * ignore it. 
- */ - ext3_mark_inode_dirty(handle, inode); - } - } - err = ext3_journal_stop(handle); - if (ret == 0) - ret = err; - } -out: - return ret; + struct inode *inode = iocb->ki_filp->f_mapping->host; + return blockdev_direct_IO_flags(rw, iocb, inode, inode->i_sb->s_bdev, + iov, offset, nr_segs, ext3_get_block_direct_IO, + NULL, DIO_PLACEHOLDERS | DIO_CREATE | + DIO_EXTEND | DIO_DROP_I_MUTEX); } /* - To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html