DIO: don't fall back to buffered writes Placeholder pages allow DIO to use locking rules similar to that of writepage. DIO can now fill holes, and it can extend the file via expanding truncates. This creates holes that are filled during the DIO write. i_mutex can be dropped during writes once DIO decides if the file needs to be extended. The expanding truncate may cause some pagecache pages to be dirtied. The call to find_or_insert_placeholders is changed to wait on dirty or writeback pages in the case of writes as well as reads. Signed-off-by: Chris Mason <chris.mason@xxxxxxxxxx> diff -r 30dcee85429c fs/direct-io.c --- a/fs/direct-io.c Fri Oct 27 13:49:19 2006 -0400 +++ b/fs/direct-io.c Fri Oct 27 13:50:48 2006 -0400 @@ -69,6 +69,7 @@ struct dio { int rw; loff_t i_size; /* i_size when submitted */ int lock_type; /* doesn't change */ + int reacquire_i_mutex; /* should we get i_mutex when done? */ unsigned blkbits; /* doesn't change */ unsigned blkfactor; /* When we're using an alignment which is finer than the filesystem's soft @@ -216,8 +217,7 @@ static int lock_page_range(struct dio *d unsigned long end = start + nr; return find_or_insert_placeholders(mapping, dio->tmppages, start, end, ARRAY_SIZE(dio->tmppages), - GFP_KERNEL, fake, - dio->rw == READ); + GFP_KERNEL, fake, 1); } @@ -253,6 +253,8 @@ static void dio_complete(struct dio *dio unlock_page_range(dio, dio->fspages_start_off, dio->fspages_end_off - dio->fspages_start_off); dio->fspages_end_off = dio->fspages_start_off; + if (dio->reacquire_i_mutex) + mutex_lock(&dio->inode->i_mutex); } /* @@ -569,13 +571,8 @@ static int get_more_blocks(struct dio *d map_bh->b_size = fs_count << dio->inode->i_blkbits; create = dio->rw & WRITE; - if (dio->lock_type == DIO_LOCKING) { - if (dio->block_in_file < (i_size_read(dio->inode) >> - dio->blkbits)) - create = 0; - } else if (dio->lock_type == DIO_NO_LOCKING) { + if (dio->lock_type == DIO_NO_LOCKING) create = 0; - } index = fs_startblk >> (PAGE_CACHE_SHIFT - dio->inode->i_blkbits); if (index >= dio->fspages_end_off) { @@ -1283,6 +1280,33 @@ __blockdev_direct_IO(int rw, struct kioc dio->is_async = !is_sync_kiocb(iocb) && !((rw & WRITE) && (end > i_size_read(inode))); + /* + * extend the file if needed, and drop i_mutex for non-aio writes. + * We extend the file by creating a hole that is later filled in + * during the O_DIRECT. Because pages are locked or placeholders + * are inserted, the locking rules end up being the same as + * mmap'd writes using writepage to fill holes + */ + dio->reacquire_i_mutex = 0; + if (dio_lock_type == DIO_LOCKING && (rw & WRITE)) { + /* if our write goes past i_size, do an expanding + * truncate to fill it before dropping i_mutex + */ + if (end > i_size_read(inode) && iocb->ki_filp) { + struct iattr newattrs; + newattrs.ia_size = end; + newattrs.ia_file = iocb->ki_filp; + newattrs.ia_valid = ATTR_SIZE | ATTR_FILE; + retval = notify_change(iocb->ki_filp->f_dentry, + &newattrs); + if (retval) + goto out; + } + if (is_sync_kiocb(iocb)) { + dio->reacquire_i_mutex = 1; + mutex_unlock(&inode->i_mutex); + } + } retval = direct_io_worker(rw, iocb, inode, iov, offset, nr_segs, blkbits, get_block, end_io, dio); out: - To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html