>From chris.mason@xxxxxxxxxx Thu Dec 21 15:35:00 2006 Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Subject: [PATCH 3 of 8] DIO: don't fall back to buffered writes X-Mercurial-Node: ac51e7a4c7a66bc589e4e3640f5f822febab8be0 Message-Id: <ac51e7a4c7a66bc589e4.1166733299@xxxxxxxxxxxxxxxxxxx> In-Reply-To: <patchbomb.1166733296@xxxxxxxxxxxxxxxxxxx> Date: Thu, 21 Dec 2006 15:34:59 -0400 From: Chris Mason <chris.mason@xxxxxxxxxx> To: linux-fsdevel@xxxxxxxxxxxxxxx, akpm@xxxxxxxx, zach.brown@xxxxxxxxxx Placeholder pages allow DIO to use locking rules similar to those of writepage. DIO can now fill holes, and it can extend the file via get_block(). i_mutex can be dropped during writes if we are writing inside i_size. Signed-off-by: Chris Mason <chris.mason@xxxxxxxxxx> diff -r 317779b11fe1 -r ac51e7a4c7a6 fs/direct-io.c --- a/fs/direct-io.c Thu Dec 21 15:31:30 2006 -0500 +++ b/fs/direct-io.c Thu Dec 21 15:31:30 2006 -0500 @@ -70,6 +70,7 @@ struct dio { int rw; loff_t i_size; /* i_size when submitted */ int lock_type; /* doesn't change */ + int reacquire_i_mutex; /* should we get i_mutex when done? 
*/ unsigned blkbits; /* doesn't change */ unsigned blkfactor; /* When we're using an alignment which is finer than the filesystem's soft @@ -218,8 +219,7 @@ static int lock_page_range(struct dio *d return 0; return find_or_insert_placeholders(mapping, dio->tmppages, start, end, ARRAY_SIZE(dio->tmppages), - GFP_KERNEL, - dio->rw == READ); + GFP_KERNEL, 1); } @@ -282,6 +282,8 @@ static int dio_complete(struct dio *dio, unlock_page_range(dio, dio->fspages_start_off, dio->fspages_end_off - dio->fspages_start_off); dio->fspages_end_off = dio->fspages_start_off; + if (dio->reacquire_i_mutex) + mutex_lock(&dio->inode->i_mutex); if (ret == 0) ret = dio->page_errors; @@ -568,13 +570,8 @@ static int get_more_blocks(struct dio *d map_bh->b_size = fs_count << dio->inode->i_blkbits; create = dio->rw & WRITE; - if (dio->lock_type == DIO_LOCKING) { - if (dio->block_in_file < (i_size_read(dio->inode) >> - dio->blkbits)) - create = 0; - } else if (dio->lock_type == DIO_NO_LOCKING) { + if (dio->lock_type == DIO_NO_LOCKING) create = 0; - } index = fs_startblk >> (PAGE_CACHE_SHIFT - dio->inode->i_blkbits); end = (dio->final_block_in_request >> dio->blkfactor) >> @@ -1258,6 +1255,13 @@ __blockdev_direct_IO(int rw, struct kioc dio->is_async = !is_sync_kiocb(iocb) && !((rw & WRITE) && (end > i_size_read(inode))); + /* if our write is inside i_size, we can drop i_mutex */ + dio->reacquire_i_mutex = 0; + if ((rw & WRITE) && dio_lock_type == DIO_LOCKING && + end <= i_size_read(inode) && is_sync_kiocb(iocb)) { + dio->reacquire_i_mutex = 1; + mutex_unlock(&inode->i_mutex); + } retval = direct_io_worker(rw, iocb, inode, iov, offset, nr_segs, blkbits, get_block, end_io, dio); out: diff -r 317779b11fe1 -r ac51e7a4c7a6 mm/filemap.c --- a/mm/filemap.c Thu Dec 21 15:31:30 2006 -0500 +++ b/mm/filemap.c Thu Dec 21 15:31:30 2006 -0500 @@ -2865,10 +2865,19 @@ generic_file_direct_IO(int rw, struct ki retval = mapping->a_ops->direct_IO(rw, iocb, iov, offset, nr_segs); if (rw == WRITE && mapping->nrpages) 
{ + int err; pgoff_t end = (offset + write_len - 1) >> PAGE_CACHE_SHIFT; - int err = invalidate_inode_pages2_range(mapping, - offset >> PAGE_CACHE_SHIFT, end); + + /* O_DIRECT is allowed to drop i_mutex, so more data + * could have been dirtied by others. Start io one more + * time + */ + err = filemap_fdatawrite_range(mapping, offset, + offset + write_len - 1); + if (!err) + err = invalidate_inode_pages2_range(mapping, + offset >> PAGE_CACHE_SHIFT, end); if (err) retval = err; } - To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html