[PATCH 3 of 8] DIO: don't fall back to buffered writes

Chris Mason <chris.mason@xxxxxxxxxx> · Thu, 21 Dec 2006 20:57:43 -0500

Placeholder pages allow DIO to use locking rules similar to those of
writepage.  DIO can now fill holes, and it can extend the file via
get_block().

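i_mutex can be dropped during writes if we are writing inside i_size.
This is only done for synchronous DIO_LOCKING writes that end at or
before i_size; the mutex is reacquired when the I/O completes.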

Signed-off-by: Chris Mason <chris.mason@xxxxxxxxxx>

diff -r 317779b11fe1 -r ac51e7a4c7a6 fs/direct-io.c

--- a/fs/direct-io.c	Thu Dec 21 15:31:30 2006 -0500
+++ b/fs/direct-io.c	Thu Dec 21 15:31:30 2006 -0500
@@ -70,6 +70,7 @@ struct dio {
 	int rw;
 	loff_t i_size;			/* i_size when submitted */
 	int lock_type;			/* doesn't change */
+	int reacquire_i_mutex;		/* should we get i_mutex when done? */
 	unsigned blkbits;		/* doesn't change */
 	unsigned blkfactor;		/* When we're using an alignment which
 					   is finer than the filesystem's soft
@@ -218,8 +219,7 @@ static int lock_page_range(struct dio *d
 		return 0;
 	return find_or_insert_placeholders(mapping, dio->tmppages, start, end,
 	                                  ARRAY_SIZE(dio->tmppages),
-					  GFP_KERNEL,
-					  dio->rw == READ);
+					  GFP_KERNEL, 1);
 }
 
 
@@ -282,6 +282,8 @@ static int dio_complete(struct dio *dio,
 	unlock_page_range(dio, dio->fspages_start_off,
 			  dio->fspages_end_off - dio->fspages_start_off);
 	dio->fspages_end_off = dio->fspages_start_off;
+	if (dio->reacquire_i_mutex)
+		mutex_lock(&dio->inode->i_mutex);
 
 	if (ret == 0)
 		ret = dio->page_errors;
@@ -568,13 +570,8 @@ static int get_more_blocks(struct dio *d
 		map_bh->b_size = fs_count << dio->inode->i_blkbits;
 
 		create = dio->rw & WRITE;
-		if (dio->lock_type == DIO_LOCKING) {
-			if (dio->block_in_file < (i_size_read(dio->inode) >>
-							dio->blkbits))
-				create = 0;
-		} else if (dio->lock_type == DIO_NO_LOCKING) {
+		if (dio->lock_type == DIO_NO_LOCKING)
 			create = 0;
-		}
 	        index = fs_startblk >> (PAGE_CACHE_SHIFT -
 		                        dio->inode->i_blkbits);
 		end = (dio->final_block_in_request >> dio->blkfactor) >>
@@ -1258,6 +1255,13 @@ __blockdev_direct_IO(int rw, struct kioc
 	dio->is_async = !is_sync_kiocb(iocb) && !((rw & WRITE) &&
 		(end > i_size_read(inode)));
 
+	/* if our write is inside i_size, we can drop i_mutex */
+	dio->reacquire_i_mutex = 0;
+	if ((rw & WRITE) && dio_lock_type == DIO_LOCKING &&
+	   end <= i_size_read(inode) && is_sync_kiocb(iocb)) {
+		dio->reacquire_i_mutex = 1;
+		mutex_unlock(&inode->i_mutex);
+	}
 	retval = direct_io_worker(rw, iocb, inode, iov, offset,
 				nr_segs, blkbits, get_block, end_io, dio);
 out:
diff -r 317779b11fe1 -r ac51e7a4c7a6 mm/filemap.c
--- a/mm/filemap.c	Thu Dec 21 15:31:30 2006 -0500
+++ b/mm/filemap.c	Thu Dec 21 15:31:30 2006 -0500
@@ -2865,10 +2865,19 @@ generic_file_direct_IO(int rw, struct ki
 	retval = mapping->a_ops->direct_IO(rw, iocb, iov,
 					offset, nr_segs);
 	if (rw == WRITE && mapping->nrpages) {
+		int err;
 		pgoff_t end = (offset + write_len - 1)
 					>> PAGE_CACHE_SHIFT;
-		int err = invalidate_inode_pages2_range(mapping,
-				offset >> PAGE_CACHE_SHIFT, end);
+
+		/* O_DIRECT is allowed to drop i_mutex, so more data
+		 * could have been dirtied by others.  Start io one more
+		 * time
+		 */
+		err = filemap_fdatawrite_range(mapping, offset,
+		                               offset + write_len - 1);
+		if (!err)
+			err = invalidate_inode_pages2_range(mapping,
+					offset >> PAGE_CACHE_SHIFT, end);
 		if (err)
 			retval = err;
 	}


-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html