[PATCH 4 of 7] Turn the DIO lock_type parameter into a flags field

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This replaces the DIO lock_type parameter with a set of flags so that
filesystems can control the behavior of blockdev_direct_IO.  It is based on
code from Russell Cattelan.

The new flags are:
DIO_CREATE -- always pass create=1 to get_block on writes.  This allows
	      DIO to fill holes in the file.
DIO_PLACEHOLDERS -- use placeholder pages to provide locking against buffered
	            IO and truncates.
DIO_EXTEND -- use an expanding truncate to grow the file instead of falling
	      back to buffered IO.
DIO_DROP_I_MUTEX -- drop i_mutex before starting the IO on writes (only done
	            for synchronous iocbs).

Signed-off-by: Chris Mason <chris.mason@xxxxxxxxxx>

diff -r 3fa8c25ec60f -r f84d3216430d fs/direct-io.c
--- a/fs/direct-io.c	Wed Nov 01 10:22:34 2006 -0500
+++ b/fs/direct-io.c	Wed Nov 01 10:24:03 2006 -0500
@@ -53,13 +53,6 @@
  *
  * If blkfactor is zero then the user's request was aligned to the filesystem's
  * blocksize.
- *
- * lock_type is DIO_LOCKING for regular files on direct-IO-naive filesystems.
- * This determines whether we need to do the fancy locking which prevents
- * direct-IO from being able to read uninitialised disk blocks.  If its zero
- * (blockdev) this locking is not done, and if it is DIO_OWN_LOCKING i_mutex is
- * not held for the entire direct write (taken briefly, initially, during a
- * direct read though, but its never held for the duration of a direct-IO).
  */
 
 struct dio {
@@ -68,7 +61,7 @@ struct dio {
 	struct inode *inode;
 	int rw;
 	loff_t i_size;			/* i_size when submitted */
-	int lock_type;			/* doesn't change */
+	unsigned flags;			/* doesn't change */
 	int reacquire_i_mutex;		/* should we get i_mutex when done? */
 	unsigned blkbits;		/* doesn't change */
 	unsigned blkfactor;		/* When we're using an alignment which
@@ -203,7 +196,7 @@ static void unlock_page_range(struct dio
 static void unlock_page_range(struct dio *dio, unsigned long start,
 			      unsigned long nr)
 {
-	if (dio->lock_type != DIO_NO_LOCKING) {
+	if (dio->flags & DIO_PLACEHOLDERS) {
 		remove_placeholder_pages(dio->inode->i_mapping, dio->tmppages,
 					 &dio->fake,
 					 start, start + nr,
@@ -218,11 +211,13 @@ static int lock_page_range(struct dio *d
 	struct page *fake = &dio->fake;
 	unsigned long end = start + nr;
 
-	if (dio->lock_type == DIO_NO_LOCKING)
-		return 0;
-	return find_or_insert_placeholders(mapping, dio->tmppages, start, end,
-	                                  ARRAY_SIZE(dio->tmppages),
-					  GFP_KERNEL, fake, 1);
+	if (dio->flags & DIO_PLACEHOLDERS) {
+		return find_or_insert_placeholders(mapping, dio->tmppages,
+						   start, end,
+						   ARRAY_SIZE(dio->tmppages),
+						   GFP_KERNEL, fake, 1);
+	}
+	return 0;
 }
 
 
@@ -556,6 +551,7 @@ static int get_more_blocks(struct dio *d
 	unsigned long dio_count;/* Number of dio_block-sized blocks */
 	unsigned long blkmask;
 	unsigned long index;
+	unsigned long end;
 	int create;
 
 	/*
@@ -575,8 +571,9 @@ static int get_more_blocks(struct dio *d
 		map_bh->b_state = 0;
 		map_bh->b_size = fs_count << dio->inode->i_blkbits;
 
-		create = dio->rw & WRITE;
-		if (dio->lock_type == DIO_NO_LOCKING)
+		if (dio->flags & DIO_CREATE)
+			create = dio->rw & WRITE;
+		else
 			create = 0;
 	        index = fs_startblk >> (PAGE_CACHE_SHIFT -
 		                        dio->inode->i_blkbits);
@@ -1193,28 +1190,17 @@ direct_io_worker(int rw, struct kiocb *i
 
 /*
  * This is a library function for use by filesystem drivers.
- * The locking rules are governed by the dio_lock_type parameter.
- *
- * DIO_NO_LOCKING (no locking, for raw block device access)
- * For writes, i_mutex is not held on entry; it is never taken.
- *
- * DIO_LOCKING (simple locking for regular files)
- * For writes we are called under i_mutex and return with i_mutex held, even
- * though it is internally dropped.
- *
- * DIO_OWN_LOCKING (filesystem provides synchronisation and handling of
- *	uninitialised data, allowing parallel direct readers and writers)
- * For writes we are called without i_mutex, return without it, never touch it.
- * For reads we are called under i_mutex and return with i_mutex held, even
- * though it may be internally dropped.
- *
- * Additional i_alloc_sem locking requirements described inline below.
+ * The flags parameter is a bitmask of:
+ *
+ * DIO_PLACEHOLDERS (use placeholder pages for locking)
+ * DIO_CREATE (pass create=1 to get_block for filling holes)
+ * DIO_DROP_I_MUTEX (drop inode->i_mutex during writes)
  */
 ssize_t
 __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 	struct block_device *bdev, const struct iovec *iov, loff_t offset, 
 	unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
-	int dio_lock_type)
+	unsigned flags)
 {
 	int seg;
 	size_t size;
@@ -1225,7 +1211,6 @@ __blockdev_direct_IO(int rw, struct kioc
 	ssize_t retval = -EINVAL;
 	loff_t end = offset;
 	struct dio *dio;
-	struct address_space *mapping = iocb->ki_filp->f_mapping;
 
 	if (rw & WRITE)
 		rw = WRITE_SYNC;
@@ -1271,9 +1256,14 @@ __blockdev_direct_IO(int rw, struct kioc
 	 * For regular files using DIO_OWN_LOCKING,
 	 *	neither readers nor writers take any locks here
 	 */
-	dio->lock_type = dio_lock_type;
-
-	if (dio->lock_type == DIO_NO_LOCKING && end > offset) {
+	dio->flags = flags;
+
+	/*
+	 * the placeholder code does filemap_write_and_wait, so if we
+	 * aren't using placeholders we have to do it here
+	 */
+	if (!(dio->flags & DIO_PLACEHOLDERS) && end > offset) {
+		struct address_space *mapping = iocb->ki_filp->f_mapping;
 		retval = filemap_write_and_wait_range(mapping, offset, end - 1);
 		if (retval)
 			goto out;
@@ -1296,11 +1286,12 @@ __blockdev_direct_IO(int rw, struct kioc
 	 * mmap'd writes using writepage to fill holes
 	 */
 	dio->reacquire_i_mutex = 0;
-	if ((rw & WRITE) && dio_lock_type == DIO_LOCKING) {
+	if (rw & WRITE) {
 		/* if our write goes past i_size, do an expanding
 		 * truncate to fill it before dropping i_mutex
 		 */
-		if (end > i_size_read(inode) && iocb->ki_filp) {
+		if ((dio->flags & DIO_EXTEND) && end > i_size_read(inode) &&
+		    iocb->ki_filp) {
 			struct iattr newattrs;
 			newattrs.ia_size = end;
 			newattrs.ia_file = iocb->ki_filp;
@@ -1310,7 +1301,7 @@ __blockdev_direct_IO(int rw, struct kioc
 			if (retval)
 				goto out;
 		}
-		if (is_sync_kiocb(iocb)) {
+		if ((dio->flags & DIO_DROP_I_MUTEX) && is_sync_kiocb(iocb)) {
 			dio->reacquire_i_mutex = 1;
 			mutex_unlock(&inode->i_mutex);
 		}
diff -r 3fa8c25ec60f -r f84d3216430d include/linux/fs.h
--- a/include/linux/fs.h	Wed Nov 01 10:22:34 2006 -0500
+++ b/include/linux/fs.h	Wed Nov 01 10:24:03 2006 -0500
@@ -1801,21 +1801,32 @@ ssize_t __blockdev_direct_IO(int rw, str
 ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 	struct block_device *bdev, const struct iovec *iov, loff_t offset,
 	unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
-	int lock_type);
-
-enum {
-	DIO_LOCKING = 1, /* need locking between buffered and direct access */
-	DIO_NO_LOCKING,  /* bdev; no locking at all between buffered/direct */
-	DIO_OWN_LOCKING, /* filesystem locks buffered and direct internally */
-};
+	unsigned int dio_flags);
+
+#define DIO_PLACEHOLDERS (1 << 0)  /* insert placeholder pages */
+#define DIO_CREATE	(1 << 1)  /* pass create=1 to get_block when writing */
+#define DIO_DROP_I_MUTEX (1 << 2) /* drop i_mutex during writes */
+#define DIO_EXTEND       (1 << 3) /* extend the file w/truncate if needed */
 
 static inline ssize_t blockdev_direct_IO(int rw, struct kiocb *iocb,
 	struct inode *inode, struct block_device *bdev, const struct iovec *iov,
 	loff_t offset, unsigned long nr_segs, get_block_t get_block,
 	dio_iodone_t end_io)
 {
+	/* locking is on, FS wants to fill holes w/get_block */
 	return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset,
-				nr_segs, get_block, end_io, DIO_LOCKING);
+				nr_segs, get_block, end_io, DIO_PLACEHOLDERS |
+				DIO_CREATE | DIO_DROP_I_MUTEX | DIO_EXTEND);
+}
+
+static inline ssize_t blockdev_direct_IO_flags(int rw, struct kiocb *iocb,
+	struct inode *inode, struct block_device *bdev, const struct iovec *iov,
+	loff_t offset, unsigned long nr_segs, get_block_t get_block,
+	dio_iodone_t end_io, unsigned int flags)
+{
+	/* file system dictates locking and create behavior */
+	return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset,
+				nr_segs, get_block, end_io, flags);
 }
 
 static inline ssize_t blockdev_direct_IO_no_locking(int rw, struct kiocb *iocb,
@@ -1823,17 +1834,9 @@ static inline ssize_t blockdev_direct_IO
 	loff_t offset, unsigned long nr_segs, get_block_t get_block,
 	dio_iodone_t end_io)
 {
+	/* locking is off, create is off */
 	return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset,
-				nr_segs, get_block, end_io, DIO_NO_LOCKING);
-}
-
-static inline ssize_t blockdev_direct_IO_own_locking(int rw, struct kiocb *iocb,
-	struct inode *inode, struct block_device *bdev, const struct iovec *iov,
-	loff_t offset, unsigned long nr_segs, get_block_t get_block,
-	dio_iodone_t end_io)
-{
-	return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset,
-				nr_segs, get_block, end_io, DIO_OWN_LOCKING);
+				nr_segs, get_block, end_io, 0);
 }
 #endif
 


-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]
  Powered by Linux