[RFC 1/1] ext4: PoC implementation of option-1

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This is just a PoC implementation of the option-1 which I was
mentioning in the previous email.
As mentioned, it is adding some flags & private pointer
to iomap infrastructure.

I would let upto you and others to comment on this approach.
Please note that I have run only few basic tests.

Signed-off-by: Ritesh Harjani <riteshh@xxxxxxxxxxxxx>
---
 fs/ext4/ext4.h        |  1 +
 fs/ext4/file.c        | 37 ++++++++++++++++++++++++++++++++++---
 fs/ext4/inode.c       | 31 +++++++++++++++++++++++++++++++
 fs/iomap/direct-io.c  | 16 +++++++++++++++-
 fs/xfs/xfs_file.c     |  3 ++-
 include/linux/iomap.h |  5 ++++-
 6 files changed, 87 insertions(+), 6 deletions(-)

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 2ab91815f52d..93aa716c691e 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -1555,6 +1555,7 @@ enum {
 	EXT4_STATE_NO_EXPAND,		/* No space for expansion */
 	EXT4_STATE_DA_ALLOC_CLOSE,	/* Alloc DA blks on close */
 	EXT4_STATE_EXT_MIGRATE,		/* Inode is migrating */
+	EXT4_STATE_DIO_UNWRITTEN,	/* need convert on dio done*/
 	EXT4_STATE_NEWENTRY,		/* File just added to dir */
 	EXT4_STATE_MAY_INLINE_DATA,	/* may have in-inode data */
 	EXT4_STATE_EXT_PRECACHED,	/* extents have been precached */
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 3736dbf28d0a..adfb56401312 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -314,8 +314,30 @@ static int ext4_handle_inode_extension(struct inode *inode, loff_t size,
 	return ret;
 }
 
+static int ext4_dio_write_end_io_convert(struct inode *inode, loff_t offset,
+					 ssize_t size, void *private)
+{
+	int ret = 0;
+	ext4_io_end_t *io_end = private;
+
+	if (!io_end) {
+		WARN_ON(!ext4_test_inode_state(inode,
+				EXT4_STATE_DIO_UNWRITTEN));
+		ret = ext4_convert_unwritten_extents(NULL, inode, offset, size);
+		if (ret)
+			return ret;
+		ext4_clear_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
+	} else {
+		io_end->offset = offset;
+		io_end->size = size;
+		ext4_put_io_end(io_end);
+	}
+	return ret;
+}
+
 static int ext4_dio_write_end_io(struct kiocb *iocb, ssize_t size,
-				 ssize_t error, unsigned int flags)
+				 ssize_t error, unsigned int flags,
+				 void *private)
 {
 	int ret = 0;
 	handle_t *handle;
@@ -345,12 +367,21 @@ static int ext4_dio_write_end_io(struct kiocb *iocb, ssize_t size,
 				ext4_orphan_del(handle, inode);
 			ext4_journal_stop(handle);
 		}
+
+		if (flags & IOMAP_DIO_UNWRITTEN) {
+			ret = ext4_dio_write_end_io_convert(inode, offset,
+					size, private);
+			if (ret < 0)
+				return ret;
+		}
+
 		return error;
 	}
 
 	if (flags & IOMAP_DIO_UNWRITTEN) {
-		ret = ext4_convert_unwritten_extents(NULL, inode, offset, size);
-		if (ret)
+		ret = ext4_dio_write_end_io_convert(inode, offset,
+						    size, private);
+		if (ret < 0)
 			return ret;
 	}
 
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 15715d22808d..9d77ed2aa58c 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3322,6 +3322,26 @@ static bool ext4_inode_datasync_dirty(struct inode *inode)
 	return inode->i_state & I_DIRTY_DATASYNC;
 }
 
+static int ext4_iomap_init_io_end(struct inode *inode, struct iomap *iomap,
+				  unsigned flags)
+{
+	ext4_io_end_t *io_end;
+
+	if (flags & IOMAP_DIRECT_AIO) {
+		if (flags & IOMAP_PRIVATE)
+			return 0;
+		io_end = ext4_init_io_end(inode, GFP_KERNEL);
+		if (!io_end)
+			return -ENOMEM;
+		iomap->private = io_end;
+		ext4_set_io_unwritten_flag(inode, io_end);
+	} else {
+		iomap->private = NULL;
+		ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
+	}
+	return 0;
+}
+
 static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
 			    unsigned flags, struct iomap *iomap)
 {
@@ -3433,6 +3453,17 @@ static int ext4_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
 				ret = ext4_map_blocks(handle, inode, &map,
 						      EXT4_GET_BLOCKS_IO_CREATE_EXT);
 			}
+
+			if (map.m_flags & EXT4_MAP_UNWRITTEN) {
+				int err;
+
+				err = ext4_iomap_init_io_end(inode, iomap,
+							     flags);
+				if (err < 0) {
+					ext4_journal_stop(handle);
+					return err;
+				}
+			}
 		}
 
 		if (ret < 0) {
diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index 2ccf1c6460d4..7d88683a0d93 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -17,6 +17,7 @@
  * Private flags for iomap_dio, must not overlap with the public ones in
  * iomap.h:
  */
+#define IOMAP_DIO_PRIVATE	(1 << 27)
 #define IOMAP_DIO_WRITE_FUA	(1 << 28)
 #define IOMAP_DIO_NEED_SYNC	(1 << 29)
 #define IOMAP_DIO_WRITE		(1 << 30)
@@ -31,6 +32,7 @@ struct iomap_dio {
 	unsigned		flags;
 	int			error;
 	bool			wait_for_completion;
+	void			*private;	/* FS specific */
 
 	union {
 		/* used during submission and for synchronous completion: */
@@ -78,7 +80,8 @@ static ssize_t iomap_dio_complete(struct iomap_dio *dio)
 	ssize_t ret;
 
 	if (dio->end_io)
-		ret = dio->end_io(iocb, dio->size, dio->error, dio->flags);
+		ret = dio->end_io(iocb, dio->size, dio->error, dio->flags,
+				  dio->private);
 	else
 		ret = dio->error;
 
@@ -363,6 +366,11 @@ iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length,
 {
 	struct iomap_dio *dio = data;
 
+	if (!(dio->flags & IOMAP_DIO_PRIVATE) && iomap->private) {
+		dio->private = iomap->private;
+		dio->flags |= IOMAP_DIO_PRIVATE;
+	}
+
 	switch (iomap->type) {
 	case IOMAP_HOLE:
 		if (WARN_ON_ONCE(dio->flags & IOMAP_DIO_WRITE))
@@ -421,6 +429,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 	dio->end_io = end_io;
 	dio->error = 0;
 	dio->flags = 0;
+	dio->private = NULL;
 
 	dio->submit.iter = iter;
 	dio->submit.waiter = current;
@@ -458,6 +467,8 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 		}
 		flags |= IOMAP_NOWAIT;
 	}
+	if (!is_sync_kiocb(iocb) && (flags & IOMAP_WRITE))
+		flags |= IOMAP_DIRECT_AIO;
 
 	ret = filemap_write_and_wait_range(mapping, start, end);
 	if (ret)
@@ -486,6 +497,9 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 
 	blk_start_plug(&plug);
 	do {
+		if (dio->flags & IOMAP_DIO_PRIVATE)
+			flags |= IOMAP_PRIVATE;
+
 		ret = iomap_apply(inode, pos, count, flags, ops, dio,
 				iomap_dio_actor);
 		if (ret <= 0) {
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index f2bc3ac4a60e..205c4219986c 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -370,7 +370,8 @@ xfs_dio_write_end_io(
 	struct kiocb		*iocb,
 	ssize_t			size,
 	ssize_t                 error,
-	unsigned		flags)
+	unsigned		flags,
+	void			*private)
 {
 	struct inode		*inode = file_inode(iocb->ki_filp);
 	struct xfs_inode	*ip = XFS_I(inode);
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 900284e5c06c..57d3679442f9 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -102,6 +102,8 @@ struct iomap_page_ops {
 #define IOMAP_FAULT		(1 << 3) /* mapping for page fault */
 #define IOMAP_DIRECT		(1 << 4) /* direct I/O */
 #define IOMAP_NOWAIT		(1 << 5) /* do not block */
+#define IOMAP_DIRECT_AIO	(1 << 6) /* AIO DIO */
+#define IOMAP_PRIVATE		(1 << 7) /* FS specific */
 
 struct iomap_ops {
 	/*
@@ -189,7 +191,8 @@ sector_t iomap_bmap(struct address_space *mapping, sector_t bno,
 #define IOMAP_DIO_UNWRITTEN	(1 << 0)	/* covers unwritten extent(s) */
 #define IOMAP_DIO_COW		(1 << 1)	/* covers COW extent(s) */
 typedef int (iomap_dio_end_io_t)(struct kiocb *iocb, ssize_t size,
-				 ssize_t error, unsigned int flags);
+				 ssize_t error, unsigned int flags,
+				 void *private);
 ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 		const struct iomap_ops *ops, iomap_dio_end_io_t end_io);
 int iomap_dio_iopoll(struct kiocb *kiocb, bool spin);
-- 
2.21.0




[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]

  Powered by Linux