[RFC 3/8] iomap: Add atomic write support for direct-io

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This adds direct-io atomic write support to iomap. This patch:
1. Adds the IOMAP_ATOMIC flag for the iomap iter.
2. Sets REQ_ATOMIC in the bio opflags.
3. Adds the necessary checks in the iomap_dio code to ensure that a single
   bio is submitted for an atomic write request (we only support ubuf
   type iocbs). Otherwise an error (-EINVAL or -EIO) is returned.
4. Adds a common helper routine iomap_dio_check_atomic(). It helps in
   verifying mapped length and start/end physical offset against the hw
   device constraints for supporting atomic writes.

This patch is based on a patch from John Garry <john.g.garry@xxxxxxxxxx>
which adds such support of DIO atomic writes to iomap.

Co-developed-by: Ojaswin Mujoo <ojaswin@xxxxxxxxxxxxx>
Signed-off-by: Ojaswin Mujoo <ojaswin@xxxxxxxxxxxxx>
Signed-off-by: Ritesh Harjani (IBM) <ritesh.list@xxxxxxxxx>
---
 fs/iomap/direct-io.c  | 75 +++++++++++++++++++++++++++++++++++++++++--
 fs/iomap/trace.h      |  3 +-
 include/linux/iomap.h |  1 +
 3 files changed, 75 insertions(+), 4 deletions(-)

diff --git a/fs/iomap/direct-io.c b/fs/iomap/direct-io.c
index bcd3f8cf5ea4..b4548acb74e7 100644
--- a/fs/iomap/direct-io.c
+++ b/fs/iomap/direct-io.c
@@ -256,7 +256,7 @@ static void iomap_dio_zero(const struct iomap_iter *iter, struct iomap_dio *dio,
  * clearing the WRITE_THROUGH flag in the dio request.
  */
 static inline blk_opf_t iomap_dio_bio_opflags(struct iomap_dio *dio,
-		const struct iomap *iomap, bool use_fua)
+		const struct iomap *iomap, bool use_fua, bool atomic_write)
 {
 	blk_opf_t opflags = REQ_SYNC | REQ_IDLE;
 
@@ -269,6 +269,9 @@ static inline blk_opf_t iomap_dio_bio_opflags(struct iomap_dio *dio,
 	else
 		dio->flags &= ~IOMAP_DIO_WRITE_THROUGH;
 
+	if (atomic_write)
+		opflags |= REQ_ATOMIC;
+
 	return opflags;
 }
 
@@ -279,11 +282,12 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
 	struct inode *inode = iter->inode;
 	unsigned int fs_block_size = i_blocksize(inode), pad;
 	loff_t length = iomap_length(iter);
+	const size_t orig_len = iter->len;
 	loff_t pos = iter->pos;
 	blk_opf_t bio_opf;
 	struct bio *bio;
 	bool need_zeroout = false;
-	bool use_fua = false;
+	bool use_fua = false, atomic_write = iter->flags & IOMAP_ATOMIC;
 	int nr_pages, ret = 0;
 	size_t copied = 0;
 	size_t orig_count;
@@ -356,6 +360,11 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
 	if (need_zeroout) {
 		/* zero out from the start of the block to the write offset */
 		pad = pos & (fs_block_size - 1);
+		if (unlikely(pad && atomic_write)) {
+			WARN_ON_ONCE("pos not atomic write aligned\n");
+			ret = -EINVAL;
+			goto out;
+		}
 		if (pad)
 			iomap_dio_zero(iter, dio, pos - pad, pad);
 	}
@@ -365,7 +374,7 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
 	 * can set up the page vector appropriately for a ZONE_APPEND
 	 * operation.
 	 */
-	bio_opf = iomap_dio_bio_opflags(dio, iomap, use_fua);
+	bio_opf = iomap_dio_bio_opflags(dio, iomap, use_fua, atomic_write);
 
 	nr_pages = bio_iov_vecs_to_alloc(dio->submit.iter, BIO_MAX_VECS);
 	do {
@@ -397,6 +406,14 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
 		}
 
 		n = bio->bi_iter.bi_size;
+
+		/* This bio should have covered the complete length */
+		if (unlikely(atomic_write && n != orig_len)) {
+			WARN_ON_ONCE(1);
+			ret = -EINVAL;
+			bio_put(bio);
+			goto out;
+		}
 		if (dio->flags & IOMAP_DIO_WRITE) {
 			task_io_account_write(n);
 		} else {
@@ -429,6 +446,8 @@ static loff_t iomap_dio_bio_iter(const struct iomap_iter *iter,
 	    ((dio->flags & IOMAP_DIO_WRITE) && pos >= i_size_read(inode))) {
 		/* zero out from the end of the write to the end of the block */
 		pad = pos & (fs_block_size - 1);
+		/* This should never happen */
+		WARN_ON_ONCE(unlikely(pad && atomic_write));
 		if (pad)
 			iomap_dio_zero(iter, dio, pos, fs_block_size - pad);
 	}
@@ -516,6 +535,44 @@ static loff_t iomap_dio_iter(const struct iomap_iter *iter,
 	}
 }
 
+/*
+ * iomap_dio_check_atomic:	DIO Atomic checks before calling bio submission.
+ * @iter:			iomap iterator
+ * This function is called after filesystem block mapping and before bio
+ * formation/submission. This is the right place to verify hw device/block
+ * layer constraints to be followed for doing atomic writes. Hence do those
+ * common checks here.
+ * Return: true if the mapping may be issued as one atomic write, false if it
+ * is shorter than the request, misaligned, or straddles an atomic boundary.
+ */
+static bool iomap_dio_check_atomic(struct iomap_iter *iter)
+{
+	struct block_device *bdev = iter->iomap.bdev;
+	unsigned long long map_len = iomap_length(iter);
+	unsigned long long start = iomap_sector(&iter->iomap, iter->pos)
+						<< SECTOR_SHIFT;
+	unsigned long long end = start + map_len - 1;
+	unsigned int awu_min =
+			queue_atomic_write_unit_min_bytes(bdev->bd_queue);
+	unsigned long long boundary =
+			queue_atomic_write_boundary_bytes(bdev->bd_queue);
+	/* 64-bit so that the upper bits of start/end survive the masking */
+	unsigned long long mask = ~(boundary - 1);
+
+	/* The mapping must cover the whole user request (iter->len) */
+	if (map_len < iter->len)
+		return false;
+	/* start should be aligned to block device min atomic unit alignment */
+	if (!IS_ALIGNED(start, awu_min))
+		return false;
+	/* If the top bits don't match, an atomic unit boundary is crossed */
+	if (boundary && ((start & mask) != (end & mask)))
+		return false;
+
+	return true;
+}
+
+
 /*
  * iomap_dio_rw() always completes O_[D]SYNC writes regardless of whether the IO
  * is being issued as AIO or not.  This allows us to optimise pure data writes
@@ -554,12 +611,16 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 	struct blk_plug plug;
 	struct iomap_dio *dio;
 	loff_t ret = 0;
+	bool atomic_write = iocb->ki_flags & IOCB_ATOMIC;
 
 	trace_iomap_dio_rw_begin(iocb, iter, dio_flags, done_before);
 
 	if (!iomi.len)
 		return NULL;
 
+	if (atomic_write && !iter_is_ubuf(iter))
+		return ERR_PTR(-EINVAL);
+
 	dio = kmalloc(sizeof(*dio), GFP_KERNEL);
 	if (!dio)
 		return ERR_PTR(-ENOMEM);
@@ -605,6 +666,9 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 		if (iocb->ki_flags & IOCB_DIO_CALLER_COMP)
 			dio->flags |= IOMAP_DIO_CALLER_COMP;
 
+		if (atomic_write)
+			iomi.flags |= IOMAP_ATOMIC;
+
 		if (dio_flags & IOMAP_DIO_OVERWRITE_ONLY) {
 			ret = -EAGAIN;
 			if (iomi.pos >= dio->i_size ||
@@ -656,6 +720,11 @@ __iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
 
 	blk_start_plug(&plug);
 	while ((ret = iomap_iter(&iomi, ops)) > 0) {
+		if (atomic_write && !iomap_dio_check_atomic(&iomi)) {
+			ret = -EIO;
+			break;
+		}
+
 		iomi.processed = iomap_dio_iter(&iomi, dio);
 
 		/*
diff --git a/fs/iomap/trace.h b/fs/iomap/trace.h
index c16fd55f5595..c95576420bca 100644
--- a/fs/iomap/trace.h
+++ b/fs/iomap/trace.h
@@ -98,7 +98,8 @@ DEFINE_RANGE_EVENT(iomap_dio_rw_queued);
 	{ IOMAP_REPORT,		"REPORT" }, \
 	{ IOMAP_FAULT,		"FAULT" }, \
 	{ IOMAP_DIRECT,		"DIRECT" }, \
-	{ IOMAP_NOWAIT,		"NOWAIT" }
+	{ IOMAP_NOWAIT,		"NOWAIT" }, \
+	{ IOMAP_ATOMIC,		"ATOMIC" }
 
 #define IOMAP_F_FLAGS_STRINGS \
 	{ IOMAP_F_NEW,		"NEW" }, \
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 96dd0acbba44..9eac704a0d6f 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -178,6 +178,7 @@ struct iomap_folio_ops {
 #else
 #define IOMAP_DAX		0
 #endif /* CONFIG_FS_DAX */
+#define IOMAP_ATOMIC		(1 << 9) /* atomic write, set for IOCB_ATOMIC dio */
 
 struct iomap_ops {
 	/*
-- 
2.43.0





[Index of Archives]     [Reiser Filesystem Development]     [Ceph FS]     [Kernel Newbies]     [Security]     [Netfilter]     [Bugtraq]     [Linux FS]     [Yosemite National Park]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Samba]     [Device Mapper]     [Linux Media]

  Powered by Linux