From: "Matthew Wilcox (Oracle)" <willy@xxxxxxxxxxxxx>

To replace the apply API

Signed-off-by: Matthew Wilcox (Oracle) <willy@xxxxxxxxxxxxx>
---
 fs/iomap/Makefile     |   2 +-
 fs/iomap/iter.c       | 113 ++++++++++++++++++++++++++++++++++++++++++
 include/linux/iomap.h |  24 +++++++++
 3 files changed, 138 insertions(+), 1 deletion(-)
 create mode 100644 fs/iomap/iter.c

diff --git a/fs/iomap/Makefile b/fs/iomap/Makefile
index eef2722d93a1..477e5e79f874 100644
--- a/fs/iomap/Makefile
+++ b/fs/iomap/Makefile
@@ -9,7 +9,7 @@ ccflags-y += -I $(srctree)/$(src)		# needed for trace events
 obj-$(CONFIG_FS_IOMAP)		+= iomap.o
 
 iomap-y				+= trace.o \
-				   apply.o \
+				   apply.o iter.o \
 				   buffered-io.o \
 				   direct-io.o \
 				   fiemap.o \
diff --git a/fs/iomap/iter.c b/fs/iomap/iter.c
new file mode 100644
index 000000000000..1d668fdd928e
--- /dev/null
+++ b/fs/iomap/iter.c
@@ -0,0 +1,113 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2010 Red Hat, Inc.
+ * Copyright (c) 2016-2018 Christoph Hellwig.
+ */
+#include <linux/module.h>
+#include <linux/compiler.h>
+#include <linux/fs.h>
+#include <linux/iomap.h>
+#include "trace.h"
+
+/*
+ * Advance an iomap iteration by one step.
+ *
+ * On the first call (@written == IOMAP_FIRST_CALL) this maps the start of the
+ * range described by @iter.  On every later call it first ends the previous
+ * mapping by passing @written, the number of bytes the caller processed, to
+ * ->iomap_end, advances @iter->pos and @iter->len by @written, and then maps
+ * the next part of the range.
+ *
+ * This avoids the need to map pages individually, do individual allocations
+ * for each page and most importantly avoid the need for filesystem specific
+ * locking per page. Instead, all the operations are amortised over the entire
+ * range of pages. It is assumed that the filesystems will lock whatever
+ * resources they require in the iomap_begin call, and release them in the
+ * iomap_end call.
+ *
+ * Returns the number of bytes starting at @iter->pos that are covered by
+ * @iter->iomap (and by @iter->srcmap when that is not a hole), 0 when the
+ * range is exhausted or the caller reported no progress, or a negative errno.
+ * A mapping returned here is ended by the next call, so callers must keep
+ * calling, passing the result of their work, until a value <= 0 is returned.
+ */
+loff_t iomap_iter(struct iomap_iter *iter, loff_t written)
+{
+	const struct iomap_ops *ops = iter->ops;
+	struct iomap *iomap = &iter->iomap;
+	struct iomap *srcmap = &iter->srcmap;
+	loff_t end, ret = 0;
+
+	trace_iomap_apply(iter->inode, iter->pos, iter->len, iter->flags,
+			iter->ops, NULL, _RET_IP_);
+
+	if (written != IOMAP_FIRST_CALL) {
+		if (ops->iomap_end)
+			ret = ops->iomap_end(iter->inode, iter->pos,
+					iter->len, written > 0 ? written : 0,
+					iter->flags, iomap);
+		if (written < 0)
+			return written;
+		if (ret < 0)
+			return ret;
+		/*
+		 * No progress was made on the previous mapping. Stop here
+		 * rather than mapping the same range again forever.
+		 */
+		if (written == 0)
+			return 0;
+		iter->pos += written;
+		iter->len -= written;
+	}
+
+	/*
+	 * Nothing left to process. Do not start another mapping: the caller
+	 * stops on a zero return, so ->iomap_end would never be called for it.
+	 */
+	if (iter->len <= 0)
+		return 0;
+
+	/*
+	 * Need to map a range from start position for length bytes. This can
+	 * span multiple pages - it is only guaranteed to return a range of a
+	 * single type of pages (e.g. all into a hole, all mapped or all
+	 * unwritten). Failure at this point has nothing to undo.
+	 *
+	 * If allocation is required for this range, reserve the space now so
+	 * that the allocation is guaranteed to succeed later on. Once we copy
+	 * the data into the page cache pages, then we cannot fail otherwise we
+	 * expose transient stale data. If the reserve fails, we can safely
+	 * back out at this point as there is nothing to undo.
+	 */
+	ret = ops->iomap_begin(iter->inode, iter->pos, iter->len,
+				iter->flags, iomap, srcmap);
+	if (ret)
+		return ret;
+
+	/*
+	 * The mapping must be non-empty and must cover iter->pos. If it does
+	 * not, ->iomap_begin has still succeeded, so call ->iomap_end with
+	 * nothing written to let the filesystem release what it acquired.
+	 */
+	if (WARN_ON(iomap->offset > iter->pos) ||
+	    WARN_ON(iomap->offset + iomap->length <= iter->pos) ||
+	    WARN_ON(iomap->length == 0)) {
+		if (ops->iomap_end)
+			ops->iomap_end(iter->inode, iter->pos, iter->len, 0,
+					iter->flags, iomap);
+		return -EIO;
+	}
+
+	trace_iomap_apply_dstmap(iter->inode, iomap);
+	if (srcmap->type != IOMAP_HOLE)
+		trace_iomap_apply_srcmap(iter->inode, srcmap);
+
+	/*
+	 * Cut down the length to the one actually provided by the filesystem,
+	 * as it might not be able to give us the whole size that we requested.
+	 */
+	end = iomap->offset + iomap->length;
+	if (srcmap->type != IOMAP_HOLE)
+		end = min_t(loff_t, end, srcmap->offset + srcmap->length);
+	return min(iter->len, end - iter->pos);
+}
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 8b09463dae0d..ec00a2268f14 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -142,6 +142,30 @@ struct iomap_ops {
 			ssize_t written, unsigned flags, struct iomap *iomap);
 };
 
+struct iomap_iter {
+	struct inode *inode;
+	const struct iomap_ops *ops;
+	loff_t pos;
+	loff_t len;
+	unsigned flags;
+	struct iomap iomap;
+	struct iomap srcmap;
+};
+
+#define DEFINE_IOMAP_ITER(name, _inode, _pos, _len, _flags, _ops)	\
+	struct iomap_iter name = {					\
+		.inode = _inode,					\
+		.ops = _ops,						\
+		.pos = _pos,						\
+		.len = _len,						\
+		.flags = _flags,					\
+	}
+
+/* Magic value for first call to iterator */
+#define IOMAP_FIRST_CALL	LLONG_MIN
+
+loff_t iomap_iter(struct iomap_iter *iter, loff_t written);
+
 /*
  * Main iomap iterator function.
  */
-- 
2.25.1