Then it's just a trivial backport of ff6165b2d7f6 and the trivial
io_uring patch after that.
diff --git a/fs/io_uring.c b/fs/io_uring.c
index ab9290ab4cae..138f204db72a 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -3429,6 +3429,7 @@ static int io_read(struct io_kiocb *req, bool force_nonblock,
struct kiocb *kiocb = &req->rw.kiocb;
struct iov_iter __iter, *iter = &__iter;
struct io_async_rw *rw = req->async_data;
+ struct iov_iter_state iter_state;
ssize_t io_size, ret, ret2;
bool no_async;
@@ -3458,6 +3459,7 @@ static int io_read(struct io_kiocb *req, bool force_nonblock,
if (unlikely(ret))
goto out_free;
+ iov_iter_save_state(iter, &iter_state);
ret = io_iter_do_read(req, iter);
if (!ret) {
@@ -3473,7 +3475,7 @@ static int io_read(struct io_kiocb *req, bool force_nonblock,
if (req->file->f_flags & O_NONBLOCK)
goto done;
/* some cases will consume bytes even on error returns */
- iov_iter_revert(iter, io_size - iov_iter_count(iter));
+ iov_iter_restore(iter, &iter_state);
ret = 0;
goto copy_iov;
} else if (ret < 0) {
@@ -3557,6 +3559,7 @@ static int io_write(struct io_kiocb *req, bool force_nonblock,
struct kiocb *kiocb = &req->rw.kiocb;
struct iov_iter __iter, *iter = &__iter;
struct io_async_rw *rw = req->async_data;
+ struct iov_iter_state iter_state;
ssize_t ret, ret2, io_size;
if (rw)
@@ -3574,6 +3577,8 @@ static int io_write(struct io_kiocb *req, bool force_nonblock,
else
kiocb->ki_flags |= IOCB_NOWAIT;
+ iov_iter_save_state(iter, &iter_state);
+
/* If the file doesn't support async, just async punt */
if (force_nonblock && !io_file_supports_async(req->file, WRITE))
goto copy_iov;
@@ -3626,7 +3631,7 @@ static int io_write(struct io_kiocb *req, bool force_nonblock,
} else {
copy_iov:
/* some cases will consume bytes even on error returns */
- iov_iter_revert(iter, io_size - iov_iter_count(iter));
+ iov_iter_restore(iter, &iter_state);
ret = io_setup_async_rw(req, iovec, inline_vecs, iter, false);
if (!ret)
return -EAGAIN;
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 27ff8eb786dc..cedb68e49e4f 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -26,6 +26,12 @@ enum iter_type {
ITER_DISCARD = 64,
};
+struct iov_iter_state {
+ size_t iov_offset;
+ size_t count;
+ unsigned long nr_segs;
+};
+
struct iov_iter {
/*
* Bit 0 is the read/write bit, set if we're writing.
@@ -55,6 +61,14 @@ static inline enum iter_type iov_iter_type(const struct iov_iter *i)
return i->type & ~(READ | WRITE);
}
+static inline void iov_iter_save_state(struct iov_iter *iter,
+ struct iov_iter_state *state)
+{
+ state->iov_offset = iter->iov_offset;
+ state->count = iter->count;
+ state->nr_segs = iter->nr_segs;
+}
+
static inline bool iter_is_iovec(const struct iov_iter *i)
{
return iov_iter_type(i) == ITER_IOVEC;
@@ -226,6 +240,7 @@ ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages,
ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages,
size_t maxsize, size_t *start);
int iov_iter_npages(const struct iov_iter *i, int maxpages);
+void iov_iter_restore(struct iov_iter *i, struct iov_iter_state *state);
const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags);
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 1b0a349fbcd9..00a66229d182 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -1857,3 +1857,39 @@ int iov_iter_for_each_range(struct iov_iter *i, size_t bytes,
return err;
}
EXPORT_SYMBOL(iov_iter_for_each_range);
+
+/**
+ * iov_iter_restore() - Restore a &struct iov_iter to the same state as when
+ * iov_iter_save_state() was called.
+ *
+ * @i: &struct iov_iter to restore
+ * @state: state to restore from
+ *
+ * Used after iov_iter_save_state() to bring restore @i, if operations may
+ * have advanced it.
+ *
+ * Note: only works on ITER_IOVEC, ITER_BVEC, and ITER_KVEC
+ */
+void iov_iter_restore(struct iov_iter *i, struct iov_iter_state *state)
+{
+ if (WARN_ON_ONCE(!iov_iter_is_bvec(i) && !iter_is_iovec(i)) &&
+ !iov_iter_is_kvec(i))
+ return;
+ i->iov_offset = state->iov_offset;
+ i->count = state->count;
+ /*
+ * For the *vec iters, nr_segs + iov is constant - if we increment
+ * the vec, then we also decrement the nr_segs count. Hence we don't
+ * need to track both of these, just one is enough and we can deduct
+ * the other from that. ITER_KVEC and ITER_IOVEC are the same struct
+ * size, so we can just increment the iov pointer as they are unionzed.
+ * ITER_BVEC _may_ be the same size on some archs, but on others it is
+ * not. Be safe and handle it separately.
+ */
+ BUILD_BUG_ON(sizeof(struct iovec) != sizeof(struct kvec));
+ if (iov_iter_is_bvec(i))
+ i->bvec -= state->nr_segs - i->nr_segs;
+ else
+ i->iov -= state->nr_segs - i->nr_segs;
+ i->nr_segs = state->nr_segs;
+}