[PATCH RFC] io_uring: improve current file position IO

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



io_uring should be protecting the current file position with the
file position lock, ->f_pos_lock. Grab and track the lock state when
the request is being issued, and make the unlock part of request
cleaning.

Fixes: ba04291eb66e ("io_uring: allow use of offset == -1 to mean file position")
Reported-by: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Signed-off-by: Jens Axboe <axboe@xxxxxxxxx>

---

Main thing I don't like here:

- We're holding the f_pos_lock across the kernel/user boundary, as
  it's held for the duration of the IO. Alternatively we could
  keep it local to io_read() and io_write() and lose REQ_F_CUR_POS_LOCK,
  but will messy up those functions more and add more items to the
  fast path (which current position read/write definitely is not).

Suggestions welcome...


diff --git a/fs/io_uring.c b/fs/io_uring.c
index fb2a0cb4aaf8..6f7713ab2eda 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -112,7 +112,7 @@
 
 #define IO_REQ_CLEAN_FLAGS (REQ_F_BUFFER_SELECTED | REQ_F_NEED_CLEANUP | \
 				REQ_F_POLLED | REQ_F_INFLIGHT | REQ_F_CREDS | \
-				REQ_F_ASYNC_DATA)
+				REQ_F_ASYNC_DATA | REQ_F_CUR_POS_LOCK)
 
 #define IO_TCTX_REFS_CACHE_NR	(1U << 10)
 
@@ -726,6 +726,7 @@ enum {
 	REQ_F_FAIL_BIT		= 8,
 	REQ_F_INFLIGHT_BIT,
 	REQ_F_CUR_POS_BIT,
+	REQ_F_CUR_POS_LOCK_BIT,
 	REQ_F_NOWAIT_BIT,
 	REQ_F_LINK_TIMEOUT_BIT,
 	REQ_F_NEED_CLEANUP_BIT,
@@ -765,6 +766,8 @@ enum {
 	REQ_F_INFLIGHT		= BIT(REQ_F_INFLIGHT_BIT),
 	/* read/write uses file position */
 	REQ_F_CUR_POS		= BIT(REQ_F_CUR_POS_BIT),
+	/* request is holding file position lock */
+	REQ_F_CUR_POS_LOCK	= BIT(REQ_F_CUR_POS_LOCK_BIT),
 	/* must not punt to workers */
 	REQ_F_NOWAIT		= BIT(REQ_F_NOWAIT_BIT),
 	/* has or had linked timeout */
@@ -2715,6 +2718,8 @@ static bool __io_complete_rw_common(struct io_kiocb *req, long res)
 		req_set_fail(req);
 		req->result = res;
 	}
+	if (req->flags & REQ_F_CUR_POS && res > 0)
+		req->file->f_pos += res;
 	return false;
 }
 
@@ -2892,12 +2897,15 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 
 	kiocb->ki_pos = READ_ONCE(sqe->off);
 	if (kiocb->ki_pos == -1) {
-		if (!(file->f_mode & FMODE_STREAM)) {
+		/*
+		 * If we end up using the current file position, just set the
+		 * flag and the actual file position will be read in the op
+		 * handler themselves.
+		 */
+		if (!(file->f_mode & FMODE_STREAM))
 			req->flags |= REQ_F_CUR_POS;
-			kiocb->ki_pos = file->f_pos;
-		} else {
+		else
 			kiocb->ki_pos = 0;
-		}
 	}
 	kiocb->ki_flags = iocb_flags(file);
 	ret = kiocb_set_rw_flags(kiocb, READ_ONCE(sqe->rw_flags));
@@ -2979,8 +2987,6 @@ static void kiocb_done(struct kiocb *kiocb, ssize_t ret,
 			ret += io->bytes_done;
 	}
 
-	if (req->flags & REQ_F_CUR_POS)
-		req->file->f_pos = kiocb->ki_pos;
 	if (ret >= 0 && (kiocb->ki_complete == io_complete_rw))
 		__io_complete_rw(req, ret, 0, issue_flags);
 	else
@@ -3515,6 +3521,27 @@ static bool need_read_all(struct io_kiocb *req)
 		S_ISBLK(file_inode(req->file)->i_mode);
 }
 
+static bool io_file_pos_lock(struct io_kiocb *req, bool force_nonblock)
+{
+	struct file *file = req->file;
+
+	if (!(req->flags & REQ_F_CUR_POS))
+		return false;
+	if (req->flags & REQ_F_CUR_POS_LOCK)
+		return false;
+	if (file->f_mode & FMODE_ATOMIC_POS && file_count(file) > 1) {
+		if (force_nonblock) {
+			if (!mutex_trylock(&file->f_pos_lock))
+				return true;
+		} else {
+			mutex_lock(&file->f_pos_lock);
+		}
+		req->flags |= REQ_F_CUR_POS_LOCK;
+	}
+	req->rw.kiocb.ki_pos = file->f_pos;
+	return false;
+}
+
 static int io_read(struct io_kiocb *req, unsigned int issue_flags)
 {
 	struct io_rw_state __s, *s = &__s;
@@ -3543,7 +3570,8 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
 
 	if (force_nonblock) {
 		/* If the file doesn't support async, just async punt */
-		if (unlikely(!io_file_supports_nowait(req))) {
+		if (unlikely(!io_file_supports_nowait(req) ||
+			     io_file_pos_lock(req, true))) {
 			ret = io_setup_async_rw(req, iovec, s, true);
 			return ret ?: -EAGAIN;
 		}
@@ -3551,6 +3579,7 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
 	} else {
 		/* Ensure we clear previously set non-block flag */
 		kiocb->ki_flags &= ~IOCB_NOWAIT;
+		io_file_pos_lock(req, false);
 	}
 
 	ret = rw_verify_area(READ, req->file, io_kiocb_ppos(kiocb), req->result);
@@ -3668,7 +3697,8 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags)
 
 	if (force_nonblock) {
 		/* If the file doesn't support async, just async punt */
-		if (unlikely(!io_file_supports_nowait(req)))
+		if (unlikely(!io_file_supports_nowait(req) ||
+			     io_file_pos_lock(req, true)))
 			goto copy_iov;
 
 		/* file path doesn't support NOWAIT for non-direct_IO */
@@ -3680,6 +3710,7 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags)
 	} else {
 		/* Ensure we clear previously set non-block flag */
 		kiocb->ki_flags &= ~IOCB_NOWAIT;
+		io_file_pos_lock(req, false);
 	}
 
 	ret = rw_verify_area(WRITE, req->file, io_kiocb_ppos(kiocb), req->result);
@@ -6584,6 +6615,8 @@ static void io_clean_op(struct io_kiocb *req)
 		kfree(req->kbuf);
 		req->kbuf = NULL;
 	}
+	if (req->flags & REQ_F_CUR_POS_LOCK)
+		__f_unlock_pos(req->file);
 
 	if (req->flags & REQ_F_NEED_CLEANUP) {
 		switch (req->opcode) {

-- 
Jens Axboe




[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux