io_uring should be protecting the current file position with the file
position lock, ->f_pos_lock. Grab and track the lock state when the
request is being issued, and make the unlock part of request cleanup.

Fixes: ba04291eb66e ("io_uring: allow use of offset == -1 to mean file position")
Reported-by: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Signed-off-by: Jens Axboe <axboe@xxxxxxxxx>
---
Main thing I don't like here:

- We're holding the f_pos_lock across the kernel/user boundary, as it's
  held for the duration of the IO. Alternatively we could keep it local
  to io_read() and io_write() and lose REQ_F_CUR_POS_LOCK, but that
  would mess up those functions more and add more items to the fast
  path (which current-position read/write definitely is not).

Suggestions welcome...
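For context (not part of the patch): this mirrors what the regular
read(2)/write(2) syscall path already does, where fdget_pos() takes
->f_pos_lock for FMODE_ATOMIC_POS files with more than one reference,
and fdput_pos() drops it once the IO is done. Roughly, paraphrased from
fs/read_write.c (details may differ between kernel versions):

ssize_t ksys_read(unsigned int fd, char __user *buf, size_t count)
{
	/* takes ->f_pos_lock if FMODE_ATOMIC_POS and f_count > 1 */
	struct fd f = fdget_pos(fd);
	ssize_t ret = -EBADF;

	if (f.file) {
		loff_t pos, *ppos = file_ppos(f.file);

		if (ppos) {
			pos = *ppos;
			ppos = &pos;
		}
		ret = vfs_read(f.file, buf, count, ppos);
		if (ret >= 0 && ppos)
			f.file->f_pos = pos;
		/* drops ->f_pos_lock via __f_unlock_pos(), if taken */
		fdput_pos(f);
	}
	return ret;
}

The difference with io_uring is that the IO can complete asynchronously,
so the unlock can't be confined to a single syscall's scope; hence the
REQ_F_CUR_POS_LOCK tracking below, with the release done from
io_clean_op().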
diff --git a/fs/io_uring.c b/fs/io_uring.c
index fb2a0cb4aaf8..6f7713ab2eda 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -112,7 +112,7 @@
 
 #define IO_REQ_CLEAN_FLAGS (REQ_F_BUFFER_SELECTED | REQ_F_NEED_CLEANUP | \
 				REQ_F_POLLED | REQ_F_INFLIGHT | REQ_F_CREDS | \
-				REQ_F_ASYNC_DATA)
+				REQ_F_ASYNC_DATA | REQ_F_CUR_POS_LOCK)
 
 #define IO_TCTX_REFS_CACHE_NR	(1U << 10)
 
@@ -726,6 +726,7 @@ enum {
 	REQ_F_FAIL_BIT		= 8,
 	REQ_F_INFLIGHT_BIT,
 	REQ_F_CUR_POS_BIT,
+	REQ_F_CUR_POS_LOCK_BIT,
 	REQ_F_NOWAIT_BIT,
 	REQ_F_LINK_TIMEOUT_BIT,
 	REQ_F_NEED_CLEANUP_BIT,
@@ -765,6 +766,8 @@ enum {
 	REQ_F_INFLIGHT		= BIT(REQ_F_INFLIGHT_BIT),
 	/* read/write uses file position */
 	REQ_F_CUR_POS		= BIT(REQ_F_CUR_POS_BIT),
+	/* request is holding file position lock */
+	REQ_F_CUR_POS_LOCK	= BIT(REQ_F_CUR_POS_LOCK_BIT),
 	/* must not punt to workers */
 	REQ_F_NOWAIT		= BIT(REQ_F_NOWAIT_BIT),
 	/* has or had linked timeout */
@@ -2715,6 +2718,8 @@ static bool __io_complete_rw_common(struct io_kiocb *req, long res)
 		req_set_fail(req);
 		req->result = res;
 	}
+	if (req->flags & REQ_F_CUR_POS && res > 0)
+		req->file->f_pos += res;
 	return false;
 }
 
@@ -2892,12 +2897,15 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 
 	kiocb->ki_pos = READ_ONCE(sqe->off);
 	if (kiocb->ki_pos == -1) {
-		if (!(file->f_mode & FMODE_STREAM)) {
+		/*
+		 * If we end up using the current file position, just set the
+		 * flag and the actual file position will be read in the op
+		 * handlers themselves.
+		 */
+		if (!(file->f_mode & FMODE_STREAM))
 			req->flags |= REQ_F_CUR_POS;
-			kiocb->ki_pos = file->f_pos;
-		} else {
+		else
 			kiocb->ki_pos = 0;
-		}
 	}
 	kiocb->ki_flags = iocb_flags(file);
 	ret = kiocb_set_rw_flags(kiocb, READ_ONCE(sqe->rw_flags));
@@ -2979,8 +2987,6 @@ static void kiocb_done(struct kiocb *kiocb, ssize_t ret,
 		ret += io->bytes_done;
 	}
 
-	if (req->flags & REQ_F_CUR_POS)
-		req->file->f_pos = kiocb->ki_pos;
 	if (ret >= 0 && (kiocb->ki_complete == io_complete_rw))
 		__io_complete_rw(req, ret, 0, issue_flags);
 	else
@@ -3515,6 +3521,27 @@ static bool need_read_all(struct io_kiocb *req)
 		S_ISBLK(file_inode(req->file)->i_mode);
 }
 
+static bool io_file_pos_lock(struct io_kiocb *req, bool force_nonblock)
+{
+	struct file *file = req->file;
+
+	if (!(req->flags & REQ_F_CUR_POS))
+		return false;
+	if (req->flags & REQ_F_CUR_POS_LOCK)
+		return false;
+	if (file->f_mode & FMODE_ATOMIC_POS && file_count(file) > 1) {
+		if (force_nonblock) {
+			if (!mutex_trylock(&file->f_pos_lock))
+				return true;
+		} else {
+			mutex_lock(&file->f_pos_lock);
+		}
+		req->flags |= REQ_F_CUR_POS_LOCK;
+	}
+	req->rw.kiocb.ki_pos = file->f_pos;
+	return false;
+}
+
 static int io_read(struct io_kiocb *req, unsigned int issue_flags)
 {
 	struct io_rw_state __s, *s = &__s;
@@ -3543,7 +3570,8 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
 
 	if (force_nonblock) {
 		/* If the file doesn't support async, just async punt */
-		if (unlikely(!io_file_supports_nowait(req))) {
+		if (unlikely(!io_file_supports_nowait(req) ||
+			     io_file_pos_lock(req, true))) {
 			ret = io_setup_async_rw(req, iovec, s, true);
 			return ret ?: -EAGAIN;
 		}
@@ -3551,6 +3579,7 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
 	} else {
 		/* Ensure we clear previously set non-block flag */
 		kiocb->ki_flags &= ~IOCB_NOWAIT;
+		io_file_pos_lock(req, false);
 	}
 
 	ret = rw_verify_area(READ, req->file, io_kiocb_ppos(kiocb), req->result);
@@ -3668,7 +3697,8 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags)
 
 	if (force_nonblock) {
 		/* If the file doesn't support async, just async punt */
-		if (unlikely(!io_file_supports_nowait(req)))
+		if (unlikely(!io_file_supports_nowait(req) ||
+			     io_file_pos_lock(req, true)))
 			goto copy_iov;
 
 		/* file path doesn't support NOWAIT for non-direct_IO */
@@ -3680,6 +3710,7 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags)
 	} else {
 		/* Ensure we clear previously set non-block flag */
 		kiocb->ki_flags &= ~IOCB_NOWAIT;
+		io_file_pos_lock(req, false);
 	}
 
 	ret = rw_verify_area(WRITE, req->file, io_kiocb_ppos(kiocb), req->result);
@@ -6584,6 +6615,8 @@ static void io_clean_op(struct io_kiocb *req)
 		kfree(req->kbuf);
 		req->kbuf = NULL;
 	}
+	if (req->flags & REQ_F_CUR_POS_LOCK)
+		__f_unlock_pos(req->file);
 
 	if (req->flags & REQ_F_NEED_CLEANUP) {
 		switch (req->opcode) {

-- 
Jens Axboe