From: Selvakumar S <selvakuma.s1@xxxxxxxxxxx> Introduce three new opcodes for zone-append - IORING_OP_ZONE_APPEND : non-vectored, similar to IORING_OP_WRITE IORING_OP_ZONE_APPENDV : vectored, similar to IORING_OP_WRITEV IORING_OP_ZONE_APPEND_FIXED : append using fixed-buffers Repurpose cqe->flags to return zone-relative offset. Signed-off-by: SelvaKumar S <selvakuma.s1@xxxxxxxxxxx> Signed-off-by: Kanchan Joshi <joshi.k@xxxxxxxxxxx> Signed-off-by: Nitesh Shetty <nj.shetty@xxxxxxxxxxx> Signed-off-by: Javier Gonzalez <javier.gonz@xxxxxxxxxxx> --- fs/io_uring.c | 72 +++++++++++++++++++++++++++++++++++++++++-- include/uapi/linux/io_uring.h | 8 ++++- 2 files changed, 77 insertions(+), 3 deletions(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 155f3d8..c14c873 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -649,6 +649,10 @@ struct io_kiocb { unsigned long fsize; u64 user_data; u32 result; +#ifdef CONFIG_BLK_DEV_ZONED + /* zone-relative offset for append, in bytes */ + u32 append_offset; +#endif u32 sequence; struct list_head link_list; @@ -875,6 +879,26 @@ static const struct io_op_def io_op_defs[] = { .hash_reg_file = 1, .unbound_nonreg_file = 1, }, + [IORING_OP_ZONE_APPEND] = { + .needs_mm = 1, + .needs_file = 1, + .unbound_nonreg_file = 1, + .pollout = 1, + }, + [IORING_OP_ZONE_APPENDV] = { + .async_ctx = 1, + .needs_mm = 1, + .needs_file = 1, + .hash_reg_file = 1, + .unbound_nonreg_file = 1, + .pollout = 1, + }, + [IORING_OP_ZONE_APPEND_FIXED] = { + .needs_file = 1, + .hash_reg_file = 1, + .unbound_nonreg_file = 1, + .pollout = 1, + }, }; static void io_wq_submit_work(struct io_wq_work **workptr); @@ -1285,7 +1309,16 @@ static void __io_cqring_fill_event(struct io_kiocb *req, long res, long cflags) if (likely(cqe)) { WRITE_ONCE(cqe->user_data, req->user_data); WRITE_ONCE(cqe->res, res); +#ifdef CONFIG_BLK_DEV_ZONED + if (req->opcode == IORING_OP_ZONE_APPEND || + req->opcode == IORING_OP_ZONE_APPENDV || + req->opcode == IORING_OP_ZONE_APPEND_FIXED) + 
WRITE_ONCE(cqe->res2, req->append_offset); + else + WRITE_ONCE(cqe->flags, cflags); +#else WRITE_ONCE(cqe->flags, cflags); +#endif } else if (ctx->cq_overflow_flushed) { WRITE_ONCE(ctx->rings->cq_overflow, atomic_inc_return(&ctx->cached_cq_overflow)); @@ -1961,6 +1994,9 @@ static void io_complete_rw_common(struct kiocb *kiocb, long res) static void io_complete_rw(struct kiocb *kiocb, long res, long res2) { struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb); +#ifdef CONFIG_BLK_DEV_ZONED + req->append_offset = (u32)res2; +#endif io_complete_rw_common(kiocb, res); io_put_req(req); @@ -1976,6 +2012,9 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2) if (res != req->result) req_set_fail_links(req); req->result = res; +#ifdef CONFIG_BLK_DEV_ZONED + req->append_offset = (u32)res2; +#endif if (res != -EAGAIN) WRITE_ONCE(req->iopoll_completed, 1); } @@ -2408,7 +2447,8 @@ static ssize_t io_import_iovec(int rw, struct io_kiocb *req, u8 opcode; opcode = req->opcode; - if (opcode == IORING_OP_READ_FIXED || opcode == IORING_OP_WRITE_FIXED) { + if (opcode == IORING_OP_READ_FIXED || opcode == IORING_OP_WRITE_FIXED || + opcode == IORING_OP_ZONE_APPEND_FIXED) { *iovec = NULL; return io_import_fixed(req, rw, iter); } @@ -2417,7 +2457,8 @@ static ssize_t io_import_iovec(int rw, struct io_kiocb *req, if (req->buf_index && !(req->flags & REQ_F_BUFFER_SELECT)) return -EINVAL; - if (opcode == IORING_OP_READ || opcode == IORING_OP_WRITE) { + if (opcode == IORING_OP_READ || opcode == IORING_OP_WRITE || + opcode == IORING_OP_ZONE_APPEND) { if (req->flags & REQ_F_BUFFER_SELECT) { buf = io_rw_buffer_select(req, &sqe_len, needs_lock); if (IS_ERR(buf)) { @@ -2704,6 +2745,9 @@ static int io_write(struct io_kiocb *req, bool force_nonblock) req->rw.kiocb.ki_flags &= ~IOCB_NOWAIT; req->result = 0; +#ifdef CONFIG_BLK_DEV_ZONED + req->append_offset = 0; +#endif io_size = ret; if (req->flags & REQ_F_LINK_HEAD) req->result = io_size; @@ -2738,6 +2782,13 @@ 
static int io_write(struct io_kiocb *req, bool force_nonblock) __sb_writers_release(file_inode(req->file)->i_sb, SB_FREEZE_WRITE); } +#ifdef CONFIG_BLK_DEV_ZONED + if (req->opcode == IORING_OP_ZONE_APPEND || + req->opcode == IORING_OP_ZONE_APPENDV || + req->opcode == IORING_OP_ZONE_APPEND_FIXED) + kiocb->ki_flags |= IOCB_ZONE_APPEND; +#endif + kiocb->ki_flags |= IOCB_WRITE; if (!force_nonblock) @@ -4906,6 +4957,12 @@ static int io_req_defer_prep(struct io_kiocb *req, case IORING_OP_WRITEV: case IORING_OP_WRITE_FIXED: case IORING_OP_WRITE: +#ifdef CONFIG_BLK_DEV_ZONED + fallthrough; + case IORING_OP_ZONE_APPEND: + case IORING_OP_ZONE_APPENDV: + case IORING_OP_ZONE_APPEND_FIXED: +#endif ret = io_write_prep(req, sqe, true); break; case IORING_OP_POLL_ADD: @@ -5038,6 +5095,12 @@ static void io_cleanup_req(struct io_kiocb *req) case IORING_OP_WRITEV: case IORING_OP_WRITE_FIXED: case IORING_OP_WRITE: +#ifdef CONFIG_BLK_DEV_ZONED + fallthrough; + case IORING_OP_ZONE_APPEND: + case IORING_OP_ZONE_APPENDV: + case IORING_OP_ZONE_APPEND_FIXED: +#endif if (io->rw.iov != io->rw.fast_iov) kfree(io->rw.iov); break; @@ -5086,6 +5149,11 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe, } ret = io_read(req, force_nonblock); break; +#ifdef CONFIG_BLK_DEV_ZONED + case IORING_OP_ZONE_APPEND: + case IORING_OP_ZONE_APPENDV: + case IORING_OP_ZONE_APPEND_FIXED: +#endif case IORING_OP_WRITEV: case IORING_OP_WRITE_FIXED: case IORING_OP_WRITE: diff --git a/include/uapi/linux/io_uring.h b/include/uapi/linux/io_uring.h index 92c2269..6c8e932 100644 --- a/include/uapi/linux/io_uring.h +++ b/include/uapi/linux/io_uring.h @@ -130,6 +130,9 @@ enum { IORING_OP_PROVIDE_BUFFERS, IORING_OP_REMOVE_BUFFERS, IORING_OP_TEE, + IORING_OP_ZONE_APPEND, + IORING_OP_ZONE_APPENDV, + IORING_OP_ZONE_APPEND_FIXED, /* this goes last, obviously */ IORING_OP_LAST, @@ -157,7 +160,10 @@ enum { struct io_uring_cqe { __u64 user_data; /* sqe->data submission passed back */ __s32 res; /* result 
code for this event */ - __u32 flags; + union { + __u32 res2; /* res2 like aio, currently used for zone-append */ + __u32 flags; + }; }; /* -- 2.7.4