On Fri, Jul 31, 2020 at 6:38 PM hch@xxxxxxxxxxxxx <hch@xxxxxxxxxxxxx> wrote: > > And FYI, this is what I'd do for a hacky aio-only prototype (untested): > > > diff --git a/fs/aio.c b/fs/aio.c > index 91e7cc4a9f179b..42b1934e38758b 100644 > --- a/fs/aio.c > +++ b/fs/aio.c > @@ -1438,7 +1438,10 @@ static void aio_complete_rw(struct kiocb *kiocb, long res, long res2) > } > > iocb->ki_res.res = res; > - iocb->ki_res.res2 = res2; > + if ((kiocb->ki_flags & IOCB_REPORT_OFFSET) && res > 0) > + iocb->ki_res.res2 = kiocb->ki_pos - res; > + else > + iocb->ki_res.res2 = res2; > iocb_put(iocb); > } > > @@ -1452,6 +1455,8 @@ static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb) > req->ki_flags = iocb_flags(req->ki_filp); > if (iocb->aio_flags & IOCB_FLAG_RESFD) > req->ki_flags |= IOCB_EVENTFD; > + if (iocb->aio_flags & IOCB_FLAG_REPORT_OFFSET) > + req->ki_flags |= IOCB_REPORT_OFFSET; > req->ki_hint = ki_hint_validate(file_write_hint(req->ki_filp)); > if (iocb->aio_flags & IOCB_FLAG_IOPRIO) { > /* > diff --git a/include/linux/fs.h b/include/linux/fs.h > index f5abba86107d86..522b0a3437d420 100644 > --- a/include/linux/fs.h > +++ b/include/linux/fs.h > @@ -316,6 +316,7 @@ enum rw_hint { > #define IOCB_WRITE (1 << 6) > #define IOCB_NOWAIT (1 << 7) > #define IOCB_NOIO (1 << 9) > +#define IOCB_REPORT_OFFSET (1 << 10) > > struct kiocb { > struct file *ki_filp; > diff --git a/include/uapi/linux/aio_abi.h b/include/uapi/linux/aio_abi.h > index 8387e0af0f768a..e4313d7aa3b7e7 100644 > --- a/include/uapi/linux/aio_abi.h > +++ b/include/uapi/linux/aio_abi.h > @@ -55,6 +55,7 @@ enum { > */ > #define IOCB_FLAG_RESFD (1 << 0) > #define IOCB_FLAG_IOPRIO (1 << 1) > +#define IOCB_FLAG_REPORT_OFFSET (1 << 2) > > /* read() from /dev/aio returns these structures. */ > struct io_event { Looks good, but it drops io_uring. How about two flags - 1. RWF_REPORT_OFFSET_DIRECT (only for aio) ----> aio fails the second one 2. RWF_REPORT_OFFSET_INDIRECT (for io_uring). 
----> uring fails the first one Since these are RWF flags, they can be used by other sync/async transports also in future if need be. Either of these flags will set single IOCB_REPORT_OFFSET, which can be used by FS/Block etc (they don't have to worry how uring/aio sends it up). This is what I mean in code - diff --git a/fs/aio.c b/fs/aio.c index 91e7cc4a9f17..307dfbfb04f7 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -1472,6 +1472,11 @@ static int aio_prep_rw(struct kiocb *req, const struct iocb *iocb) ret = kiocb_set_rw_flags(req, iocb->aio_rw_flags); if (unlikely(ret)) return ret; + /* support only direct offset */ + if (unlikely(iocb->aio_rw_flags & RWF_REPORT_OFFSET_INDIRECT)) + return -EOPNOTSUPP; + req->ki_flags &= ~IOCB_HIPRI; /* no one is going to poll for this I/O */ return 0; diff --git a/fs/io_uring.c b/fs/io_uring.c index 3e406bc1f855..5fa21644251f 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -2451,6 +2451,7 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe, struct kiocb *kiocb = &req->rw.kiocb; unsigned ioprio; int ret; + rwf_t rw_flags; if (S_ISREG(file_inode(req->file)->i_mode)) req->flags |= REQ_F_ISREG; @@ -2462,9 +2463,13 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe, } kiocb->ki_hint = ki_hint_validate(file_write_hint(kiocb->ki_filp)); kiocb->ki_flags = iocb_flags(kiocb->ki_filp); - ret = kiocb_set_rw_flags(kiocb, READ_ONCE(sqe->rw_flags)); + rw_flags = READ_ONCE(sqe->rw_flags); + ret = kiocb_set_rw_flags(kiocb, rw_flags); if (unlikely(ret)) return ret; + /* support only indirect offset */ + if (unlikely(rw_flags & RWF_REPORT_OFFSET_DIRECT)) + return -EOPNOTSUPP; ioprio = READ_ONCE(sqe->ioprio); if (ioprio) { diff --git a/include/linux/fs.h b/include/linux/fs.h index 8a00ba99284e..fe2f1f5c5d33 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3296,8 +3296,17 @@ static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags) ki->ki_flags |= IOCB_DSYNC; if (flags & RWF_SYNC) 
ki->ki_flags |= (IOCB_DSYNC | IOCB_SYNC); - if (flags & RWF_APPEND) + if (flags & RWF_APPEND) { ki->ki_flags |= IOCB_APPEND; + /* + * 1. These flags do not make sense when used standalone (i.e. without RWF_APPEND) + * 2. RWF_REPORT_OFFSET_DIRECT = report result directly (for aio) + * 3. RWF_REPORT_OFFSET_INDIRECT = use pointer (for io_uring) + * */ + if (flags & RWF_REPORT_OFFSET_DIRECT || + flags & RWF_REPORT_OFFSET_INDIRECT) + ki->ki_flags |= IOCB_REPORT_OFFSET;