On Fri, 2018-12-07 at 15:20 -0700, Jens Axboe wrote: > This is just like io_setup(), except add a flags argument to let the > caller control/define some of the io_context behavior. > > Outside of the flags, we add an iocb array and two user pointers for > future use. > > Signed-off-by: Jens Axboe <axboe@xxxxxxxxx> > --- > arch/x86/entry/syscalls/syscall_64.tbl | 1 + > fs/aio.c | 69 ++++++++++++++++---------- > include/linux/syscalls.h | 3 ++ > include/uapi/asm-generic/unistd.h | 4 +- > kernel/sys_ni.c | 1 + > 5 files changed, 52 insertions(+), 26 deletions(-) > > diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl > index f0b1709a5ffb..67c357225fb0 100644 > --- a/arch/x86/entry/syscalls/syscall_64.tbl > +++ b/arch/x86/entry/syscalls/syscall_64.tbl > @@ -343,6 +343,7 @@ > 332 common statx __x64_sys_statx > 333 common io_pgetevents __x64_sys_io_pgetevents > 334 common rseq __x64_sys_rseq > +335 common io_setup2 __x64_sys_io_setup2 > > # > # x32-specific system call numbers start at 512 to avoid cache impact > diff --git a/fs/aio.c b/fs/aio.c > index 173f1f79dc8f..26631d6872d2 100644 > --- a/fs/aio.c > +++ b/fs/aio.c > @@ -100,6 +100,8 @@ struct kioctx { > > unsigned long user_id; > > + unsigned int flags; > + > struct __percpu kioctx_cpu *cpu; > > /* > @@ -686,10 +688,8 @@ static void aio_nr_sub(unsigned nr) > spin_unlock(&aio_nr_lock); > } > > -/* ioctx_alloc > - * Allocates and initializes an ioctx. Returns an ERR_PTR if it failed. 
> - */ > -static struct kioctx *ioctx_alloc(unsigned nr_events) > +static struct kioctx *io_setup_flags(unsigned long ctxid, > + unsigned int nr_events, unsigned int flags) > { > struct mm_struct *mm = current->mm; > struct kioctx *ctx; > @@ -701,6 +701,12 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) > */ > unsigned int max_reqs = nr_events; > > + if (unlikely(ctxid || nr_events == 0)) { > + pr_debug("EINVAL: ctx %lu nr_events %u\n", > + ctxid, nr_events); > + return ERR_PTR(-EINVAL); > + } > + > /* > * We keep track of the number of available ringbuffer slots, to prevent > * overflow (reqs_available), and we also use percpu counters for this. > @@ -726,6 +732,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) > if (!ctx) > return ERR_PTR(-ENOMEM); > > + ctx->flags = flags; > ctx->max_reqs = max_reqs; > > spin_lock_init(&ctx->ctx_lock); > @@ -1281,6 +1288,34 @@ static long read_events(struct kioctx *ctx, long min_nr, long nr, > return ret; > } > How about adding a comment above io_setup2, similar to the one for io_setup below? And would you also like to mention io_setup2 in Documentation/sysctl/fs.txt? > +SYSCALL_DEFINE6(io_setup2, u32, nr_events, u32, flags, struct iocb __user *, > + iocbs, void __user *, user1, void __user *, user2, > + aio_context_t __user *, ctxp) > +{ > + struct kioctx *ioctx; > + unsigned long ctx; > + long ret; > + > + if (flags || user1 || user2) > + return -EINVAL; > + > + ret = get_user(ctx, ctxp); > + if (unlikely(ret)) > + goto out; > + > + ioctx = io_setup_flags(ctx, nr_events, flags); > + ret = PTR_ERR(ioctx); > + if (IS_ERR(ioctx)) > + goto out; > + > + ret = put_user(ioctx->user_id, ctxp); > + if (ret) > + kill_ioctx(current->mm, ioctx, NULL); > + percpu_ref_put(&ioctx->users); > +out: > + return ret; > +} > + > /* sys_io_setup: > * Create an aio_context capable of receiving at least nr_events. 
> * ctxp must not point to an aio_context that already exists, and > @@ -1296,7 +1331,7 @@ static long read_events(struct kioctx *ctx, long min_nr, long nr, > */ > SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp) > { > - struct kioctx *ioctx = NULL; > + struct kioctx *ioctx; > unsigned long ctx; > long ret; > > @@ -1304,14 +1339,7 @@ SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp) > if (unlikely(ret)) > goto out; > > - ret = -EINVAL; > - if (unlikely(ctx || nr_events == 0)) { > - pr_debug("EINVAL: ctx %lu nr_events %u\n", > - ctx, nr_events); > - goto out; > - } > - > - ioctx = ioctx_alloc(nr_events); > + ioctx = io_setup_flags(ctx, nr_events, 0); > ret = PTR_ERR(ioctx); > if (!IS_ERR(ioctx)) { > ret = put_user(ioctx->user_id, ctxp); > @@ -1327,7 +1355,7 @@ SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp) > #ifdef CONFIG_COMPAT > COMPAT_SYSCALL_DEFINE2(io_setup, unsigned, nr_events, u32 __user *, ctx32p) > { > - struct kioctx *ioctx = NULL; > + struct kioctx *ioctx; > unsigned long ctx; > long ret; > > @@ -1335,23 +1363,14 @@ COMPAT_SYSCALL_DEFINE2(io_setup, unsigned, nr_events, u32 __user *, ctx32p) > if (unlikely(ret)) > goto out; > > - ret = -EINVAL; > - if (unlikely(ctx || nr_events == 0)) { > - pr_debug("EINVAL: ctx %lu nr_events %u\n", > - ctx, nr_events); > - goto out; > - } > - > - ioctx = ioctx_alloc(nr_events); > + ioctx = io_setup_flags(ctx, nr_events, 0); > ret = PTR_ERR(ioctx); > if (!IS_ERR(ioctx)) { > - /* truncating is ok because it's a user address */ > - ret = put_user((u32)ioctx->user_id, ctx32p); > + ret = put_user(ioctx->user_id, ctx32p); > if (ret) > kill_ioctx(current->mm, ioctx, NULL); > percpu_ref_put(&ioctx->users); > } > - > out: > return ret; > } > diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h > index 2ac3d13a915b..a20a663d583f 100644 > --- a/include/linux/syscalls.h > +++ b/include/linux/syscalls.h > @@ -287,6 +287,9 @@ static inline 
void addr_limit_user_check(void) > */ > #ifndef CONFIG_ARCH_HAS_SYSCALL_WRAPPER > asmlinkage long sys_io_setup(unsigned nr_reqs, aio_context_t __user *ctx); > +asmlinkage long sys_io_setup2(unsigned, unsigned, struct iocb __user *, > + void __user *, void __user *, > + aio_context_t __user *); > asmlinkage long sys_io_destroy(aio_context_t ctx); > asmlinkage long sys_io_submit(aio_context_t, long, > struct iocb __user * __user *); > diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h > index 538546edbfbd..b4527ed373b0 100644 > --- a/include/uapi/asm-generic/unistd.h > +++ b/include/uapi/asm-generic/unistd.h > @@ -738,9 +738,11 @@ __SYSCALL(__NR_statx, sys_statx) > __SC_COMP(__NR_io_pgetevents, sys_io_pgetevents, compat_sys_io_pgetevents) > #define __NR_rseq 293 > __SYSCALL(__NR_rseq, sys_rseq) > +#define __NR_io_setup2 294 > +__SYSCALL(__NR_io_setup2, sys_io_setup2) > > #undef __NR_syscalls > -#define __NR_syscalls 294 > +#define __NR_syscalls 295 > > /* > * 32 bit systems traditionally used different > diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c > index df556175be50..17c8b4393669 100644 > --- a/kernel/sys_ni.c > +++ b/kernel/sys_ni.c > @@ -37,6 +37,7 @@ asmlinkage long sys_ni_syscall(void) > */ > > COND_SYSCALL(io_setup); > +COND_SYSCALL(io_setup2); > COND_SYSCALL_COMPAT(io_setup); > COND_SYSCALL(io_destroy); > COND_SYSCALL(io_submit);