This adds a simple interface that lets other parts of the kernel submit aio iocbs. Callers provide a function which is called as the IO completes. To reduce overhead, these iocbs aren't tracked: they can't be canceled, callers limit the number in flight, and previous patches in this series removed retry-based aio. Signed-off-by: Zach Brown <zach.brown@xxxxxxxxxx> --- fs/aio.c | 86 +++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/aio.h | 11 ++++++ 2 files changed, 97 insertions(+), 0 deletions(-) diff --git a/fs/aio.c b/fs/aio.c index 2406981..7a150c2 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -843,6 +843,10 @@ int aio_complete(struct kiocb *iocb, long res, long res2) iocb->ki_users = 0; wake_up_process(iocb->ki_obj.tsk); return 1; + } else if (is_kernel_kiocb(iocb)) { + iocb->ki_obj.complete(iocb->ki_user_data, res); + aio_kernel_free(iocb); + return 0; } info = &ctx->ring_info; @@ -1706,3 +1710,85 @@ SYSCALL_DEFINE5(io_getevents, aio_context_t, ctx_id, asmlinkage_protect(5, ret, ctx_id, min_nr, nr, events, timeout); return ret; } + +/* + * This allocates an iocb that will be used to submit and track completion of + * an IO that is issued from kernel space. We don't have a context, we don't + * limit the number pending, and we can't be canceled. The caller is expected + * to call the appropriate aio_kernel_init_*() functions and then call + * aio_kernel_submit(). From that point forward progress is guaranteed by the + * file system aio method. Eventually the caller's completion callback will be + * called. + */ +struct kiocb *aio_kernel_alloc(gfp_t gfp) +{ + struct kiocb *iocb = kmem_cache_zalloc(kiocb_cachep, gfp); + if (iocb) + iocb->ki_key = KIOCB_KERNEL_KEY; + return iocb; +} +EXPORT_SYMBOL_GPL(aio_kernel_alloc); + +void aio_kernel_free(struct kiocb *iocb) +{ + if (iocb) + kmem_cache_free(kiocb_cachep, iocb); +} +EXPORT_SYMBOL_GPL(aio_kernel_free); + +/* + * ptr and nr can be a buff and bytes or an iov and segs. 
+ */ +void aio_kernel_init_rw(struct kiocb *iocb, struct file *filp, + unsigned short op, void *ptr, size_t nr, loff_t off) +{ + iocb->ki_filp = filp; + iocb->ki_opcode = op; + iocb->ki_buf = (char __user *)(unsigned long)ptr; + iocb->ki_left = nr; + iocb->ki_nbytes = nr; + iocb->ki_pos = off; +} +EXPORT_SYMBOL_GPL(aio_kernel_init_rw); + +void aio_kernel_init_callback(struct kiocb *iocb, + void (*complete)(u64 user_data, long res), + u64 user_data) +{ + iocb->ki_obj.complete = complete; + iocb->ki_user_data = user_data; +} +EXPORT_SYMBOL_GPL(aio_kernel_init_callback); + +/* + * The iocb is our responsibility once this is called. The caller must not + * reference it. This comes from aio_setup_iocb() modifying the iocb. + * + * Callers must be prepared for their iocb completion callback to be called the + * moment they enter this function. The completion callback may be called from + * any context. + * + * Returns: 0: the iocb completion callback will be called with the op result + * negative errno: the operation was not submitted and the iocb was freed + */ +int aio_kernel_submit(struct kiocb *iocb) +{ + int ret; + + BUG_ON(!is_kernel_kiocb(iocb)); + BUG_ON(!iocb->ki_obj.complete); + BUG_ON(!iocb->ki_filp); + + ret = aio_setup_iocb(iocb); + if (ret) { + aio_kernel_free(iocb); + return ret; + } + + ret = iocb->ki_retry(iocb); + if (ret != -EIOCBQUEUED) + aio_complete(iocb, ret, 0); + + return 0; +} +EXPORT_SYMBOL_GPL(aio_kernel_submit); diff --git a/include/linux/aio.h b/include/linux/aio.h index 4f88ec2..95ef1ea 100644 --- a/include/linux/aio.h +++ b/include/linux/aio.h @@ -24,6 +24,7 @@ struct kioctx; #define KIOCB_C_COMPLETE 0x02 #define KIOCB_SYNC_KEY (~0U) +#define KIOCB_KERNEL_KEY (~1U) /* ki_flags bits */ /* @@ -90,6 +91,7 @@ struct kiocb { union { void __user *user; struct task_struct *tsk; + void (*complete)(u64 user_data, long res); } ki_obj; __u64 ki_user_data; /* user's data for completion */ @@ -118,6 +120,7 @@ struct kiocb { }; #define 
is_sync_kiocb(iocb) ((iocb)->ki_key == KIOCB_SYNC_KEY) +#define is_kernel_kiocb(iocb) ((iocb)->ki_key == KIOCB_KERNEL_KEY) #define init_sync_kiocb(x, filp) \ do { \ struct task_struct *tsk = current; \ @@ -204,6 +207,14 @@ extern int aio_put_req(struct kiocb *iocb); extern int aio_complete(struct kiocb *iocb, long res, long res2); struct mm_struct; extern void exit_aio(struct mm_struct *mm); +struct kiocb *aio_kernel_alloc(gfp_t gfp); +void aio_kernel_free(struct kiocb *iocb); +void aio_kernel_init_rw(struct kiocb *iocb, struct file *filp, + unsigned short op, void *ptr, size_t nr, loff_t off); +void aio_kernel_init_callback(struct kiocb *iocb, + void (*complete)(u64 user_data, long res), + u64 user_data); +int aio_kernel_submit(struct kiocb *iocb); #else static inline ssize_t wait_on_sync_kiocb(struct kiocb *iocb) { return 0; } static inline int aio_put_req(struct kiocb *iocb) { return 0; } -- 1.6.2.5 -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html