I like this idea, but there are a couple of issues here.

First, the flag per command really doesn't work - we need a creation time flag.  Unfortunately the existing io_setup system call doesn't take flags, so we'll need to add a new one.

Second, we need a check that polling mode is actually supported for a given file descriptor.

Third, this hardcodes block device knowledge into the aio code.  We really need to move it into a method.  See my third attached patch for an (untested) idea of how to make that work; the first two are just cleanups.

Fourth, we already have aio ->poll support, so this needs a different naming scheme, e.g. *iopoll*.
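To illustrate the creation time flag idea, here is a rough sketch of what a flags-aware setup syscall could look like.  This is purely illustrative and untested - io_setup2(), IOCTX_FLAG_IOPOLL and do_io_setup() are made-up names, not anything that exists today:

/* Sketch only: io_setup2(), IOCTX_FLAG_IOPOLL and do_io_setup() are
 * hypothetical names used for illustration. */
#define IOCTX_FLAG_IOPOLL       (1 << 0)        /* all I/O on this context is polled */

SYSCALL_DEFINE3(io_setup2, unsigned, nr_events, unsigned int, flags,
                aio_context_t __user *, ctxp)
{
        if (flags & ~IOCTX_FLAG_IOPOLL)
                return -EINVAL;

        /*
         * Allocate the kioctx exactly like io_setup() does, but record the
         * flags in the context so that io_submit() can reject iocbs that
         * don't match and io_getevents() knows to poll instead of sleep.
         */
        return do_io_setup(nr_events, flags, ctxp);
}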
>From 8b0d8f2e723bcf52d010c46130eb759770a0dc11 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@xxxxxx>
Date: Mon, 19 Nov 2018 08:13:19 +0100
Subject: aio: split get_reqs_available from aio_get_req

This makes the polled case nice to handle, and matches the put side.

Signed-off-by: Christoph Hellwig <hch@xxxxxx>
---
 fs/aio.c | 41 ++++++++++++++++++++++-------------------
 1 file changed, 22 insertions(+), 19 deletions(-)

diff --git a/fs/aio.c b/fs/aio.c
index e02085fe10d7..348f04129035 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -935,7 +935,7 @@ static void put_reqs_available(struct kioctx *ctx, unsigned nr)
         local_irq_restore(flags);
 }
 
-static bool get_reqs_available(struct kioctx *ctx)
+static bool __get_reqs_available(struct kioctx *ctx)
 {
         struct kioctx_cpu *kcpu;
         bool ret = false;
@@ -1027,23 +1027,25 @@ static void user_refill_reqs_available(struct kioctx *ctx)
         spin_unlock_irq(&ctx->completion_lock);
 }
 
+static bool get_reqs_available(struct kioctx *ctx)
+{
+        if (__get_reqs_available(ctx))
+                return true;
+        user_refill_reqs_available(ctx);
+        return __get_reqs_available(ctx);
+}
+
 /* aio_get_req
  *      Allocate a slot for an aio request.
  * Returns NULL if no requests are free.
  */
-static inline struct aio_kiocb *aio_get_req(struct kioctx *ctx, bool needs_ring)
+static inline struct aio_kiocb *aio_get_req(struct kioctx *ctx)
 {
         struct aio_kiocb *req;
 
-        if (needs_ring && !get_reqs_available(ctx)) {
-                user_refill_reqs_available(ctx);
-                if (!get_reqs_available(ctx))
-                        return NULL;
-        }
-
         req = kmem_cache_alloc(kiocb_cachep, GFP_KERNEL|__GFP_ZERO);
         if (unlikely(!req))
-                goto out_put;
+                return NULL;
 
         percpu_ref_get(&ctx->reqs);
         INIT_LIST_HEAD(&req->ki_list);
@@ -1051,10 +1053,6 @@ static inline struct aio_kiocb *aio_get_req(struct kioctx *ctx, bool needs_ring)
         refcount_set(&req->ki_refcnt, 0);
         req->ki_ctx = ctx;
         return req;
-out_put:
-        if (needs_ring)
-                put_reqs_available(ctx, 1);
-        return NULL;
 }
 
 static struct kioctx *lookup_ioctx(unsigned long ctx_id)
@@ -2200,17 +2198,21 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
                 return -EINVAL;
         }
 
-        if (iocb.aio_flags & IOCB_FLAG_HIPRI)
+        if (iocb.aio_flags & IOCB_FLAG_HIPRI) {
                 ctx_type = CTX_TYPE_POLLED;
-        else
+        } else {
                 ctx_type = CTX_TYPE_NORMAL;
+                if (!get_reqs_available(ctx))
+                        return -EAGAIN;
+        }
 
         /*
          * Polled IO doesn't need ring reservations
          */
-        req = aio_get_req(ctx, ctx_type == CTX_TYPE_NORMAL);
+        ret = -EAGAIN;
+        req = aio_get_req(ctx);
         if (unlikely(!req))
-                return -EAGAIN;
+                goto out_put_reqs_available;
 
         if (iocb.aio_flags & IOCB_FLAG_RESFD) {
                 /*
@@ -2294,12 +2296,13 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
         return 0;
 
 out_put_req:
-        if (ctx_type == CTX_TYPE_NORMAL)
-                put_reqs_available(ctx, 1);
         percpu_ref_put(&ctx->reqs);
         if (req->ki_eventfd)
                 eventfd_ctx_put(req->ki_eventfd);
         kmem_cache_free(kiocb_cachep, req);
+out_put_reqs_available:
+        if (ctx_type == CTX_TYPE_NORMAL)
+                put_reqs_available(ctx, 1);
         return ret;
 }
 
-- 
2.19.1
>From a42c0af6c96b47d9b915e4efdaa0211e9b6b5253 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@xxxxxx>
Date: Sun, 18 Nov 2018 16:24:22 +0100
Subject: cleanup the aio_poll_reap related flow

Should not change behavior except for fixing a bug where the number of
returned iocbs was incorrectly overwritten when we actually loop on
poll_done for the first call.

I don't really understand why we loop there but not the second time we
call it.  Nor do I really understand the nested loop in the callers of
__aio_check_polled, but that is for another time..
---
 fs/aio.c | 152 ++++++++++++++++++-------------------------------------
 1 file changed, 50 insertions(+), 102 deletions(-)

diff --git a/fs/aio.c b/fs/aio.c
index 348f04129035..d9198f99ed97 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1091,7 +1091,7 @@ static inline void iocb_put(struct aio_kiocb *iocb)
         }
 }
 
-static void iocb_put_many(struct kioctx *ctx, void **iocbs, int nr)
+static void iocb_put_many(struct kioctx *ctx, void **iocbs, int nr)
 {
         if (nr) {
                 kmem_cache_free_bulk(kiocb_cachep, nr, iocbs);
@@ -1316,42 +1316,33 @@ static struct block_device *aio_bdev_host(struct kiocb *req)
 
 #define AIO_POLL_STACK  8
 
-struct aio_poll_data {
-        struct io_event __user *evs;
-        int off;
-        long max;
-        void *iocbs[AIO_POLL_STACK];
-        int to_free;
-};
-
 /*
- * Process the done_list of iocbs, copy to user space, and free them.
- * Migh return with data->iocbs holding entries, in which case
- * data->to_free is non-zero and the caller should free them.
+ * Process the done_list of iocbs, copy to user space, and free them. Might
+ * return with iocbs holding entries, in which case *to_free is non-zero and
+ * the caller should free them.
  */
-static long aio_poll_reap(struct kioctx *ctx, struct aio_poll_data *data)
+static long aio_poll_reap(struct kioctx *ctx, struct io_event __user *evs,
+                int off, long max, void **iocbs, int *to_free)
         __releases(&ctx->poll_lock) __acquires(&ctx->poll_lock)
 {
         struct aio_kiocb *iocb;
         int ret, nr = 0;
 
-restart:
-        while (!list_empty(&ctx->poll_done)) {
+        while ((iocb = list_first_entry_or_null(&ctx->poll_done,
+                        struct aio_kiocb, ki_poll_list))) {
                 struct io_event __user *uev;
                 struct io_event ev;
 
-                if (data->to_free == ARRAY_SIZE(data->iocbs)) {
-                        iocb_put_many(ctx, data->iocbs, data->to_free);
-                        data->to_free = 0;
+                if (*to_free == AIO_POLL_STACK) {
+                        iocb_put_many(ctx, iocbs, *to_free);
+                        *to_free = 0;
                 }
 
-                iocb = list_first_entry(&ctx->poll_done, struct aio_kiocb,
-                                                ki_poll_list);
                 list_del(&iocb->ki_poll_list);
+                iocbs[*to_free++] = iocb;
 
-                data->iocbs[data->to_free++] = iocb;
-                if (!data->evs) {
+                if (!evs) {
                         nr++;
                         continue;
                 }
@@ -1361,65 +1352,26 @@ static long aio_poll_reap(struct kioctx *ctx, struct aio_poll_data *data)
                 ev.res = iocb->ki_poll_res;
                 ev.res2 = iocb->ki_poll_res2;
 
-                uev = data->evs + nr + data->off;
-                if (!__copy_to_user_inatomic(uev, &ev, sizeof(*uev))) {
-                        nr++;
-                        if (nr + data->off < data->max)
-                                continue;
-                        break;
+                uev = evs + nr + off;
+                if (unlikely(__copy_to_user_inatomic(uev, &ev, sizeof(*uev)))) {
+                        /*
+                         * Unexpected slow path, drop lock and attempt copy
+                         * again. If this also fails we are done.
+                         */
+                        spin_unlock_irq(&ctx->poll_lock);
+                        ret = copy_to_user(uev, &ev, sizeof(*uev));
+                        spin_lock_irq(&ctx->poll_lock);
+                        if (ret)
+                                return nr ? nr : -EFAULT;
                 }
 
-                /*
-                 * Unexpected slow path, drop lock and attempt copy. If this
-                 * also fails, we're done. If it worked, we got another event
-                 * and we restart the list check since we dropped the lock.
-                 */
-                spin_unlock_irq(&ctx->poll_lock);
-                ret = copy_to_user(uev, &ev, sizeof(*uev));
-                spin_lock_irq(&ctx->poll_lock);
-                if (!ret) {
-                        nr++;
-                        if (nr + data->off < data->max)
-                                goto restart;
-
+                if (++nr + off == max)
                         break;
-                }
-
-                if (!nr)
-                        nr = -EFAULT;
-                break;
         }
 
         return nr;
 }
 
-/*
- * Reap done events, if any
- */
-static long aio_poll_find(struct kioctx *ctx, struct io_event __user *evs,
-        int off, long max)
-{
-        struct aio_poll_data data = {
-                .evs = evs,
-                .off = off,
-                .max = max,
-                .to_free = 0
-        };
-        int ret;
-
-        if (list_empty_careful(&ctx->poll_done))
-                return 0;
-
-        spin_lock_irq(&ctx->poll_lock);
-        ret = aio_poll_reap(ctx, &data);
-        spin_unlock_irq(&ctx->poll_lock);
-
-        if (data.to_free)
-                iocb_put_many(ctx, data.iocbs, data.to_free);
-
-        return ret;
-}
-
 static void aio_poll_for_events(struct kioctx *ctx, struct aio_iopoll_data *pd,
         unsigned int nr_pd, int off, long min, long max)
 {
@@ -1448,42 +1400,32 @@ static int __aio_check_polled(struct kioctx *ctx, struct io_event __user *event,
         int off, unsigned int *entries, long min, long max)
 {
         struct aio_iopoll_data pd[AIO_POLL_STACK];
+        void *iocbs[AIO_POLL_STACK];
+        int to_free = 0;
         struct aio_kiocb *iocb;
         unsigned int nr_pd;
-        int ret, pre = 0;
+        int ret, found = 0;
 
         if (list_empty_careful(&ctx->poll_pending))
                 goto out;
 
-        spin_lock_irq(&ctx->poll_lock);
-
         /*
          * Check if we already have done events that satisfy what we need
          */
-        while (!list_empty(&ctx->poll_done)) {
-                struct aio_poll_data data = {
-                        .evs = event,
-                        .off = off,
-                        .max = max,
-                        .to_free = 0
-                };
-
-                ret = aio_poll_reap(ctx, &data);
-                if (!ret)
-                        break;
-                else if (ret < 0 || ret + off >= min) {
+        spin_lock_irq(&ctx->poll_lock);
+        while ((ret = aio_poll_reap(ctx, event, off, max, iocbs, &to_free))) {
+                if (ret < 0 || ret + off >= min) {
                         spin_unlock_irq(&ctx->poll_lock);
-
-                        if (data.to_free)
-                                iocb_put_many(ctx, data.iocbs, data.to_free);
-
+                        if (to_free)
+                                iocb_put_many(ctx, iocbs, to_free);
                         return ret;
                 }
 
-                if (data.to_free)
-                        iocb_put_many(ctx, data.iocbs, data.to_free);
-
-                pre = ret;
+                if (to_free) {
+                        iocb_put_many(ctx, iocbs, to_free);
+                        to_free = 0;
+                }
+                found += ret;
                 off += ret;
         }
 
@@ -1518,13 +1460,19 @@ static int __aio_check_polled(struct kioctx *ctx, struct io_event __user *event,
         }
 
 out:
-        ret = aio_poll_find(ctx, event, off, max);
-        if (ret >= 0)
-                return pre + ret;
-        else if (pre)
-                return pre;
+        if (!list_empty_careful(&ctx->poll_done)) {
+                spin_lock_irq(&ctx->poll_lock);
+                ret = aio_poll_reap(ctx, event, off, max, iocbs, &to_free);
+                spin_unlock_irq(&ctx->poll_lock);
+
+                if (to_free)
+                        iocb_put_many(ctx, iocbs, to_free);
+                if (ret < 0)
+                        return ret;
+                found += ret;
+        }
 
-        return ret;
+        return found;
 }
 
 /*
-- 
2.19.1
>From f44b92fc87f2f83946af12db9faea5916016a486 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@xxxxxx>
Date: Sun, 18 Nov 2018 17:17:55 +0100
Subject: use array of iocbs

XXX: will need some protection against concurrent reaps
---
 fs/aio.c              | 121 +++++++++++++-----------------------------
 fs/block_dev.c        |  20 +++++--
 fs/direct-io.c        |   4 +-
 fs/iomap.c            |  53 +++++++++++-------
 fs/xfs/xfs_file.c     |   1 +
 include/linux/fs.h    |   2 +-
 include/linux/iomap.h |   1 +
 7 files changed, 90 insertions(+), 112 deletions(-)

diff --git a/fs/aio.c b/fs/aio.c
index d9198f99ed97..cb2fead2ab7c 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -89,6 +89,9 @@ struct ctx_rq_wait {
 enum {
         CTX_TYPE_NORMAL = 0,
         CTX_TYPE_POLLED,
+
+        /* currently undergoing a polling io_getevents */
+        CTX_TYPE_POLLING,
 };
 
 struct kioctx {
@@ -1282,38 +1285,6 @@ static bool aio_read_events(struct kioctx *ctx, long min_nr, long nr,
         return ret < 0 || *i >= min_nr;
 }
 
-struct aio_iopoll_data {
-        unsigned int blk_qc;
-        struct block_device *bdev;
-};
-
-static int aio_io_poll(struct aio_iopoll_data *pd, bool wait)
-{
-#ifdef CONFIG_BLOCK
-        /*
-         * Should only happen if someone sets ->ki_blk_qc at random,
-         * not being a blockdev target. We'll just ignore it, the IO
-         * will complete normally without being polled.
-         */
-        if (pd->bdev)
-                return blk_poll(bdev_get_queue(pd->bdev), pd->blk_qc, wait);
-#endif
-
-        return 0;
-}
-
-static struct block_device *aio_bdev_host(struct kiocb *req)
-{
-        struct inode *inode = req->ki_filp->f_mapping->host;
-
-        if (S_ISBLK(inode->i_mode))
-                return I_BDEV(inode);
-        else if (inode->i_sb && inode->i_sb->s_bdev)
-                return inode->i_sb->s_bdev;
-
-        return NULL;
-}
-
 #define AIO_POLL_STACK  8
 
 /*
@@ -1372,39 +1343,12 @@ static long aio_poll_reap(struct kioctx *ctx, struct io_event __user *evs,
         return nr;
 }
 
-static void aio_poll_for_events(struct kioctx *ctx, struct aio_iopoll_data *pd,
-        unsigned int nr_pd, int off, long min, long max)
-{
-        int i, polled = 0;
-
-        /*
-         * Poll for needed events with wait == true, anything
-         * after that we just check if we have more, up to max.
-         */
-        for (i = 0; i < nr_pd; i++) {
-                bool wait = polled + off >= min;
-
-                polled += aio_io_poll(&pd[i], wait);
-                if (polled + off >= max)
-                        break;
-
-                /*
-                 * If we have entries waiting to be reaped, stop polling
-                 */
-                if (!list_empty_careful(&ctx->poll_done))
-                        break;
-        }
-}
-
 static int __aio_check_polled(struct kioctx *ctx, struct io_event __user *event,
-        int off, unsigned int *entries, long min, long max)
+        int off, unsigned int *to_poll, long min, long max)
 {
-        struct aio_iopoll_data pd[AIO_POLL_STACK];
         void *iocbs[AIO_POLL_STACK];
         struct aio_kiocb *iocb;
-        unsigned int nr_pd;
-        int ret, found = 0;
+        int to_free = 0, found = 0, polled = 0, ret, i;
 
         if (list_empty_careful(&ctx->poll_pending))
                 goto out;
@@ -1433,30 +1377,27 @@ static int __aio_check_polled(struct kioctx *ctx, struct io_event __user *event,
          * Find up to 'max_nr' worth of events to poll for, including the
          * events we already successfully polled
          */
-        nr_pd = 0;
         list_for_each_entry(iocb, &ctx->poll_pending, ki_poll_list) {
-                struct kiocb *kiocb = &iocb->rw;
-                blk_qc_t qc;
-
-                /*
-                 * Not submitted yet, don't poll for it
-                 */
-                qc = READ_ONCE(kiocb->ki_blk_qc);
-                if (qc == BLK_QC_T_NONE)
-                        continue;
-
-                pd[nr_pd].blk_qc = qc;
-                pd[nr_pd].bdev = aio_bdev_host(kiocb);
-
-                ++nr_pd;
-                if (nr_pd == ARRAY_SIZE(pd) || nr_pd + off >= max)
+                iocbs[*to_poll] = iocb;
+                (*to_poll)++;
+                if (*to_poll == AIO_POLL_STACK || *to_poll + off >= max)
                         break;
         }
 
         spin_unlock_irq(&ctx->poll_lock);
-        if (nr_pd) {
-                *entries = nr_pd;
-                aio_poll_for_events(ctx, pd, nr_pd, off, min, max);
+        /*
+         * Poll for needed events with wait == true, anything after that we just
+         * check if we have more, up to max.
+         *
+         * If we have entries waiting to be reaped, stop polling
+         */
+        for (i = 0; i < *to_poll; i++) {
+                bool wait = polled + off >= min;
+
+                iocb = iocbs[*to_poll];
+                polled += iocb->rw.ki_filp->f_op->iopoll(&iocb->rw, wait);
+                if (polled + off >= max || !list_empty_careful(&ctx->poll_done))
+                        break;
         }
 
 out:
@@ -1502,6 +1443,10 @@ static int aio_check_polled(struct kioctx *ctx, long min_nr, long nr,
         unsigned int found;
         int this, ret = 0;
 
+        /* We can only allow a single thread to poll a context at a time */
+        if (test_and_set_bit(CTX_TYPE_POLLING, &ctx->io_type))
+                return -EBUSY;
+
         if (!access_ok(VERIFY_WRITE, event, nr * sizeof(*event)))
                 return -EFAULT;
 
@@ -1522,6 +1467,7 @@ static int aio_check_polled(struct kioctx *ctx, long min_nr, long nr,
                 ret += this;
         } while (found && ret < min_nr);
 
+        clear_bit(CTX_TYPE_POLLING, &ctx->io_type);
         return ret;
 }
 
@@ -1737,14 +1683,19 @@ static int aio_prep_rw(struct aio_kiocb *kiocb, struct iocb *iocb)
         if (iocb->aio_flags & IOCB_FLAG_HIPRI) {
                 struct kioctx *ctx = kiocb->ki_ctx;
 
+                ret = -EOPNOTSUPP;
+                if (!(req->ki_flags & IOCB_DIRECT) ||
+                    !req->ki_filp->f_op->iopoll)
+                        goto out_fput;
+
                 req->ki_flags |= IOCB_HIPRI;
-                req->ki_blk_qc = BLK_QC_T_NONE;
                 req->ki_complete = aio_complete_rw_poll;
 
                 spin_lock_irq(&ctx->poll_lock);
                 list_add_tail(&kiocb->ki_poll_list, &ctx->poll_pending);
                 spin_unlock_irq(&ctx->poll_lock);
         } else {
+                req->ki_flags &= ~IOCB_HIPRI;
                 req->ki_complete = aio_complete_rw;
         }
 
@@ -1761,8 +1712,7 @@ static int aio_prep_rw(struct aio_kiocb *kiocb, struct iocb *iocb)
                 ret = ioprio_check_cap(iocb->aio_reqprio);
                 if (ret) {
                         pr_debug("aio ioprio check cap error: %d\n", ret);
-                        fput(req->ki_filp);
-                        return ret;
+                        goto out_fput;
                 }
 
                 req->ki_ioprio = iocb->aio_reqprio;
@@ -1771,7 +1721,10 @@ static int aio_prep_rw(struct aio_kiocb *kiocb, struct iocb *iocb)
 
         ret = kiocb_set_rw_flags(req, iocb->aio_rw_flags);
         if (unlikely(ret))
-                fput(req->ki_filp);
+                goto out_fput;
+        return 0;
+out_fput:
+        fput(req->ki_filp);
         return ret;
 }
 
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 8a2fed18e3fc..8ba58e280ac6 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -236,7 +236,6 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
                 bio.bi_opf |= REQ_HIPRI;
 
         qc = submit_bio(&bio);
-        WRITE_ONCE(iocb->ki_blk_qc, qc);
         for (;;) {
                 __set_current_state(TASK_UNINTERRUPTIBLE);
 
@@ -274,6 +273,7 @@ struct blkdev_dio {
         };
         size_t                  size;
         atomic_t                ref;
+        blk_qc_t                qc;
         bool                    multi_bio : 1;
         bool                    should_dirty : 1;
         bool                    is_sync : 1;
@@ -282,6 +282,14 @@ static struct bio_set blkdev_dio_pool;
 
+static bool blkdev_iopoll(struct kiocb *kiocb, bool wait)
+{
+        struct blkdev_dio *dio = kiocb->private;
+        struct block_device *bdev = I_BDEV(kiocb->ki_filp->f_mapping->host);
+
+        return blk_poll(bdev_get_queue(bdev), READ_ONCE(dio->qc), wait);
+}
+
 static void blkdev_bio_end_io(struct bio *bio)
 {
         struct blkdev_dio *dio = bio->bi_private;
@@ -336,7 +344,6 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
         bool is_poll = (iocb->ki_flags & IOCB_HIPRI) != 0;
         bool is_read = (iov_iter_rw(iter) == READ), is_sync;
         loff_t pos = iocb->ki_pos;
-        blk_qc_t qc = BLK_QC_T_NONE;
         int ret = 0;
 
         if ((pos | iov_iter_alignment(iter)) &
@@ -356,6 +363,9 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
         dio->size = 0;
         dio->multi_bio = false;
         dio->should_dirty = is_read && iter_is_iovec(iter);
+        dio->qc = BLK_QC_T_NONE;
+
+        iocb->private = dio;
 
         /*
          * Don't plug for HIPRI/polled IO, as those should go straight
@@ -396,8 +406,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
                         if (iocb->ki_flags & IOCB_HIPRI)
                                 bio->bi_opf |= REQ_HIPRI;
 
-                        qc = submit_bio(bio);
-                        WRITE_ONCE(iocb->ki_blk_qc, qc);
+                        WRITE_ONCE(dio->qc, submit_bio(bio));
                         break;
                 }
 
@@ -425,7 +434,7 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
                         break;
 
                 if (!(iocb->ki_flags & IOCB_HIPRI) ||
-                    !blk_poll(bdev_get_queue(bdev), qc, true))
+                    !blk_poll(bdev_get_queue(bdev), dio->qc, true))
                         io_schedule();
         }
         __set_current_state(TASK_RUNNING);
@@ -2063,6 +2072,7 @@ const struct file_operations def_blk_fops = {
         .llseek         = block_llseek,
         .read_iter      = blkdev_read_iter,
         .write_iter     = blkdev_write_iter,
+        .iopoll         = blkdev_iopoll,
         .mmap           = generic_file_mmap,
         .fsync          = blkdev_fsync,
         .unlocked_ioctl = block_ioctl,
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 34de494e9061..a5a4e5a1423e 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -477,10 +477,8 @@ static inline void dio_bio_submit(struct dio *dio, struct dio_submit *sdio)
         if (sdio->submit_io) {
                 sdio->submit_io(bio, dio->inode, sdio->logical_offset_in_bio);
                 dio->bio_cookie = BLK_QC_T_NONE;
-        } else {
+        } else
                 dio->bio_cookie = submit_bio(bio);
-                WRITE_ONCE(dio->iocb->ki_blk_qc, dio->bio_cookie);
-        }
 
         sdio->bio = NULL;
         sdio->boundary = 0;
diff --git a/fs/iomap.c b/fs/iomap.c
index 4cf412b6230a..e5cd9dbe78a8 100644
--- a/fs/iomap.c
+++ b/fs/iomap.c
@@ -1419,14 +1419,14 @@ struct iomap_dio {
         unsigned                flags;
         int                     error;
         bool                    wait_for_completion;
+        blk_qc_t                cookie;
+        struct request_queue    *last_queue;
 
         union {
                 /* used during submission and for synchronous completion: */
                 struct {
                         struct iov_iter         *iter;
                         struct task_struct      *waiter;
-                        struct request_queue    *last_queue;
-                        blk_qc_t                cookie;
                 } submit;
 
                 /* used for aio completion: */
@@ -1436,6 +1436,30 @@ struct iomap_dio {
         };
 };
 
+bool iomap_dio_iopoll(struct kiocb *kiocb, bool wait)
+{
+        struct iomap_dio *dio = kiocb->private;
+        struct request_queue *q = READ_ONCE(dio->last_queue);
+
+        if (!q)
+                return false;
+        return blk_poll(q, READ_ONCE(dio->cookie), wait);
+}
+EXPORT_SYMBOL_GPL(iomap_dio_iopoll);
+
+static void iomap_dio_submit_bio(struct iomap_dio *dio, struct iomap *iomap,
+                struct bio *bio)
+{
+        atomic_inc(&dio->ref);
+
+        /*
+         * iomap_dio_iopoll can race with us. A non-zero last_queue marks that
+         * we are ready to poll.
+         */
+        WRITE_ONCE(dio->cookie, submit_bio(bio));
+        WRITE_ONCE(dio->last_queue, bdev_get_queue(iomap->bdev));
+}
+
 static ssize_t iomap_dio_complete(struct iomap_dio *dio)
 {
         struct kiocb *iocb = dio->iocb;
@@ -1548,14 +1572,13 @@ static void iomap_dio_bio_end_io(struct bio *bio)
         }
 }
 
-static blk_qc_t
+static void
 iomap_dio_zero(struct iomap_dio *dio, struct iomap *iomap, loff_t pos,
                 unsigned len)
 {
         struct page *page = ZERO_PAGE(0);
         int flags = REQ_SYNC | REQ_IDLE;
         struct bio *bio;
-        blk_qc_t qc;
 
         bio = bio_alloc(GFP_KERNEL, 1);
         bio_set_dev(bio, iomap->bdev);
@@ -1569,11 +1592,7 @@ iomap_dio_zero(struct iomap_dio *dio, struct iomap *iomap, loff_t pos,
         get_page(page);
         __bio_add_page(bio, page, len, 0);
         bio_set_op_attrs(bio, REQ_OP_WRITE, flags);
-
-        atomic_inc(&dio->ref);
-        qc = submit_bio(bio);
-        WRITE_ONCE(dio->iocb->ki_blk_qc, qc);
-        return qc;
+        iomap_dio_submit_bio(dio, iomap, bio);
 }
 
 static loff_t
@@ -1679,11 +1698,7 @@ iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length,
                 copied += n;
 
                 nr_pages = iov_iter_npages(&iter, BIO_MAX_PAGES);
-
-                atomic_inc(&dio->ref);
-
-                dio->submit.last_queue = bdev_get_queue(iomap->bdev);
-                dio->iocb->ki_blk_qc = dio->submit.cookie = submit_bio(bio);
+                iomap_dio_submit_bio(dio, iomap, bio);
         } while (nr_pages);
 
         if (need_zeroout) {
@@ -1785,6 +1800,7 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
         dio = kmalloc(sizeof(*dio), GFP_KERNEL);
         if (!dio)
                 return -ENOMEM;
+        iocb->private = dio;
 
         dio->iocb = iocb;
         atomic_set(&dio->ref, 1);
@@ -1794,11 +1810,11 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
         dio->error = 0;
         dio->flags = 0;
         dio->wait_for_completion = is_sync_kiocb(iocb);
+        dio->cookie = BLK_QC_T_NONE;
+        dio->last_queue = NULL;
 
         dio->submit.iter = iter;
         dio->submit.waiter = current;
-        dio->submit.cookie = BLK_QC_T_NONE;
-        dio->submit.last_queue = NULL;
 
         if (iov_iter_rw(iter) == READ) {
                 if (pos >= dio->i_size)
@@ -1897,9 +1913,8 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
                         break;
 
                 if (!(iocb->ki_flags & IOCB_HIPRI) ||
-                    !dio->submit.last_queue ||
-                    !blk_poll(dio->submit.last_queue,
-                              dio->submit.cookie, true))
+                    !dio->last_queue ||
+                    !blk_poll(dio->last_queue, dio->cookie, true))
                         io_schedule();
         }
         __set_current_state(TASK_RUNNING);
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 53c9ab8fb777..603e705781a4 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1203,6 +1203,7 @@ const struct file_operations xfs_file_operations = {
         .write_iter     = xfs_file_write_iter,
         .splice_read    = generic_file_splice_read,
         .splice_write   = iter_file_splice_write,
+        .iopoll         = iomap_dio_iopoll,
         .unlocked_ioctl = xfs_file_ioctl,
 #ifdef CONFIG_COMPAT
         .compat_ioctl   = xfs_file_compat_ioctl,
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 032761d9b218..1d46a10aef6c 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -310,7 +310,6 @@ struct kiocb {
         int                     ki_flags;
         u16                     ki_hint;
         u16                     ki_ioprio; /* See linux/ioprio.h */
-        u32                     ki_blk_qc;
 } __randomize_layout;
 
 static inline bool is_sync_kiocb(struct kiocb *kiocb)
@@ -1782,6 +1781,7 @@ struct file_operations {
         ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *);
         ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
         ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
+        bool (*iopoll)(struct kiocb *kiocb, bool wait);
         int (*iterate) (struct file *, struct dir_context *);
         int (*iterate_shared) (struct file *, struct dir_context *);
         __poll_t (*poll) (struct file *, struct poll_table_struct *);
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index 9a4258154b25..2cbe87ad1878 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -162,6 +162,7 @@ typedef int (iomap_dio_end_io_t)(struct kiocb *iocb, ssize_t ret,
                 unsigned flags);
 ssize_t iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter,
                 const struct iomap_ops *ops, iomap_dio_end_io_t end_io);
+bool iomap_dio_iopoll(struct kiocb *kiocb, bool wait);
 
 #ifdef CONFIG_SWAP
 struct file;
-- 
2.19.1
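For completeness, this is roughly how the polled path in this series would be driven from userspace.  It is only a sketch: IOCB_FLAG_HIPRI and its value are taken from the series under review (and per the first point above should really become a creation time flag), the device path is just an example, and all error handling is omitted:

/* Sketch of exercising the polled path; not part of the patches. */
#define _GNU_SOURCE             /* O_DIRECT */
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/aio_abi.h>

#ifndef IOCB_FLAG_HIPRI
#define IOCB_FLAG_HIPRI (1 << 2)        /* assumed value from the series under review */
#endif

int main(void)
{
        aio_context_t ctx = 0;
        struct iocb cb, *cbs[1] = { &cb };
        struct io_event ev;
        void *buf;
        int fd;

        /* polled I/O only makes sense for O_DIRECT on a capable device */
        fd = open("/dev/nvme0n1", O_RDONLY | O_DIRECT);
        posix_memalign(&buf, 4096, 4096);

        syscall(__NR_io_setup, 128, &ctx);

        memset(&cb, 0, sizeof(cb));
        cb.aio_lio_opcode = IOCB_CMD_PREAD;
        cb.aio_fildes = fd;
        cb.aio_buf = (unsigned long)buf;
        cb.aio_nbytes = 4096;
        cb.aio_offset = 0;
        cb.aio_flags = IOCB_FLAG_HIPRI; /* ask for the polled completion path */

        syscall(__NR_io_submit, ctx, 1, cbs);
        /* with HIPRI set, io_getevents() spins in aio_check_polled() */
        syscall(__NR_io_getevents, ctx, 1, 1, &ev, NULL);

        syscall(__NR_io_destroy, ctx);
        free(buf);
        close(fd);
        return 0;
}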