On Mon, Mar 05, 2018 at 01:27:14PM -0800, Christoph Hellwig wrote: > The upcoming aio poll support would like to be able to complete the > iocb inline from the cancellation context, but that would cause > a lock order reversal. Add support for optionally moving the cancelation > outside the context lock to avoid this reversal. I started to wonder which lock order reversal the commit message refers to. I think the reason for adding delayed cancellations is that we want to be able to call io_cancel -> kiocb_cancel -> aio_poll_cancel -> aio_complete without double-locking ctx_lock? --D > Signed-off-by: Christoph Hellwig <hch@xxxxxx> > Acked-by: Jeff Moyer <jmoyer@xxxxxxxxxx> > --- > fs/aio.c | 49 ++++++++++++++++++++++++++++++++++++++----------- > 1 file changed, 38 insertions(+), 11 deletions(-) > > diff --git a/fs/aio.c b/fs/aio.c > index 0b6394b4e528..9d7d6e4cde87 100644 > --- a/fs/aio.c > +++ b/fs/aio.c > @@ -170,6 +170,10 @@ struct aio_kiocb { > struct list_head ki_list; /* the aio core uses this > * for cancellation */ > > + unsigned int flags; /* protected by ctx->ctx_lock */ > +#define AIO_IOCB_DELAYED_CANCEL (1 << 0) > +#define AIO_IOCB_CANCELLED (1 << 1) > + > /* > * If the aio_resfd field of the userspace iocb is not zero, > * this is the underlying eventfd context to deliver events to. 
> @@ -536,9 +540,9 @@ static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events) > #define AIO_EVENTS_FIRST_PAGE ((PAGE_SIZE - sizeof(struct aio_ring)) / sizeof(struct io_event)) > #define AIO_EVENTS_OFFSET (AIO_EVENTS_PER_PAGE - AIO_EVENTS_FIRST_PAGE) > > -void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel) > +static void __kiocb_set_cancel_fn(struct aio_kiocb *req, > + kiocb_cancel_fn *cancel, unsigned int iocb_flags) > { > - struct aio_kiocb *req = container_of(iocb, struct aio_kiocb, rw); > struct kioctx *ctx = req->ki_ctx; > unsigned long flags; > > @@ -548,8 +552,15 @@ void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel) > spin_lock_irqsave(&ctx->ctx_lock, flags); > list_add_tail(&req->ki_list, &ctx->active_reqs); > req->ki_cancel = cancel; > + req->flags |= iocb_flags; > spin_unlock_irqrestore(&ctx->ctx_lock, flags); > } > + > +void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel) > +{ > + return __kiocb_set_cancel_fn(container_of(iocb, struct aio_kiocb, rw), > + cancel, 0); > +} > EXPORT_SYMBOL(kiocb_set_cancel_fn); > > /* > @@ -603,17 +614,27 @@ static void free_ioctx_users(struct percpu_ref *ref) > { > struct kioctx *ctx = container_of(ref, struct kioctx, users); > struct aio_kiocb *req; > + LIST_HEAD(list); > > spin_lock_irq(&ctx->ctx_lock); > - > while (!list_empty(&ctx->active_reqs)) { > req = list_first_entry(&ctx->active_reqs, > struct aio_kiocb, ki_list); > - kiocb_cancel(req); > - } > > + if (req->flags & AIO_IOCB_DELAYED_CANCEL) { > + req->flags |= AIO_IOCB_CANCELLED; > + list_move_tail(&req->ki_list, &list); > + } else { > + kiocb_cancel(req); > + } > + } > spin_unlock_irq(&ctx->ctx_lock); > > + while (!list_empty(&list)) { > + req = list_first_entry(&list, struct aio_kiocb, ki_list); > + kiocb_cancel(req); > + } > + > percpu_ref_kill(&ctx->reqs); > percpu_ref_put(&ctx->reqs); > } > @@ -1785,15 +1806,22 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb, > 
if (unlikely(!ctx)) > return -EINVAL; > > - spin_lock_irq(&ctx->ctx_lock); > + ret = -EINVAL; > > + spin_lock_irq(&ctx->ctx_lock); > kiocb = lookup_kiocb(ctx, iocb, key); > + if (kiocb) { > + if (kiocb->flags & AIO_IOCB_DELAYED_CANCEL) { > + kiocb->flags |= AIO_IOCB_CANCELLED; > + } else { > + ret = kiocb_cancel(kiocb); > + kiocb = NULL; > + } > + } > + spin_unlock_irq(&ctx->ctx_lock); > + > if (kiocb) > ret = kiocb_cancel(kiocb); > - else > - ret = -EINVAL; > - > - spin_unlock_irq(&ctx->ctx_lock); > > if (!ret) { > /* > @@ -1805,7 +1833,6 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb, > } > > percpu_ref_put(&ctx->users); > - > return ret; > } > > -- > 2.14.2 >