Re: [PATCH 3/3] io_uring: allow POLL_ADD with double poll_wait() users

Hi,

There are a couple of comments below.

On 2/12/2020 11:25 PM, Jens Axboe wrote:
> Some file descriptors use separate waitqueues for their f_ops->poll()
> handler, most commonly one for read and one for write. The io_uring
> poll implementation doesn't work with that, as the 2nd poll_wait()
> call will cause the io_uring poll request to -EINVAL.
> 
> This is particularly a problem now that pipes were switched to using
> multiple wait queues (commit 0ddad21d3e99), but it also affects tty
> devices and /dev/random. This is a big problem for event loops
> where some file descriptors work, and others don't.
> 
> With this fix, io_uring handles multiple waitqueues.
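
For context, a double poll_wait() user looks roughly like the sketch
below (a hypothetical driver with assumed struct and field names, just to
illustrate the queue_proc callback firing twice for one request):

	#include <linux/poll.h>

	struct demo_dev {			/* assumed type, illustration only */
		wait_queue_head_t rd_wait;
		wait_queue_head_t wr_wait;
		bool readable, writable;
	};

	static __poll_t demo_poll(struct file *file, poll_table *wait)
	{
		struct demo_dev *dev = file->private_data;
		__poll_t mask = 0;

		poll_wait(file, &dev->rd_wait, wait);	/* 1st queueing callback */
		poll_wait(file, &dev->wr_wait, wait);	/* 2nd one used to -EINVAL */

		if (dev->readable)
			mask |= EPOLLIN | EPOLLRDNORM;
		if (dev->writable)
			mask |= EPOLLOUT | EPOLLWRNORM;
		return mask;
	}
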
> 
> Signed-off-by: Jens Axboe <axboe@xxxxxxxxx>
> ---
>  fs/io_uring.c | 79 +++++++++++++++++++++++++++++++++++++++++++++++----
>  1 file changed, 74 insertions(+), 5 deletions(-)
> 
> diff --git a/fs/io_uring.c b/fs/io_uring.c
> index 9cd2ce3b8ad9..9f00f30e1790 100644
> --- a/fs/io_uring.c
> +++ b/fs/io_uring.c
> @@ -3440,10 +3440,27 @@ static int io_connect(struct io_kiocb *req, struct io_kiocb **nxt,
>  #endif
>  }
>  
> +static void io_poll_remove_double(struct io_kiocb *req)
> +{
> +	struct io_poll_iocb *poll = (struct io_poll_iocb *) req->io;
> +
> +	if (poll && poll->head) {
> +		unsigned long flags;
> +
> +		spin_lock_irqsave(&poll->head->lock, flags);
> +		list_del_init(&poll->wait.entry);
> +		if (poll->wait.private)
> +			refcount_dec(&req->refs);
> +		spin_unlock_irqrestore(&poll->head->lock, flags);
> +	}
> +}
> +
>  static void io_poll_remove_one(struct io_kiocb *req)
>  {
>  	struct io_poll_iocb *poll = &req->poll;
>  
> +	io_poll_remove_double(req);
> +
>  	spin_lock(&poll->head->lock);
>  	WRITE_ONCE(poll->canceled, true);
>  	if (!list_empty(&poll->wait.entry)) {
> @@ -3679,10 +3696,38 @@ static int io_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
>  	if (mask && !(mask & poll->events))
>  		return 0;
>  
> +	io_poll_remove_double(req);
>  	__io_poll_wake(req, &req->poll, mask);
>  	return 1;
>  }
>  
> +static int io_poll_double_wake(struct wait_queue_entry *wait, unsigned mode,
> +			       int sync, void *key)
> +{
> +	struct io_kiocb *req = wait->private;
> +	struct io_poll_iocb *poll = (void *) req->io;
> +	__poll_t mask = key_to_poll(key);
> +	bool done = true;
> +
> +	/* for instances that support it check for an event match first: */
> +	if (mask && !(mask & poll->events))
> +		return 0;
> +
> +	if (req->poll.head) {

Can there be concurrency problems?

1. io_poll_wake() -> io_poll_remove_double() is running
   while a second io_poll_queue_proc() is called.
   Then there is a race on req->io (see the interleaving
   sketched below).

2. concurrent io_poll_wake() and io_poll_double_wake()

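A possible interleaving for case 1 (assuming vfs_poll() is still queueing
the second entry when the first one fires):

CPU0: io_poll_wake()                  CPU1: io_poll_queue_proc()
  io_poll_remove_double()
    poll = req->io;   /* still NULL */
                                        req->io = (void *) poll;
                                        add_wait_queue(head, &poll->wait);
  __io_poll_wake() completes the req,
  but the 2nd entry is never removed
  and its extra ref is never dropped
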

> +		unsigned long flags;
> +
> +		spin_lock_irqsave(&req->poll.head->lock, flags);
> +		done = list_empty(&req->poll.wait.entry);
> +		if (!done)
> +			list_del_init(&req->poll.wait.entry);
> +		spin_unlock_irqrestore(&req->poll.head->lock, flags);
> +	}
> +	if (!done)
> +		__io_poll_wake(req, poll, mask);

`!done` is always false if we didn't hit the block under `req->poll.head`,
so the __io_poll_wake() call may be moved into that block, along with the
@done declaration.
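I.e., something along these lines (untested):

	if (req->poll.head) {
		bool done;
		unsigned long flags;

		spin_lock_irqsave(&req->poll.head->lock, flags);
		done = list_empty(&req->poll.wait.entry);
		if (!done)
			list_del_init(&req->poll.wait.entry);
		spin_unlock_irqrestore(&req->poll.head->lock, flags);
		if (!done)
			__io_poll_wake(req, poll, mask);
	}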

> +	refcount_dec(&req->refs);
> +	return 1;
> +}
> +
>  struct io_poll_table {
>  	struct poll_table_struct pt;
>  	struct io_kiocb *req;
> @@ -3693,15 +3738,38 @@ static void io_poll_queue_proc(struct file *file, struct wait_queue_head *head,
>  			       struct poll_table_struct *p)
>  {

Can this be called concurrently? (at least in theory)

>  	struct io_poll_table *pt = container_of(p, struct io_poll_table, pt);
> +	struct io_kiocb *req = pt->req;
> +	struct io_poll_iocb *poll = &req->poll;
>  
> -	if (unlikely(pt->req->poll.head)) {
> -		pt->error = -EINVAL;
> -		return;
> +	/*
> +	 * If poll->head is already set, it's because the file being polled
> +	 * uses multiple waitqueues for poll handling (e.g. one for read, one
> +	 * for write). Set up a separate io_poll_iocb if this happens.
> +	 */
> +	if (unlikely(poll->head)) {
> +		/* already have a 2nd entry, fail a third attempt */
> +		if (req->io) {
> +			pt->error = -EINVAL;
> +			return;
> +		}
> +		poll = kmalloc(sizeof(*poll), GFP_ATOMIC);

I don't see where this is freed.
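Presumably it needs a kfree() once the second entry is removed --
io_poll_remove_double() looks like the natural spot, though whether
that's safe also depends on the races above.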

> +		if (!poll) {
> +			pt->error = -ENOMEM;
> +			return;
> +		}
> +		poll->done = false;
> +		poll->canceled = false;
> +		poll->events = req->poll.events;
> +		INIT_LIST_HEAD(&poll->wait.entry);
> +		init_waitqueue_func_entry(&poll->wait, io_poll_double_wake);
> +		refcount_inc(&req->refs);
> +		poll->wait.private = req;
> +		req->io = (void *) poll;
>  	}
>  
>  	pt->error = 0;
> -	pt->req->poll.head = head;
> -	add_wait_queue(head, &pt->req->poll.wait);
> +	poll->head = head;
> +	add_wait_queue(head, &poll->wait);
>  }
>  
>  static void io_poll_req_insert(struct io_kiocb *req)
> @@ -3778,6 +3846,7 @@ static int io_poll_add(struct io_kiocb *req, struct io_kiocb **nxt)
>  	}
>  	if (mask) { /* no async, we'd stolen it */
>  		ipt.error = 0;
> +		io_poll_remove_double(req);
>  		io_poll_complete(req, mask, 0);
>  	}
>  	spin_unlock_irq(&ctx->completion_lock);
> 

-- 
Pavel Begunkov


