Re: [PATCH 1/2] epoll: use the waitqueue lock to protect ep->wq

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 12/06/2017 06:52 PM, Christoph Hellwig wrote:
> The epoll code currently always uses the unlocked waitqueue helpers for
> ep->wq, but instead of holding the lock inside the waitqueue around these
> calls, as expected, the epoll code uses its own lock.  Given that the
> waitqueue is not exposed to the rest of the kernel this actually works
> ok at the moment, but prevents the epoll locking rules from being
> enforced using lockdep.  Remove ep->lock and use the waitqueue lock
> to not only reduce the size of struct eventpoll but also make sure we
> can assert locking invariants in the waitqueue code.
> 
> Signed-off-by: Christoph Hellwig <hch@xxxxxx>

Probably should also fix the locking comments at the top of
fs/eventpoll.c that refer to ep->lock...

The rest looks good.

Reviewed-by: Jason Baron <jbaron@xxxxxxxxxx>

Thanks,

-Jason


> ---
>  fs/eventpoll.c | 46 ++++++++++++++++++++++------------------------
>  1 file changed, 22 insertions(+), 24 deletions(-)
> 
> diff --git a/fs/eventpoll.c b/fs/eventpoll.c
> index afd548ebc328..2b2c5ac80e26 100644
> --- a/fs/eventpoll.c
> +++ b/fs/eventpoll.c
> @@ -182,11 +182,10 @@ struct epitem {
>   * This structure is stored inside the "private_data" member of the file
>   * structure and represents the main data structure for the eventpoll
>   * interface.
> + *
> + * Access to it is protected by the lock inside wq.
>   */
>  struct eventpoll {
> -	/* Protect the access to this structure */
> -	spinlock_t lock;
> -
>  	/*
>  	 * This mutex is used to ensure that files are not removed
>  	 * while epoll is using them. This is held during the event
> @@ -210,7 +209,7 @@ struct eventpoll {
>  	/*
>  	 * This is a single linked list that chains all the "struct epitem" that
>  	 * happened while transferring ready events to userspace w/out
> -	 * holding ->lock.
> +	 * holding ->wq.lock.
>  	 */
>  	struct epitem *ovflist;
>  
> @@ -686,17 +685,17 @@ static int ep_scan_ready_list(struct eventpoll *ep,
>  	 * because we want the "sproc" callback to be able to do it
>  	 * in a lockless way.
>  	 */
> -	spin_lock_irqsave(&ep->lock, flags);
> +	spin_lock_irqsave(&ep->wq.lock, flags);
>  	list_splice_init(&ep->rdllist, &txlist);
>  	ep->ovflist = NULL;
> -	spin_unlock_irqrestore(&ep->lock, flags);
> +	spin_unlock_irqrestore(&ep->wq.lock, flags);
>  
>  	/*
>  	 * Now call the callback function.
>  	 */
>  	error = (*sproc)(ep, &txlist, priv);
>  
> -	spin_lock_irqsave(&ep->lock, flags);
> +	spin_lock_irqsave(&ep->wq.lock, flags);
>  	/*
>  	 * During the time we spent inside the "sproc" callback, some
>  	 * other events might have been queued by the poll callback.
> @@ -738,7 +737,7 @@ static int ep_scan_ready_list(struct eventpoll *ep,
>  		if (waitqueue_active(&ep->poll_wait))
>  			pwake++;
>  	}
> -	spin_unlock_irqrestore(&ep->lock, flags);
> +	spin_unlock_irqrestore(&ep->wq.lock, flags);
>  
>  	if (!ep_locked)
>  		mutex_unlock(&ep->mtx);
> @@ -782,10 +781,10 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi)
>  
>  	rb_erase_cached(&epi->rbn, &ep->rbr);
>  
> -	spin_lock_irqsave(&ep->lock, flags);
> +	spin_lock_irqsave(&ep->wq.lock, flags);
>  	if (ep_is_linked(&epi->rdllink))
>  		list_del_init(&epi->rdllink);
> -	spin_unlock_irqrestore(&ep->lock, flags);
> +	spin_unlock_irqrestore(&ep->wq.lock, flags);
>  
>  	wakeup_source_unregister(ep_wakeup_source(epi));
>  	/*
> @@ -1015,7 +1014,6 @@ static int ep_alloc(struct eventpoll **pep)
>  	if (unlikely(!ep))
>  		goto free_uid;
>  
> -	spin_lock_init(&ep->lock);
>  	mutex_init(&ep->mtx);
>  	init_waitqueue_head(&ep->wq);
>  	init_waitqueue_head(&ep->poll_wait);
> @@ -1119,7 +1117,7 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, v
>  	struct eventpoll *ep = epi->ep;
>  	int ewake = 0;
>  
> -	spin_lock_irqsave(&ep->lock, flags);
> +	spin_lock_irqsave(&ep->wq.lock, flags);
>  
>  	ep_set_busy_poll_napi_id(epi);
>  
> @@ -1196,7 +1194,7 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, v
>  		pwake++;
>  
>  out_unlock:
> -	spin_unlock_irqrestore(&ep->lock, flags);
> +	spin_unlock_irqrestore(&ep->wq.lock, flags);
>  
>  	/* We have to call this outside the lock */
>  	if (pwake)
> @@ -1480,7 +1478,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
>  		goto error_remove_epi;
>  
>  	/* We have to drop the new item inside our item list to keep track of it */
> -	spin_lock_irqsave(&ep->lock, flags);
> +	spin_lock_irqsave(&ep->wq.lock, flags);
>  
>  	/* record NAPI ID of new item if present */
>  	ep_set_busy_poll_napi_id(epi);
> @@ -1497,7 +1495,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
>  			pwake++;
>  	}
>  
> -	spin_unlock_irqrestore(&ep->lock, flags);
> +	spin_unlock_irqrestore(&ep->wq.lock, flags);
>  
>  	atomic_long_inc(&ep->user->epoll_watches);
>  
> @@ -1523,10 +1521,10 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
>  	 * list, since that is used/cleaned only inside a section bound by "mtx".
>  	 * And ep_insert() is called with "mtx" held.
>  	 */
> -	spin_lock_irqsave(&ep->lock, flags);
> +	spin_lock_irqsave(&ep->wq.lock, flags);
>  	if (ep_is_linked(&epi->rdllink))
>  		list_del_init(&epi->rdllink);
> -	spin_unlock_irqrestore(&ep->lock, flags);
> +	spin_unlock_irqrestore(&ep->wq.lock, flags);
>  
>  	wakeup_source_unregister(ep_wakeup_source(epi));
>  
> @@ -1593,7 +1591,7 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even
>  	 * list, push it inside.
>  	 */
>  	if (revents & event->events) {
> -		spin_lock_irq(&ep->lock);
> +		spin_lock_irq(&ep->wq.lock);
>  		if (!ep_is_linked(&epi->rdllink)) {
>  			list_add_tail(&epi->rdllink, &ep->rdllist);
>  			ep_pm_stay_awake(epi);
> @@ -1604,7 +1602,7 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even
>  			if (waitqueue_active(&ep->poll_wait))
>  				pwake++;
>  		}
> -		spin_unlock_irq(&ep->lock);
> +		spin_unlock_irq(&ep->wq.lock);
>  	}
>  
>  	/* We have to call this outside the lock */
> @@ -1754,7 +1752,7 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
>  		 * caller specified a non blocking operation.
>  		 */
>  		timed_out = 1;
> -		spin_lock_irqsave(&ep->lock, flags);
> +		spin_lock_irqsave(&ep->wq.lock, flags);
>  		goto check_events;
>  	}
>  
> @@ -1763,7 +1761,7 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
>  	if (!ep_events_available(ep))
>  		ep_busy_loop(ep, timed_out);
>  
> -	spin_lock_irqsave(&ep->lock, flags);
> +	spin_lock_irqsave(&ep->wq.lock, flags);
>  
>  	if (!ep_events_available(ep)) {
>  		/*
> @@ -1805,11 +1803,11 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
>  				break;
>  			}
>  
> -			spin_unlock_irqrestore(&ep->lock, flags);
> +			spin_unlock_irqrestore(&ep->wq.lock, flags);
>  			if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS))
>  				timed_out = 1;
>  
> -			spin_lock_irqsave(&ep->lock, flags);
> +			spin_lock_irqsave(&ep->wq.lock, flags);
>  		}
>  
>  		__remove_wait_queue(&ep->wq, &wait);
> @@ -1819,7 +1817,7 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
>  	/* Is it worth to try to dig for events ? */
>  	eavail = ep_events_available(ep);
>  
> -	spin_unlock_irqrestore(&ep->lock, flags);
> +	spin_unlock_irqrestore(&ep->wq.lock, flags);
>  
>  	/*
>  	 * Try to transfer events to user space. In case we get 0 events and
> 



[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]
  Powered by Linux