Re: [PATCH] pipe: Fix endless sleep problem due to the out-of-order

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Gentel ping....

On 2021/12/12 0:51, miaoxie (A) wrote:
> Thers is a out-of-order access problem which would cause endless sleep
> when we use pipe with epoll.
> 
> The story is following, we assume the ring size is 2, the ring head
> is 1, the ring tail is 0, task0 is write task, task1 is read task,
> task2 is write task.
> Task0					Task1		Task2
> epoll_ctl(fd, EPOLL_CTL_ADD, ...)
>   pipe_poll()
>     poll_wait()
>     tail = READ_ONCE(pipe->tail);
>     	// Re-order and get tail=0
> 				  	pipe_read
> 					tail++ //tail=1
> 							pipe_write
> 							head++ //head=2
>     head = READ_ONCE(pipe->head);
>     	// head = 2
>     check ring is full by head - tail
> Task0 get head = 2 and tail = 0, so it mistake that the pipe ring is
> full, then task0 is not add into ready list. If the ring is not full
> anymore, task0 would not be woken up forever
> 
> The reason of this problem is that we got inconsistent head/tail value
> of the pipe ring, so we fix the problem by getting them by atomic.
> 
> It seems that pipe_readable and pipe_writable is safe, so we don't
> change them.
> 
> Signed-off-by: Miao Xie <miaoxie@xxxxxxxxxx>
> ---
>  fs/pipe.c                 |  6 ++++--
>  include/linux/pipe_fs_i.h | 32 ++++++++++++++++++++++++++++++--
>  2 files changed, 34 insertions(+), 4 deletions(-)
> 
> diff --git a/fs/pipe.c b/fs/pipe.c
> index 6d4342bad9f1..454056b1eaad 100644
> --- a/fs/pipe.c
> +++ b/fs/pipe.c
> @@ -649,6 +649,7 @@ pipe_poll(struct file *filp, poll_table *wait)
>  	__poll_t mask;
>  	struct pipe_inode_info *pipe = filp->private_data;
>  	unsigned int head, tail;
> +	u64 ring_idxs;
>  
>  	/* Epoll has some historical nasty semantics, this enables them */
>  	pipe->poll_usage = 1;
> @@ -669,8 +670,9 @@ pipe_poll(struct file *filp, poll_table *wait)
>  	 * if something changes and you got it wrong, the poll
>  	 * table entry will wake you up and fix it.
>  	 */
> -	head = READ_ONCE(pipe->head);
> -	tail = READ_ONCE(pipe->tail);
> +	ring_idxs = (u64)atomic64_read(&pipe->ring_idxs);
> +	head = pipe_get_ring_head(ring_idxs);
> +	tail = pipe_get_ring_tail(ring_idxs);
>  
>  	mask = 0;
>  	if (filp->f_mode & FMODE_READ) {
> diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
> index fc5642431b92..9a7cb8077dc8 100644
> --- a/include/linux/pipe_fs_i.h
> +++ b/include/linux/pipe_fs_i.h
> @@ -58,8 +58,18 @@ struct pipe_buffer {
>  struct pipe_inode_info {
>  	struct mutex mutex;
>  	wait_queue_head_t rd_wait, wr_wait;
> -	unsigned int head;
> -	unsigned int tail;
> +	union {
> +		/*
> +		 * If someone want to change this structure, you should also
> +		 * change the macro *PIPE_GET_DOORBELL* that is used to
> +		 * generate the ring head/tail access function.
> +		 */
> +		struct {
> +			unsigned int head;
> +			unsigned int tail;
> +		};
> +		atomic64_t ring_idxs;
> +	};
>  	unsigned int max_usage;
>  	unsigned int ring_size;
>  #ifdef CONFIG_WATCH_QUEUE
> @@ -82,6 +92,24 @@ struct pipe_inode_info {
>  #endif
>  };
>  
> +#define PIPE_GET_DOORBELL(bellname)					\
> +static inline unsigned int pipe_get_ring_##bellname(u64 ring_idxs)	\
> +{									\
> +	unsigned int doorbell;						\
> +	unsigned char *ptr = ((char *)&ring_idxs);			\
> +	int offset;							\
> +									\
> +	offset = (int)offsetof(struct pipe_inode_info, bellname);	\
> +	offset -= (int)offsetof(struct pipe_inode_info, ring_idxs);	\
> +	ptr += offset;							\
> +	doorbell = *((unsigned int *)ptr);				\
> +									\
> +	return doorbell;						\
> +}
> +
> +PIPE_GET_DOORBELL(head)
> +PIPE_GET_DOORBELL(tail)
> +
>  /*
>   * Note on the nesting of these functions:
>   *
> 



[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [NTFS 3]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [NTFS 3]     [Samba]     [Device Mapper]     [CEPH Development]

  Powered by Linux