Re: INFO: task hung in pipe_read (2)

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 2020/08/11 4:29, Andrea Arcangeli wrote:
> However once the mutex is killable there's no concern anymore and the
> hangcheck timer is correct also not reporting any misbehavior anymore.

Do you mean something like below untested patch? I think that the difficult
part is that mutex for close() operation can't become killable. And I worry
that syzbot soon reports a hung task at pipe_release() instead of pipe_read()
or pipe_write(). If pagefault with locks held can be avoided, there will be no
such worry.

 fs/pipe.c                 | 106 ++++++++++++++++++++++++++++++++++++++--------
 fs/splice.c               |  41 ++++++++++++------
 include/linux/pipe_fs_i.h |   5 ++-
 3 files changed, 120 insertions(+), 32 deletions(-)

diff --git a/fs/pipe.c b/fs/pipe.c
index 60dbee4..537d1ef 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -66,6 +66,13 @@ static void pipe_lock_nested(struct pipe_inode_info *pipe, int subclass)
 		mutex_lock_nested(&pipe->mutex, subclass);
 }
 
+static int __must_check pipe_lock_killable_nested(struct pipe_inode_info *pipe, int subclass)
+{
+	if (pipe->files)
+		return mutex_lock_killable_nested(&pipe->mutex, subclass);
+	return 0;
+}
+
 void pipe_lock(struct pipe_inode_info *pipe)
 {
 	/*
@@ -75,6 +82,14 @@ void pipe_lock(struct pipe_inode_info *pipe)
 }
 EXPORT_SYMBOL(pipe_lock);
 
+int pipe_lock_killable(struct pipe_inode_info *pipe)
+{
+	/*
+	 * pipe_lock() nests non-pipe inode locks (for writing to a file)
+	 */
+	return pipe_lock_killable_nested(pipe, I_MUTEX_PARENT);
+}
+
 void pipe_unlock(struct pipe_inode_info *pipe)
 {
 	if (pipe->files)
@@ -87,23 +102,37 @@ static inline void __pipe_lock(struct pipe_inode_info *pipe)
 	mutex_lock_nested(&pipe->mutex, I_MUTEX_PARENT);
 }
 
+static inline int __must_check __pipe_lock_killable(struct pipe_inode_info *pipe)
+{
+	return mutex_lock_killable_nested(&pipe->mutex, I_MUTEX_PARENT);
+}
+
 static inline void __pipe_unlock(struct pipe_inode_info *pipe)
 {
 	mutex_unlock(&pipe->mutex);
 }
 
-void pipe_double_lock(struct pipe_inode_info *pipe1,
-		      struct pipe_inode_info *pipe2)
+int pipe_double_lock_killable(struct pipe_inode_info *pipe1,
+			      struct pipe_inode_info *pipe2)
 {
 	BUG_ON(pipe1 == pipe2);
 
 	if (pipe1 < pipe2) {
-		pipe_lock_nested(pipe1, I_MUTEX_PARENT);
-		pipe_lock_nested(pipe2, I_MUTEX_CHILD);
+		if (pipe_lock_killable_nested(pipe1, I_MUTEX_PARENT))
+			return -ERESTARTSYS;
+		if (pipe_lock_killable_nested(pipe2, I_MUTEX_CHILD)) {
+			pipe_unlock(pipe1);
+			return -ERESTARTSYS;
+		}
 	} else {
-		pipe_lock_nested(pipe2, I_MUTEX_PARENT);
-		pipe_lock_nested(pipe1, I_MUTEX_CHILD);
+		if (pipe_lock_killable_nested(pipe2, I_MUTEX_PARENT))
+			return -ERESTARTSYS;
+		if (pipe_lock_killable_nested(pipe1, I_MUTEX_CHILD)) {
+			pipe_unlock(pipe2);
+			return -ERESTARTSYS;
+		}
 	}
+	return 0;
 }
 
 /* Drop the inode semaphore and wait for a pipe event, atomically */
@@ -125,6 +154,24 @@ void pipe_wait(struct pipe_inode_info *pipe)
 	pipe_lock(pipe);
 }
 
+int pipe_wait_killable(struct pipe_inode_info *pipe)
+{
+	DEFINE_WAIT(rdwait);
+	DEFINE_WAIT(wrwait);
+
+	/*
+	 * Pipes are system-local resources, so sleeping on them
+	 * is considered a noninteractive wait:
+	 */
+	prepare_to_wait(&pipe->rd_wait, &rdwait, TASK_INTERRUPTIBLE);
+	prepare_to_wait(&pipe->wr_wait, &wrwait, TASK_INTERRUPTIBLE);
+	pipe_unlock(pipe);
+	schedule();
+	finish_wait(&pipe->rd_wait, &rdwait);
+	finish_wait(&pipe->wr_wait, &wrwait);
+	return pipe_lock_killable(pipe);
+}
+
 static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
 				  struct pipe_buffer *buf)
 {
@@ -244,7 +291,8 @@ static inline bool pipe_readable(const struct pipe_inode_info *pipe)
 		return 0;
 
 	ret = 0;
-	__pipe_lock(pipe);
+	if (__pipe_lock_killable(pipe))
+		return -ERESTARTSYS;
 
 	/*
 	 * We only wake up writers if the pipe was full when we started
@@ -381,7 +429,8 @@ static inline bool pipe_readable(const struct pipe_inode_info *pipe)
 		if (wait_event_interruptible_exclusive(pipe->rd_wait, pipe_readable(pipe)) < 0)
 			return -ERESTARTSYS;
 
-		__pipe_lock(pipe);
+		if (__pipe_lock_killable(pipe))
+			return -ERESTARTSYS;
 		was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage);
 		wake_next_reader = true;
 	}
@@ -432,7 +481,8 @@ static inline bool pipe_writable(const struct pipe_inode_info *pipe)
 	if (unlikely(total_len == 0))
 		return 0;
 
-	__pipe_lock(pipe);
+	if (__pipe_lock_killable(pipe))
+		return -ERESTARTSYS;
 
 	if (!pipe->readers) {
 		send_sig(SIGPIPE, current, 0);
@@ -577,7 +627,14 @@ static inline bool pipe_writable(const struct pipe_inode_info *pipe)
 			kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
 		}
 		wait_event_interruptible_exclusive(pipe->wr_wait, pipe_writable(pipe));
-		__pipe_lock(pipe);
+		if (__pipe_lock_killable(pipe)) {
+			if (!ret)
+				ret = -ERESTARTSYS;
+			/* Extra notification is better than missing notification? */
+			was_empty = true;
+			wake_next_writer = true;
+			goto out_unlocked;
+		}
 		was_empty = pipe_empty(pipe->head, pipe->tail);
 		wake_next_writer = true;
 	}
@@ -586,6 +643,7 @@ static inline bool pipe_writable(const struct pipe_inode_info *pipe)
 		wake_next_writer = false;
 	__pipe_unlock(pipe);
 
+out_unlocked:
 	/*
 	 * If we do do a wakeup event, we do a 'sync' wakeup, because we
 	 * want the reader to start processing things asap, rather than
@@ -617,7 +675,8 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 
 	switch (cmd) {
 	case FIONREAD:
-		__pipe_lock(pipe);
+		if (__pipe_lock_killable(pipe))
+			return -ERESTARTSYS;
 		count = 0;
 		head = pipe->head;
 		tail = pipe->tail;
@@ -634,7 +693,8 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 #ifdef CONFIG_WATCH_QUEUE
 	case IOC_WATCH_QUEUE_SET_SIZE: {
 		int ret;
-		__pipe_lock(pipe);
+		if (__pipe_lock_killable(pipe))
+			return -ERESTARTSYS;
 		ret = watch_queue_set_size(pipe, arg);
 		__pipe_unlock(pipe);
 		return ret;
@@ -719,6 +779,10 @@ static void put_pipe_info(struct inode *inode, struct pipe_inode_info *pipe)
 {
 	struct pipe_inode_info *pipe = file->private_data;
 
+	/*
+	 * Think lock can't be killable. How to avoid deadlock if page fault
+	 * with pipe mutex held does not finish quickly?
+	 */
 	__pipe_lock(pipe);
 	if (file->f_mode & FMODE_READ)
 		pipe->readers--;
@@ -744,7 +808,8 @@ static void put_pipe_info(struct inode *inode, struct pipe_inode_info *pipe)
 	struct pipe_inode_info *pipe = filp->private_data;
 	int retval = 0;
 
-	__pipe_lock(pipe);
+	if (__pipe_lock_killable(pipe)) /* Can this be safely killable? */
+		return -ERESTARTSYS;
 	if (filp->f_mode & FMODE_READ)
 		retval = fasync_helper(fd, filp, on, &pipe->fasync_readers);
 	if ((filp->f_mode & FMODE_WRITE) && retval >= 0) {
@@ -1040,7 +1105,8 @@ static int wait_for_partner(struct pipe_inode_info *pipe, unsigned int *cnt)
 	int cur = *cnt;
 
 	while (cur == *cnt) {
-		pipe_wait(pipe);
+		if (pipe_wait_killable(pipe))
+			break;
 		if (signal_pending(current))
 			break;
 	}
@@ -1083,10 +1149,13 @@ static int fifo_open(struct inode *inode, struct file *filp)
 			spin_unlock(&inode->i_lock);
 		}
 	}
-	filp->private_data = pipe;
-	/* OK, we have a pipe and it's pinned down */
 
-	__pipe_lock(pipe);
+	/* OK, we have a pipe and it's pinned down */
+	if (__pipe_lock_killable(pipe)) {
+		put_pipe_info(inode, pipe);
+		return -ERESTARTSYS;
+	}
+	filp->private_data = pipe;
 
 	/* We can only do regular read/write on fifos */
 	stream_open(inode, filp);
@@ -1349,7 +1418,8 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
 	if (!pipe)
 		return -EBADF;
 
-	__pipe_lock(pipe);
+	if (__pipe_lock_killable(pipe))
+		return -ERESTARTSYS;
 
 	switch (cmd) {
 	case F_SETPIPE_SZ:
diff --git a/fs/splice.c b/fs/splice.c
index d7c8a7c..65d24df 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -563,7 +563,8 @@ static int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_des
 			sd->need_wakeup = false;
 		}
 
-		pipe_wait(pipe);
+		if (pipe_wait_killable(pipe))
+			return -ERESTARTSYS;
 	}
 
 	return 1;
@@ -657,7 +658,8 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
 		.u.file = out,
 	};
 
-	pipe_lock(pipe);
+	if (pipe_lock_killable(pipe))
+		return -ERESTARTSYS;
 	ret = __splice_from_pipe(pipe, &sd, actor);
 	pipe_unlock(pipe);
 
@@ -696,7 +698,10 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
 	if (unlikely(!array))
 		return -ENOMEM;
 
-	pipe_lock(pipe);
+	if (pipe_lock_killable(pipe)) {
+		kfree(array);
+		return -ERESTARTSYS;
+	}
 
 	splice_from_pipe_begin(&sd);
 	while (sd.total_len) {
@@ -1077,7 +1082,8 @@ static int wait_for_space(struct pipe_inode_info *pipe, unsigned flags)
 			return -EAGAIN;
 		if (signal_pending(current))
 			return -ERESTARTSYS;
-		pipe_wait(pipe);
+		if (pipe_wait_killable(pipe))
+			return -ERESTARTSYS;
 	}
 }
 
@@ -1167,7 +1173,8 @@ long do_splice(struct file *in, loff_t __user *off_in,
 		if (out->f_flags & O_NONBLOCK)
 			flags |= SPLICE_F_NONBLOCK;
 
-		pipe_lock(opipe);
+		if (pipe_lock_killable(opipe))
+			return -ERESTARTSYS;
 		ret = wait_for_space(opipe, flags);
 		if (!ret) {
 			unsigned int p_space;
@@ -1264,7 +1271,8 @@ static long vmsplice_to_user(struct file *file, struct iov_iter *iter,
 		return -EBADF;
 
 	if (sd.total_len) {
-		pipe_lock(pipe);
+		if (pipe_lock_killable(pipe))
+			return -ERESTARTSYS;
 		ret = __splice_from_pipe(pipe, &sd, pipe_to_user);
 		pipe_unlock(pipe);
 	}
@@ -1291,7 +1299,8 @@ static long vmsplice_to_pipe(struct file *file, struct iov_iter *iter,
 	if (!pipe)
 		return -EBADF;
 
-	pipe_lock(pipe);
+	if (pipe_lock_killable(pipe))
+		return -ERESTARTSYS;
 	ret = wait_for_space(pipe, flags);
 	if (!ret)
 		ret = iter_to_pipe(iter, pipe, buf_flag);
@@ -1441,7 +1450,8 @@ static int ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
 		return 0;
 
 	ret = 0;
-	pipe_lock(pipe);
+	if (pipe_lock_killable(pipe))
+		return -ERESTARTSYS;
 
 	while (pipe_empty(pipe->head, pipe->tail)) {
 		if (signal_pending(current)) {
@@ -1454,7 +1464,8 @@ static int ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
 			ret = -EAGAIN;
 			break;
 		}
-		pipe_wait(pipe);
+		if (pipe_wait_killable(pipe))
+			return -ERESTARTSYS;
 	}
 
 	pipe_unlock(pipe);
@@ -1477,7 +1488,8 @@ static int opipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
 		return 0;
 
 	ret = 0;
-	pipe_lock(pipe);
+	if (pipe_lock_killable(pipe))
+		return -ERESTARTSYS;
 
 	while (pipe_full(pipe->head, pipe->tail, pipe->max_usage)) {
 		if (!pipe->readers) {
@@ -1493,7 +1505,8 @@ static int opipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
 			ret = -ERESTARTSYS;
 			break;
 		}
-		pipe_wait(pipe);
+		if (pipe_wait_killable(pipe))
+			return -ERESTARTSYS;
 	}
 
 	pipe_unlock(pipe);
@@ -1529,7 +1542,8 @@ static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe,
 	 * grabbing by pipe info address. Otherwise two different processes
 	 * could deadlock (one doing tee from A -> B, the other from B -> A).
 	 */
-	pipe_double_lock(ipipe, opipe);
+	if (pipe_double_lock_killable(ipipe, opipe))
+		return -ERESTARTSYS;
 
 	i_tail = ipipe->tail;
 	i_mask = ipipe->ring_size - 1;
@@ -1655,7 +1669,8 @@ static int link_pipe(struct pipe_inode_info *ipipe,
 	 * grabbing by pipe info address. Otherwise two different processes
 	 * could deadlock (one doing tee from A -> B, the other from B -> A).
 	 */
-	pipe_double_lock(ipipe, opipe);
+	if (pipe_double_lock_killable(ipipe, opipe))
+		return -ERESTARTSYS;
 
 	i_tail = ipipe->tail;
 	i_mask = ipipe->ring_size - 1;
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index 50afd0d..eb99c18 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -233,8 +233,10 @@ static inline bool pipe_buf_try_steal(struct pipe_inode_info *pipe,
 
 /* Pipe lock and unlock operations */
 void pipe_lock(struct pipe_inode_info *);
+int __must_check pipe_lock_killable(struct pipe_inode_info *pipe);
 void pipe_unlock(struct pipe_inode_info *);
-void pipe_double_lock(struct pipe_inode_info *, struct pipe_inode_info *);
+int __must_check pipe_double_lock_killable(struct pipe_inode_info *pipe1,
+					   struct pipe_inode_info *pipe2);
 
 extern unsigned int pipe_max_size;
 extern unsigned long pipe_user_pages_hard;
@@ -242,6 +244,7 @@ static inline bool pipe_buf_try_steal(struct pipe_inode_info *pipe,
 
 /* Drop the inode semaphore and wait for a pipe event, atomically */
 void pipe_wait(struct pipe_inode_info *pipe);
+int __must_check pipe_wait_killable(struct pipe_inode_info *pipe);
 
 struct pipe_inode_info *alloc_pipe_info(void);
 void free_pipe_info(struct pipe_inode_info *);
-- 
1.8.3.1




[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]

  Powered by Linux