On Fri 26-01-24 21:08:28, Kent Overstreet wrote: > *_lock_nested() is fundamentally broken; lockdep needs to check lock > ordering, but we cannot devise a total ordering on an unbounded number > of elements with only a few subclasses. > > the replacement is to define lock ordering with a proper comparison > function. > > fs/pipe.c was already doing everything correctly otherwise, nothing > much changes here. > > Cc: Alexander Viro <viro@xxxxxxxxxxxxxxxxxx> > Cc: Christian Brauner <brauner@xxxxxxxxxx> > Cc: Jan Kara <jack@xxxxxxx> > Signed-off-by: Kent Overstreet <kent.overstreet@xxxxxxxxx> I had to digest for a while what this new lockdep lock ordering feature is about. I have one pending question - what is the motivation for this conversion of the pipe code? AFAIU we don't have any problems with lockdep annotations on pipe->mutex because there are always only two subclasses? Honza > --- > fs/pipe.c | 81 +++++++++++++++++++++++++------------------------------ > 1 file changed, 36 insertions(+), 45 deletions(-) > > diff --git a/fs/pipe.c b/fs/pipe.c > index f1adbfe743d4..50c8a8596b52 100644 > --- a/fs/pipe.c > +++ b/fs/pipe.c > @@ -76,18 +76,20 @@ static unsigned long pipe_user_pages_soft = PIPE_DEF_BUFFERS * INR_OPEN_CUR; > * -- Manfred Spraul <manfred@xxxxxxxxxxxxxxxx> 2002-05-09 > */ > > -static void pipe_lock_nested(struct pipe_inode_info *pipe, int subclass) > +#define cmp_int(l, r) ((l > r) - (l < r)) > + > +#ifdef CONFIG_PROVE_LOCKING > +static int pipe_lock_cmp_fn(const struct lockdep_map *a, > + const struct lockdep_map *b) > { > - if (pipe->files) > - mutex_lock_nested(&pipe->mutex, subclass); > + return cmp_int((unsigned long) a, (unsigned long) b); > } > +#endif > > void pipe_lock(struct pipe_inode_info *pipe) > { > - /* > - * pipe_lock() nests non-pipe inode locks (for writing to a file) > - */ > - pipe_lock_nested(pipe, I_MUTEX_PARENT); > + if (pipe->files) > + mutex_lock(&pipe->mutex); > } > EXPORT_SYMBOL(pipe_lock); > > @@ -98,28 +100,16 @@ void 
pipe_unlock(struct pipe_inode_info *pipe) > } > EXPORT_SYMBOL(pipe_unlock); > > -static inline void __pipe_lock(struct pipe_inode_info *pipe) > -{ > - mutex_lock_nested(&pipe->mutex, I_MUTEX_PARENT); > -} > - > -static inline void __pipe_unlock(struct pipe_inode_info *pipe) > -{ > - mutex_unlock(&pipe->mutex); > -} > - > void pipe_double_lock(struct pipe_inode_info *pipe1, > struct pipe_inode_info *pipe2) > { > BUG_ON(pipe1 == pipe2); > > - if (pipe1 < pipe2) { > - pipe_lock_nested(pipe1, I_MUTEX_PARENT); > - pipe_lock_nested(pipe2, I_MUTEX_CHILD); > - } else { > - pipe_lock_nested(pipe2, I_MUTEX_PARENT); > - pipe_lock_nested(pipe1, I_MUTEX_CHILD); > - } > + if (pipe1 > pipe2) > + swap(pipe1, pipe2); > + > + pipe_lock(pipe1); > + pipe_lock(pipe2); > } > > static void anon_pipe_buf_release(struct pipe_inode_info *pipe, > @@ -271,7 +261,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to) > return 0; > > ret = 0; > - __pipe_lock(pipe); > + mutex_lock(&pipe->mutex); > > /* > * We only wake up writers if the pipe was full when we started > @@ -368,7 +358,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to) > ret = -EAGAIN; > break; > } > - __pipe_unlock(pipe); > + mutex_unlock(&pipe->mutex); > > /* > * We only get here if we didn't actually read anything. 
> @@ -400,13 +390,13 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to) > if (wait_event_interruptible_exclusive(pipe->rd_wait, pipe_readable(pipe)) < 0) > return -ERESTARTSYS; > > - __pipe_lock(pipe); > + mutex_lock(&pipe->mutex); > was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage); > wake_next_reader = true; > } > if (pipe_empty(pipe->head, pipe->tail)) > wake_next_reader = false; > - __pipe_unlock(pipe); > + mutex_unlock(&pipe->mutex); > > if (was_full) > wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM); > @@ -462,7 +452,7 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) > if (unlikely(total_len == 0)) > return 0; > > - __pipe_lock(pipe); > + mutex_lock(&pipe->mutex); > > if (!pipe->readers) { > send_sig(SIGPIPE, current, 0); > @@ -582,19 +572,19 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) > * after waiting we need to re-check whether the pipe > * become empty while we dropped the lock. > */ > - __pipe_unlock(pipe); > + mutex_unlock(&pipe->mutex); > if (was_empty) > wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM); > kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); > wait_event_interruptible_exclusive(pipe->wr_wait, pipe_writable(pipe)); > - __pipe_lock(pipe); > + mutex_lock(&pipe->mutex); > was_empty = pipe_empty(pipe->head, pipe->tail); > wake_next_writer = true; > } > out: > if (pipe_full(pipe->head, pipe->tail, pipe->max_usage)) > wake_next_writer = false; > - __pipe_unlock(pipe); > + mutex_unlock(&pipe->mutex); > > /* > * If we do do a wakeup event, we do a 'sync' wakeup, because we > @@ -629,7 +619,7 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) > > switch (cmd) { > case FIONREAD: > - __pipe_lock(pipe); > + mutex_lock(&pipe->mutex); > count = 0; > head = pipe->head; > tail = pipe->tail; > @@ -639,16 +629,16 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) > count += pipe->bufs[tail & mask].len; > tail++; > 
} > - __pipe_unlock(pipe); > + mutex_unlock(&pipe->mutex); > > return put_user(count, (int __user *)arg); > > #ifdef CONFIG_WATCH_QUEUE > case IOC_WATCH_QUEUE_SET_SIZE: { > int ret; > - __pipe_lock(pipe); > + mutex_lock(&pipe->mutex); > ret = watch_queue_set_size(pipe, arg); > - __pipe_unlock(pipe); > + mutex_unlock(&pipe->mutex); > return ret; > } > > @@ -734,7 +724,7 @@ pipe_release(struct inode *inode, struct file *file) > { > struct pipe_inode_info *pipe = file->private_data; > > - __pipe_lock(pipe); > + mutex_lock(&pipe->mutex); > if (file->f_mode & FMODE_READ) > pipe->readers--; > if (file->f_mode & FMODE_WRITE) > @@ -747,7 +737,7 @@ pipe_release(struct inode *inode, struct file *file) > kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); > kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); > } > - __pipe_unlock(pipe); > + mutex_unlock(&pipe->mutex); > > put_pipe_info(inode, pipe); > return 0; > @@ -759,7 +749,7 @@ pipe_fasync(int fd, struct file *filp, int on) > struct pipe_inode_info *pipe = filp->private_data; > int retval = 0; > > - __pipe_lock(pipe); > + mutex_lock(&pipe->mutex); > if (filp->f_mode & FMODE_READ) > retval = fasync_helper(fd, filp, on, &pipe->fasync_readers); > if ((filp->f_mode & FMODE_WRITE) && retval >= 0) { > @@ -768,7 +758,7 @@ pipe_fasync(int fd, struct file *filp, int on) > /* this can happen only if on == T */ > fasync_helper(-1, filp, 0, &pipe->fasync_readers); > } > - __pipe_unlock(pipe); > + mutex_unlock(&pipe->mutex); > return retval; > } > > @@ -834,6 +824,7 @@ struct pipe_inode_info *alloc_pipe_info(void) > pipe->nr_accounted = pipe_bufs; > pipe->user = user; > mutex_init(&pipe->mutex); > + lock_set_cmp_fn(&pipe->mutex, pipe_lock_cmp_fn, NULL); > return pipe; > } > > @@ -1144,7 +1135,7 @@ static int fifo_open(struct inode *inode, struct file *filp) > filp->private_data = pipe; > /* OK, we have a pipe and it's pinned down */ > > - __pipe_lock(pipe); > + mutex_lock(&pipe->mutex); > > /* We can only do regular read/write on 
fifos */ > stream_open(inode, filp); > @@ -1214,7 +1205,7 @@ static int fifo_open(struct inode *inode, struct file *filp) > } > > /* Ok! */ > - __pipe_unlock(pipe); > + mutex_unlock(&pipe->mutex); > return 0; > > err_rd: > @@ -1230,7 +1221,7 @@ static int fifo_open(struct inode *inode, struct file *filp) > goto err; > > err: > - __pipe_unlock(pipe); > + mutex_unlock(&pipe->mutex); > > put_pipe_info(inode, pipe); > return ret; > @@ -1411,7 +1402,7 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned int arg) > if (!pipe) > return -EBADF; > > - __pipe_lock(pipe); > + mutex_lock(&pipe->mutex); > > switch (cmd) { > case F_SETPIPE_SZ: > @@ -1425,7 +1416,7 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned int arg) > break; > } > > - __pipe_unlock(pipe); > + mutex_unlock(&pipe->mutex); > return ret; > } > > -- > 2.43.0 > -- Jan Kara <jack@xxxxxxxx> SUSE Labs, CR