On Fri, Feb 2, 2024 at 1:12 PM Jan Kara <jack@xxxxxxx> wrote: > > On Fri 26-01-24 21:08:28, Kent Overstreet wrote: > > *_lock_nested() is fundamentally broken; lockdep needs to check lock > > ordering, but we cannot devise a total ordering on an unbounded number > > of elements with only a few subclasses. > > > > The replacement is to define lock ordering with a proper comparison > > function. > > > > fs/pipe.c was already doing everything correctly otherwise, nothing > > much changes here. > > > > Cc: Alexander Viro <viro@xxxxxxxxxxxxxxxxxx> > > Cc: Christian Brauner <brauner@xxxxxxxxxx> > > Cc: Jan Kara <jack@xxxxxxx> > > Signed-off-by: Kent Overstreet <kent.overstreet@xxxxxxxxx> > > I had to digest for a while what this new lockdep lock ordering feature is > about. I have one pending question - what is the motivation of this > conversion of pipe code? AFAIU we don't have any problems with lockdep > annotations on pipe->mutex because there are always only two subclasses? > > Honza Hi, "Numbers talk - Bullshit walks." (Linus Torvalds) When it comes to pipes, I normally benchmark like this (example): root# cat /dev/sdc | pipebench > /dev/null Do you have numbers for your patch-series? Thanks. 
BG, -Sedat- [1] https://packages.debian.org/pipebench > > > --- > > fs/pipe.c | 81 +++++++++++++++++++++++++------------------------------ > > 1 file changed, 36 insertions(+), 45 deletions(-) > > > > diff --git a/fs/pipe.c b/fs/pipe.c > > index f1adbfe743d4..50c8a8596b52 100644 > > --- a/fs/pipe.c > > +++ b/fs/pipe.c > > @@ -76,18 +76,20 @@ static unsigned long pipe_user_pages_soft = PIPE_DEF_BUFFERS * INR_OPEN_CUR; > > * -- Manfred Spraul <manfred@xxxxxxxxxxxxxxxx> 2002-05-09 > > */ > > > > -static void pipe_lock_nested(struct pipe_inode_info *pipe, int subclass) > > +#define cmp_int(l, r) ((l > r) - (l < r)) > > + > > +#ifdef CONFIG_PROVE_LOCKING > > +static int pipe_lock_cmp_fn(const struct lockdep_map *a, > > + const struct lockdep_map *b) > > { > > - if (pipe->files) > > - mutex_lock_nested(&pipe->mutex, subclass); > > + return cmp_int((unsigned long) a, (unsigned long) b); > > } > > +#endif > > > > void pipe_lock(struct pipe_inode_info *pipe) > > { > > - /* > > - * pipe_lock() nests non-pipe inode locks (for writing to a file) > > - */ > > - pipe_lock_nested(pipe, I_MUTEX_PARENT); > > + if (pipe->files) > > + mutex_lock(&pipe->mutex); > > } > > EXPORT_SYMBOL(pipe_lock); > > > > @@ -98,28 +100,16 @@ void pipe_unlock(struct pipe_inode_info *pipe) > > } > > EXPORT_SYMBOL(pipe_unlock); > > > > -static inline void __pipe_lock(struct pipe_inode_info *pipe) > > -{ > > - mutex_lock_nested(&pipe->mutex, I_MUTEX_PARENT); > > -} > > - > > -static inline void __pipe_unlock(struct pipe_inode_info *pipe) > > -{ > > - mutex_unlock(&pipe->mutex); > > -} > > - > > void pipe_double_lock(struct pipe_inode_info *pipe1, > > struct pipe_inode_info *pipe2) > > { > > BUG_ON(pipe1 == pipe2); > > > > - if (pipe1 < pipe2) { > > - pipe_lock_nested(pipe1, I_MUTEX_PARENT); > > - pipe_lock_nested(pipe2, I_MUTEX_CHILD); > > - } else { > > - pipe_lock_nested(pipe2, I_MUTEX_PARENT); > > - pipe_lock_nested(pipe1, I_MUTEX_CHILD); > > - } > > + if (pipe1 > pipe2) > > + swap(pipe1, pipe2); > > + 
> > + pipe_lock(pipe1); > > + pipe_lock(pipe2); > > } > > > > static void anon_pipe_buf_release(struct pipe_inode_info *pipe, > > @@ -271,7 +261,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to) > > return 0; > > > > ret = 0; > > - __pipe_lock(pipe); > > + mutex_lock(&pipe->mutex); > > > > /* > > * We only wake up writers if the pipe was full when we started > > @@ -368,7 +358,7 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to) > > ret = -EAGAIN; > > break; > > } > > - __pipe_unlock(pipe); > > + mutex_unlock(&pipe->mutex); > > > > /* > > * We only get here if we didn't actually read anything. > > @@ -400,13 +390,13 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to) > > if (wait_event_interruptible_exclusive(pipe->rd_wait, pipe_readable(pipe)) < 0) > > return -ERESTARTSYS; > > > > - __pipe_lock(pipe); > > + mutex_lock(&pipe->mutex); > > was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage); > > wake_next_reader = true; > > } > > if (pipe_empty(pipe->head, pipe->tail)) > > wake_next_reader = false; > > - __pipe_unlock(pipe); > > + mutex_unlock(&pipe->mutex); > > > > if (was_full) > > wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM); > > @@ -462,7 +452,7 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) > > if (unlikely(total_len == 0)) > > return 0; > > > > - __pipe_lock(pipe); > > + mutex_lock(&pipe->mutex); > > > > if (!pipe->readers) { > > send_sig(SIGPIPE, current, 0); > > @@ -582,19 +572,19 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) > > * after waiting we need to re-check whether the pipe > > * become empty while we dropped the lock. 
> > */ > > - __pipe_unlock(pipe); > > + mutex_unlock(&pipe->mutex); > > if (was_empty) > > wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM); > > kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); > > wait_event_interruptible_exclusive(pipe->wr_wait, pipe_writable(pipe)); > > - __pipe_lock(pipe); > > + mutex_lock(&pipe->mutex); > > was_empty = pipe_empty(pipe->head, pipe->tail); > > wake_next_writer = true; > > } > > out: > > if (pipe_full(pipe->head, pipe->tail, pipe->max_usage)) > > wake_next_writer = false; > > - __pipe_unlock(pipe); > > + mutex_unlock(&pipe->mutex); > > > > /* > > * If we do do a wakeup event, we do a 'sync' wakeup, because we > > @@ -629,7 +619,7 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) > > > > switch (cmd) { > > case FIONREAD: > > - __pipe_lock(pipe); > > + mutex_lock(&pipe->mutex); > > count = 0; > > head = pipe->head; > > tail = pipe->tail; > > @@ -639,16 +629,16 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) > > count += pipe->bufs[tail & mask].len; > > tail++; > > } > > - __pipe_unlock(pipe); > > + mutex_unlock(&pipe->mutex); > > > > return put_user(count, (int __user *)arg); > > > > #ifdef CONFIG_WATCH_QUEUE > > case IOC_WATCH_QUEUE_SET_SIZE: { > > int ret; > > - __pipe_lock(pipe); > > + mutex_lock(&pipe->mutex); > > ret = watch_queue_set_size(pipe, arg); > > - __pipe_unlock(pipe); > > + mutex_unlock(&pipe->mutex); > > return ret; > > } > > > > @@ -734,7 +724,7 @@ pipe_release(struct inode *inode, struct file *file) > > { > > struct pipe_inode_info *pipe = file->private_data; > > > > - __pipe_lock(pipe); > > + mutex_lock(&pipe->mutex); > > if (file->f_mode & FMODE_READ) > > pipe->readers--; > > if (file->f_mode & FMODE_WRITE) > > @@ -747,7 +737,7 @@ pipe_release(struct inode *inode, struct file *file) > > kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); > > kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); > > } > > - 
__pipe_unlock(pipe); > > + mutex_unlock(&pipe->mutex); > > > > put_pipe_info(inode, pipe); > > return 0; > > @@ -759,7 +749,7 @@ pipe_fasync(int fd, struct file *filp, int on) > > struct pipe_inode_info *pipe = filp->private_data; > > int retval = 0; > > > > - __pipe_lock(pipe); > > + mutex_lock(&pipe->mutex); > > if (filp->f_mode & FMODE_READ) > > retval = fasync_helper(fd, filp, on, &pipe->fasync_readers); > > if ((filp->f_mode & FMODE_WRITE) && retval >= 0) { > > @@ -768,7 +758,7 @@ pipe_fasync(int fd, struct file *filp, int on) > > /* this can happen only if on == T */ > > fasync_helper(-1, filp, 0, &pipe->fasync_readers); > > } > > - __pipe_unlock(pipe); > > + mutex_unlock(&pipe->mutex); > > return retval; > > } > > > > @@ -834,6 +824,7 @@ struct pipe_inode_info *alloc_pipe_info(void) > > pipe->nr_accounted = pipe_bufs; > > pipe->user = user; > > mutex_init(&pipe->mutex); > > + lock_set_cmp_fn(&pipe->mutex, pipe_lock_cmp_fn, NULL); > > return pipe; > > } > > > > @@ -1144,7 +1135,7 @@ static int fifo_open(struct inode *inode, struct file *filp) > > filp->private_data = pipe; > > /* OK, we have a pipe and it's pinned down */ > > > > - __pipe_lock(pipe); > > + mutex_lock(&pipe->mutex); > > > > /* We can only do regular read/write on fifos */ > > stream_open(inode, filp); > > @@ -1214,7 +1205,7 @@ static int fifo_open(struct inode *inode, struct file *filp) > > } > > > > /* Ok! 
*/ > > - __pipe_unlock(pipe); > > + mutex_unlock(&pipe->mutex); > > return 0; > > > > err_rd: > > @@ -1230,7 +1221,7 @@ static int fifo_open(struct inode *inode, struct file *filp) > > goto err; > > > > err: > > - __pipe_unlock(pipe); > > + mutex_unlock(&pipe->mutex); > > > > put_pipe_info(inode, pipe); > > return ret; > > @@ -1411,7 +1402,7 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned int arg) > > if (!pipe) > > return -EBADF; > > > > - __pipe_lock(pipe); > > + mutex_lock(&pipe->mutex); > > > > switch (cmd) { > > case F_SETPIPE_SZ: > > @@ -1425,7 +1416,7 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned int arg) > > break; > > } > > > > - __pipe_unlock(pipe); > > + mutex_unlock(&pipe->mutex); > > return ret; > > } > > > > -- > > 2.43.0 > > > -- > Jan Kara <jack@xxxxxxxx> > SUSE Labs, CR >