On Tue, 2018-04-17 at 14:53 +0300, Kirill Tkhai wrote: > Hi, Jeff, > > On 17.04.2018 14:42, Jeff Layton wrote: > > On Thu, 2018-04-05 at 14:58 +0300, Kirill Tkhai wrote: > > > I observed the following deadlock between them: > > > > > > [task 1] [task 2] [task 3] > > > kill_fasync() mm_update_next_owner() copy_process() > > > spin_lock_irqsave(&fa->fa_lock) read_lock(&tasklist_lock) write_lock_irq(&tasklist_lock) > > > send_sigio() <IRQ> ... > > > read_lock(&fown->lock) kill_fasync() ... > > > read_lock(&tasklist_lock) spin_lock_irqsave(&fa->fa_lock) ... > > > > > > Task 1 can't acquire read locked tasklist_lock, since there is > > > already task 3 expressed its wish to take the lock exclusive. > > > Task 2 holds the read locked lock, but it can't take the spin lock. > > > > > > Also, there is possible another deadlock (which I haven't observed): > > > > > > [task 1] [task 2] > > > f_getown() kill_fasync() > > > read_lock(&f_own->lock) spin_lock_irqsave(&fa->fa_lock,) > > > <IRQ> send_sigio() write_lock_irq(&f_own->lock) > > > kill_fasync() read_lock(&fown->lock) > > > spin_lock_irqsave(&fa->fa_lock,) > > > > > > Actually, we do not need exclusive fa->fa_lock in kill_fasync_rcu(), > > > as it guarantees fa->fa_file->f_owner integrity only. It may seem, > > > that it used to give a task a small possibility to receive two sequential > > > signals, if there are two parallel kill_fasync() callers, and task > > > handles the first signal fastly, but the behaviour won't become > > > different, since there is exclusive sighand lock in do_send_sig_info(). > > > > > > The patch converts fa_lock into rwlock_t, and this fixes two above > > > deadlocks, as rwlock is allowed to be taken from interrupt handler > > > by qrwlock design. > > > > > > Signed-off-by: Kirill Tkhai <ktkhai@xxxxxxxxxxxxx> > > > > > > I used the following program for testing: > > > > > > #include <unistd.h> > > > #include <stdlib.h> > > > #include <signal.h> > > > #include <fcntl.h> > > > #include <errno.h> > > > #include <stdio.h> > > > > > > #ifndef F_SETSIG > > > #define F_SETSIG 10 > > > #endif > > > > > > void handler(int sig) > > > { > > > } > > > > > > main() > > > { > > > unsigned int flags; > > > int fd; > > > > > > system("echo 8 > /proc/sys/kernel/random/read_wakeup_threshold"); > > > system("while :; do ls -R / > /dev/random 2>&1 ; echo 3 > /proc/sys/vm/drop_caches; done &"); > > > > > > if (signal(SIGINT, handler) < 0) { > > > perror("Signal"); > > > exit(1); > > > } > > > > > > fd = open("/dev/random", O_RDWR); > > > if (fd < 0) { > > > perror("Can't open"); > > > exit(1); > > > } > > > > > > flags = FASYNC | fcntl(fd, F_GETFL); > > > if (fcntl(fd, F_SETFL, flags) < 0) { > > > perror("Setfl"); > > > exit(1); > > > } > > > if (fcntl(fd, F_SETOWN, getpid())) { > > > perror("Setown"); > > > exit(1); > > > } > > > if (fcntl(fd, F_SETSIG, SIGINT)) { > > > perror("Setsig"); > > > exit(1); > > > } > > > > > > while (1) > > > sleep(100); > > > } > > > --- > > > fs/fcntl.c | 15 +++++++-------- > > > include/linux/fs.h | 2 +- > > > 2 files changed, 8 insertions(+), 9 deletions(-) > > > > > > diff --git a/fs/fcntl.c b/fs/fcntl.c > > > index 1e97f1fda90c..780161a11f9d 100644 > > > --- a/fs/fcntl.c > > > +++ b/fs/fcntl.c > > > @@ -865,9 +865,9 @@ int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp) > > > if (fa->fa_file != filp) > > > continue; > > > > > > - spin_lock_irq(&fa->fa_lock); > > > + write_lock_irq(&fa->fa_lock); > > > fa->fa_file = NULL; > > > - spin_unlock_irq(&fa->fa_lock); > > > + write_unlock_irq(&fa->fa_lock); > > > > > > *fp = fa->fa_next; > > > call_rcu(&fa->fa_rcu, fasync_free_rcu); > > > @@ -912,13 +912,13 @@ struct fasync_struct *fasync_insert_entry(int fd, struct file *filp, struct fasy > > > if (fa->fa_file != filp) > > > continue; > > > > > > - spin_lock_irq(&fa->fa_lock); > > > + write_lock_irq(&fa->fa_lock); > > > fa->fa_fd = fd; > > > - spin_unlock_irq(&fa->fa_lock); > > > + write_unlock_irq(&fa->fa_lock); > > > goto out; > > > } > > > > > > - spin_lock_init(&new->fa_lock); > > > + rwlock_init(&new->fa_lock); > > > new->magic = FASYNC_MAGIC; > > > new->fa_file = filp; > > > new->fa_fd = fd; > > > @@ -981,14 +981,13 @@ static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band) > > > { > > > while (fa) { > > > struct fown_struct *fown; > > > - unsigned long flags; > > > > > > if (fa->magic != FASYNC_MAGIC) { > > > printk(KERN_ERR "kill_fasync: bad magic number in " > > > "fasync_struct!\n"); > > > return; > > > } > > > - spin_lock_irqsave(&fa->fa_lock, flags); > > > + read_lock(&fa->fa_lock); > > > > Does this need to be read_lock_irq? Why is it ok to allow interrupts > > here? > > Read locked rwlock can be taken for reading from IRQ once again even > if there is a writer pending, while spin lock can't: > > void queued_read_lock_slowpath(struct qrwlock *lock) > { > /* > * Readers come here when they cannot get the lock without waiting > */ > if (unlikely(in_interrupt())) { > /* > * Readers in interrupt context will get the lock immediately > * if the writer is just waiting (not holding the lock yet), > * so spin with ACQUIRE semantics until the lock is available > * without waiting in the queue. > */ > atomic_cond_read_acquire(&lock->cnts, !(VAL & _QW_LOCKED)); > return; > } > > So, when we replace spinlock with read_lock(), we don't need disable IRQs anymore. > All we need is to make write_lock always disable IRQs. Got it, thanks. read_lock_irq is still used in several (rather obscure) places. Does this mean that we should do a global s/read_lock_irq/read_lock/ and remove it? Or is it still useful to disable irqs for some read_lock acquisitions? > > > > if (fa->fa_file) { > > > fown = &fa->fa_file->f_owner; > > > /* Don't send SIGURG to processes which have not set a > > > @@ -997,7 +996,7 @@ static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band) > > > if (!(sig == SIGURG && fown->signum == 0)) > > > send_sigio(fown, fa->fa_fd, band); > > > } > > > - spin_unlock_irqrestore(&fa->fa_lock, flags); > > > + read_unlock(&fa->fa_lock); > > > fa = rcu_dereference(fa->fa_next); > > > } > > > } > > > diff --git a/include/linux/fs.h b/include/linux/fs.h > > > index c6baf767619e..297e2dcd9dd2 100644 > > > --- a/include/linux/fs.h > > > +++ b/include/linux/fs.h > > > @@ -1250,7 +1250,7 @@ static inline int locks_lock_file_wait(struct file *filp, struct file_lock *fl) > > > } > > > > > > struct fasync_struct { > > > - spinlock_t fa_lock; > > > + rwlock_t fa_lock; > > > int magic; > > > int fa_fd; > > > struct fasync_struct *fa_next; /* singly linked list */ > > > > > > > I've no objection to the patch in principle, but I'm not as familiar > > with the fasync code as others here. > > I took the reviewers list from MAINTAINERS and ./scripts/get_maintainer.pl, > don't have an ideas what else should be CCed. No worries. The patch seems sane enough to me. You can add: Acked-by: Jeff Layton <jlayton@xxxxxxxxxx>