Hi, Jeff,

On 17.04.2018 14:42, Jeff Layton wrote:
> On Thu, 2018-04-05 at 14:58 +0300, Kirill Tkhai wrote:
>> I observed the following deadlock between them:
>>
>> [task 1]                          [task 2]                         [task 3]
>> kill_fasync()                     mm_update_next_owner()           copy_process()
>>  spin_lock_irqsave(&fa->fa_lock)   read_lock(&tasklist_lock)        write_lock_irq(&tasklist_lock)
>>   send_sigio()                    <IRQ>                             ...
>>    read_lock(&fown->lock)          kill_fasync()                    ...
>>     read_lock(&tasklist_lock)       spin_lock_irqsave(&fa->fa_lock)  ...
>>
>> Task 1 can't acquire the read-locked tasklist_lock, since task 3 has
>> already expressed its wish to take the lock exclusively. Task 2 holds
>> tasklist_lock for reading, but it can't take the spin lock.
>>
>> Another deadlock is also possible (which I haven't observed):
>>
>> [task 1]                            [task 2]
>> f_getown()                          kill_fasync()
>>  read_lock(&f_own->lock)             spin_lock_irqsave(&fa->fa_lock,)
>>  <IRQ>                                send_sigio()      write_lock_irq(&f_own->lock)
>>   kill_fasync()                        read_lock(&fown->lock)
>>    spin_lock_irqsave(&fa->fa_lock,)
>>
>> Here task 2 can't take read_lock(&fown->lock), because another task is
>> already queued for the write lock, while task 1 holds the read lock
>> and spins on fa->fa_lock from IRQ context.
>>
>> Actually, we do not need an exclusive fa->fa_lock in kill_fasync_rcu(),
>> as it only guarantees the integrity of fa->fa_file->f_owner. It may
>> seem that the exclusive lock prevented a task from receiving two
>> sequential signals, when there are two parallel kill_fasync() callers
>> and the task handles the first signal quickly, but the behaviour won't
>> become different, since do_send_sig_info() takes the exclusive sighand
>> lock anyway.
>>
>> The patch converts fa_lock into an rwlock_t, and this fixes both
>> deadlocks above, since qrwlock is designed to allow a read lock to be
>> taken from an interrupt handler.
>>
>> Signed-off-by: Kirill Tkhai <ktkhai@xxxxxxxxxxxxx>
>>
>> I used the following program for testing:
>>
>> #include <unistd.h>
>> #include <stdlib.h>
>> #include <signal.h>
>> #include <fcntl.h>
>> #include <errno.h>
>> #include <stdio.h>
>>
>> #ifndef F_SETSIG
>> #define F_SETSIG 10
>> #endif
>>
>> void handler(int sig)
>> {
>> }
>>
>> int main(void)
>> {
>> 	unsigned int flags;
>> 	int fd;
>>
>> 	/* Lower the wakeup threshold so /dev/random readers are woken often. */
>> 	system("echo 8 > /proc/sys/kernel/random/read_wakeup_threshold");
>> 	/* Generate constant /dev/random traffic in the background. */
>> 	system("while :; do ls -R / > /dev/random 2>&1 ; echo 3 > /proc/sys/vm/drop_caches; done &");
>>
>> 	if (signal(SIGINT, handler) == SIG_ERR) {
>> 		perror("Signal");
>> 		exit(1);
>> 	}
>>
>> 	fd = open("/dev/random", O_RDWR);
>> 	if (fd < 0) {
>> 		perror("Can't open");
>> 		exit(1);
>> 	}
>>
>> 	flags = FASYNC | fcntl(fd, F_GETFL);
>> 	if (fcntl(fd, F_SETFL, flags) < 0) {
>> 		perror("Setfl");
>> 		exit(1);
>> 	}
>> 	if (fcntl(fd, F_SETOWN, getpid())) {
>> 		perror("Setown");
>> 		exit(1);
>> 	}
>> 	if (fcntl(fd, F_SETSIG, SIGINT)) {
>> 		perror("Setsig");
>> 		exit(1);
>> 	}
>>
>> 	while (1)
>> 		sleep(100);
>> }
>> ---
>>  fs/fcntl.c         | 15 +++++++--------
>>  include/linux/fs.h |  2 +-
>>  2 files changed, 8 insertions(+), 9 deletions(-)
>>
>> diff --git a/fs/fcntl.c b/fs/fcntl.c
>> index 1e97f1fda90c..780161a11f9d 100644
>> --- a/fs/fcntl.c
>> +++ b/fs/fcntl.c
>> @@ -865,9 +865,9 @@ int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp)
>>  		if (fa->fa_file != filp)
>>  			continue;
>>
>> -		spin_lock_irq(&fa->fa_lock);
>> +		write_lock_irq(&fa->fa_lock);
>>  		fa->fa_file = NULL;
>> -		spin_unlock_irq(&fa->fa_lock);
>> +		write_unlock_irq(&fa->fa_lock);
>>
>>  		*fp = fa->fa_next;
>>  		call_rcu(&fa->fa_rcu, fasync_free_rcu);
>> @@ -912,13 +912,13 @@ struct fasync_struct *fasync_insert_entry(int fd, struct file *filp, struct fasy
>>  		if (fa->fa_file != filp)
>>  			continue;
>>
>> -		spin_lock_irq(&fa->fa_lock);
>> +		write_lock_irq(&fa->fa_lock);
>>  		fa->fa_fd = fd;
>> -		spin_unlock_irq(&fa->fa_lock);
>> +		write_unlock_irq(&fa->fa_lock);
>>  		goto out;
>>  	}
>>
>> -	spin_lock_init(&new->fa_lock);
>> +	rwlock_init(&new->fa_lock);
>>  	new->magic = FASYNC_MAGIC;
>>  	new->fa_file = filp;
>>  	new->fa_fd = fd;
>> @@ -981,14 +981,13 @@ static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band)
>>  {
>>  	while (fa) {
>>  		struct fown_struct *fown;
>> -		unsigned long flags;
>>
>>  		if (fa->magic != FASYNC_MAGIC) {
>>  			printk(KERN_ERR "kill_fasync: bad magic number in "
>>  			       "fasync_struct!\n");
>>  			return;
>>  		}
>> -		spin_lock_irqsave(&fa->fa_lock, flags);
>> +		read_lock(&fa->fa_lock);
>
> Does this need to be read_lock_irq? Why is it ok to allow interrupts
> here?

A read-locked rwlock can be taken for reading from IRQ context once
again, even if there is a writer pending, while a spinlock can't.
See queued_read_lock_slowpath() in kernel/locking/qrwlock.c:

void queued_read_lock_slowpath(struct qrwlock *lock)
{
	/*
	 * Readers come here when they cannot get the lock without waiting
	 */
	if (unlikely(in_interrupt())) {
		/*
		 * Readers in interrupt context will get the lock immediately
		 * if the writer is just waiting (not holding the lock yet),
		 * so spin with ACQUIRE semantics until the lock is available
		 * without waiting in the queue.
		 */
		atomic_cond_read_acquire(&lock->cnts, !(VAL & _QW_LOCKED));
		return;
	}
	...

So, when we replace the spinlock with read_lock(), we no longer need to
disable IRQs on the reader side. All we need is to make write_lock()
always disable IRQs.
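To make the resulting rule explicit, here is a minimal sketch of the
pattern the patch switches to. This is illustration only, not code from
the patch: struct toy and the toy_* helpers are made up.

#include <linux/spinlock.h>	/* rwlock_t, read_lock(), write_lock_irq() */

/* Hypothetical structure protected the same way as fasync_struct. */
struct toy {
	rwlock_t lock;
	int	 val;
};

static void toy_init(struct toy *t)
{
	rwlock_init(&t->lock);
	t->val = 0;
}

/*
 * Reader: safe in any context, including hard IRQ, without masking
 * interrupts, because qrwlock grants in_interrupt() readers the lock
 * even while a writer is queued behind the current readers.
 */
static int toy_read(struct toy *t)
{
	int v;

	read_lock(&t->lock);
	v = t->val;
	read_unlock(&t->lock);
	return v;
}

/*
 * Writer: process context only. IRQs must be disabled here, so that
 * an interrupt on this CPU can't try to read_lock() while we hold the
 * lock for writing -- the IRQ reader would spin forever on a lock its
 * own CPU holds.
 */
static void toy_write(struct toy *t, int v)
{
	write_lock_irq(&t->lock);
	t->val = v;
	write_unlock_irq(&t->lock);
}

This is the shape of the patch below: kill_fasync_rcu() takes plain
read_lock() even from interrupt handlers, while fasync_insert_entry()
and fasync_remove_entry() modify the entry under write_lock_irq().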
>>  		if (fa->fa_file) {
>>  			fown = &fa->fa_file->f_owner;
>>  			/* Don't send SIGURG to processes which have not set a
>> @@ -997,7 +996,7 @@ static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band)
>>  			if (!(sig == SIGURG && fown->signum == 0))
>>  				send_sigio(fown, fa->fa_fd, band);
>>  		}
>> -		spin_unlock_irqrestore(&fa->fa_lock, flags);
>> +		read_unlock(&fa->fa_lock);
>>  		fa = rcu_dereference(fa->fa_next);
>>  	}
>>  }
>> diff --git a/include/linux/fs.h b/include/linux/fs.h
>> index c6baf767619e..297e2dcd9dd2 100644
>> --- a/include/linux/fs.h
>> +++ b/include/linux/fs.h
>> @@ -1250,7 +1250,7 @@ static inline int locks_lock_file_wait(struct file *filp, struct file_lock *fl)
>>  }
>>
>>  struct fasync_struct {
>> -	spinlock_t		fa_lock;
>> +	rwlock_t		fa_lock;
>>  	int			magic;
>>  	int			fa_fd;
>>  	struct fasync_struct	*fa_next; /* singly linked list */
>>
>
> I've no objection to the patch in principle, but I'm not as familiar
> with the fasync code as others here.

I took the reviewers list from MAINTAINERS and ./scripts/get_maintainer.pl;
I don't have any ideas about who else should be CCed.

Thanks,
Kirill