On (19/11/19 10:41), Petr Mladek wrote: [..] > > > I do not like this. As a result, normal printk() will always deadlock > > > in the scheduler code, including WARN() calls. The chance of the > > > deadlock is small now. It happens only when there is another > > > process waiting for console_sem. > > > > Why would it *always* deadlock? If this is the case, why we don't *always* > > deadlock doing the very same wake_up_process() from console_unlock()? > > I speak about _normal_ printk() and not about printk_deferred(). > > wake_up_process() is called in console_unlock() only when > sem->wait_list is not empty, see up() in kernel/locking/semaphore.c. > printk() itself uses console_trylock() and does not wait. Sorry, I should have spent more time on this. So here's what I should have written instead. console_sem is not for kthreads or kernel only. User space apps can sleep on it. And on some setups console_sem becomes really congested, with often times non-empty console_sem->wait_list. All because of user space, we don't even need printk()-s. For example, when user space uses tty for I/O. Significantly trimmed logs from my laptop (a simple patch below). Just to show that user space sleeps on console_sem (IOW, non-empty ->wait_list). Notice - ls, bash, journalctl, etc. All competing for console_sem. [..] printk: >>> bash WOKEUP console_sem() waiter printk: task journalctl couldn't lock console_sem and had to schedule out Call Trace: dump_stack+0x50/0x70 console_lock.cold+0x21/0x26 con_write+0x28/0x50 n_tty_write+0x12f/0x4c0 ? wait_woken+0xa0/0xa0 tty_write+0x1a9/0x2f0 ? n_tty_read+0x930/0x930 vfs_write+0xb7/0x1e0 printk: >>> ls WOKEUP console_sem() waiter printk: task kworker/3:1 couldn't lock console_sem and had to schedule out Workqueue: events console_callback Call Trace: dump_stack+0x50/0x70 console_lock.cold+0x21/0x26 console_callback+0xa/0x150 ? _cond_resched+0x15/0x50 process_one_work+0x18b/0x2e0 worker_thread+0x4e/0x3d0 ? process_one_work+0x2e0/0x2e0 kthread+0x119/0x130 printk: >>> kworker/3:1 WOKEUP console_sem() waiter printk: task ls couldn't lock console_sem and had to schedule out Call Trace: dump_stack+0x50/0x70 console_lock.cold+0x21/0x26 con_write+0x28/0x50 do_output_char+0x191/0x1f0 n_tty_write+0x17e/0x4c0 ? __switch_to_asm+0x24/0x60 ? wait_woken+0xa0/0xa0 tty_write+0x1a9/0x2f0 ? n_tty_read+0x930/0x930 vfs_write+0xb7/0x1e0 [..] > I believe that this is the rason why printk_sched() was added > so late in 2012. It was more than 10 years after adding > the semaphore into console_unlock(). IMHO, the deadlock > was rare. Of course, it was also hard to debug but it > would not take 10 years. Right. I also think scheduler people do pretty nice work avoiding printk calls under ->rq lock. What I tried to say - it's really not that hard to have a non-empty console_sem ->wait_list, any "wrong" printk() call from scheduler will deadlock us, because we have something to wake_up(). /////////////////////////////////////////////////////////////////////// // A simple patch --- diff --git a/include/linux/semaphore.h b/include/linux/semaphore.h index 6694d0019a68..c80a631f5f96 100644 --- a/include/linux/semaphore.h +++ b/include/linux/semaphore.h @@ -35,11 +35,11 @@ static inline void sema_init(struct semaphore *sem, int val) lockdep_init_map(&sem->lock.dep_map, "semaphore->lock", &__key, 0); } -extern void down(struct semaphore *sem); +extern bool down(struct semaphore *sem); extern int __must_check down_interruptible(struct semaphore *sem); extern int __must_check down_killable(struct semaphore *sem); extern int __must_check down_trylock(struct semaphore *sem); extern int __must_check down_timeout(struct semaphore *sem, long jiffies); -extern void up(struct semaphore *sem); +extern bool up(struct semaphore *sem); #endif /* __LINUX_SEMAPHORE_H */ diff --git a/kernel/locking/semaphore.c b/kernel/locking/semaphore.c index d9dd94defc0a..daefcbe08147 100644 --- a/kernel/locking/semaphore.c +++ b/kernel/locking/semaphore.c @@ -50,16 +50,20 @@ static noinline void __up(struct semaphore *sem); * Use of this function is deprecated, please use down_interruptible() or * down_killable() instead. */ -void down(struct semaphore *sem) +bool down(struct semaphore *sem) { unsigned long flags; + bool ret = false; raw_spin_lock_irqsave(&sem->lock, flags); if (likely(sem->count > 0)) sem->count--; - else + else { __down(sem); + ret = true; + } raw_spin_unlock_irqrestore(&sem->lock, flags); + return ret; } EXPORT_SYMBOL(down); @@ -175,16 +179,20 @@ EXPORT_SYMBOL(down_timeout); * Release the semaphore. Unlike mutexes, up() may be called from any * context and even by tasks which have never called down(). */ -void up(struct semaphore *sem) +bool up(struct semaphore *sem) { unsigned long flags; + bool ret = false; raw_spin_lock_irqsave(&sem->lock, flags); - if (likely(list_empty(&sem->wait_list))) + if (likely(list_empty(&sem->wait_list))) { sem->count++; - else + } else { __up(sem); + ret = true; + } raw_spin_unlock_irqrestore(&sem->lock, flags); + return ret; } EXPORT_SYMBOL(up); diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index ca65327a6de8..42ffaaa01859 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -214,12 +214,21 @@ int devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write, /* Number of registered extended console drivers. */ static int nr_ext_console_drivers; +static void ____down(void) +{ + bool sched_out = down(&console_sem); + if (sched_out) { + pr_err("task %s couldn't lock console_sem and had to schedule out\n", current->comm); + dump_stack(); + } +} + /* * Helper macros to handle lockdep when locking/unlocking console_sem. We use * macros instead of functions so that _RET_IP_ contains useful information. */ #define down_console_sem() do { \ - down(&console_sem);\ + ____down(); \ mutex_acquire(&console_lock_dep_map, 0, 0, _RET_IP_);\ } while (0) @@ -244,15 +253,19 @@ static int __down_trylock_console_sem(unsigned long ip) } #define down_trylock_console_sem() __down_trylock_console_sem(_RET_IP_) -static void __up_console_sem(unsigned long ip) +static bool __up_console_sem(unsigned long ip) { unsigned long flags; + bool ret; mutex_release(&console_lock_dep_map, 1, ip); printk_safe_enter_irqsave(flags); - up(&console_sem); + ret = up(&console_sem); + if (ret) + pr_err(">>> %s WOKEUP console_sem() waiter\n", current->comm); printk_safe_exit_irqrestore(flags); + return ret; } #define up_console_sem() __up_console_sem(_RET_IP_)