Patchset for kernel 3.10 (fixing lockup in printk)

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hallo all,

attached to this mail you will find a couple of patches fixing one bug I
have with kernel 3.10 (all subreleases).
These patches have been developed originally by Jan Kara (jack@xxxxxxx,
I guess you
know him better than I do) for kernel 3.13 and can be found here:
https://lkml.org/lkml/2014/3/25/343
I just have backported them to kernel 3.10.

Also I have created (at least it was a try by myself) a bugentry on
kernel.org:
https://bugzilla.kernel.org/show_bug.cgi?id=71231
(I have to update it yet)

Summarize:
Creating lots of output on console that is going to be printed on serial
port too (via kernel command line "console=ttyS0") will hang several
systems (mostly Dell servers with multiple CPUs).
I can easily reproduce this behaviour by creating about 50 virtual LUNs
via fibrechannel at once.
This patch from Jan introduces a buffering and better locking in printk
(as far as I understood him or his patches right).

Without this patches I get teh following stacktraces and the system is
not responding after a couple of seconds:

  119.504736] Code: 00 48 89 f0 48 c7 06 00 00 00 00 48 89 e5 48 87 07
48 85 c0 75 09 c7 46 08 01 00 00 00 5d c3 48 89 30 8b 46 08 85 c0 75 f4
f3 90 <8b> 46 08 85 c0 74 f7 5d c3 0f 1f 44 00 00 48 8b 16 55 48 89 e5 [
 119.504740] NMI backtrace for cpu 44
[  119.504746] CPU: 44 PID: 21877 Comm: lsscsi Not tainted 3.10.32-64bit #1
[  119.504748] Hardware name: Dell Inc. PowerEdge R910/0P658H, BIOS
1.0.1 02/19/2010
[  119.504751] task: ffff881063510d60 ti: ffff88106351c000 task.ti:
ffff88106351c000
[  119.504760] RIP: 0010:[<ffffffff8105e505>]  [<ffffffff8105e505>]
mspin_lock+0x35/0x40
[  119.504763] RSP: 0018:ffff88106351db38  EFLAGS: 00000246
[  119.504765] RAX: 0000000000000000 RBX: ffffffff81aa9220 RCX:
0000000000000028
[  119.504768] RDX: ffff88106351dfd8 RSI: ffff88106351db60 RDI:
ffffffff81aa9240
[  119.504770] RBP: ffff88106351db38 R08: 70722f3374736f68 R09:
fefefefefefefeff
[  119.504772] R10: ffff88086ddc5bc0 R11: 8f8dd0cc8b8c9097 R12:
ffff88106351db60
[  119.504774] R13: ffffffff81aa9240 R14: ffff881063510d60 R15:
ffff88106351dca8
[  119.504777] FS:  0000000000000000(0000) GS:ffff88046fd60000(0063)
knlGS:00000000f762f8d0
[  119.504780] CS:  0010 DS: 002b ES: 002b CR0: 0000000080050033
[  119.504782] CR2: 00000000f777c000 CR3: 00000010684ea000 CR4:
00000000000007e0
[  119.504785] DR0: 0000000000000000 DR1: 0000000000000000 DR2:
0000000000000000
[  119.504788] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7:
0000000000000400
[  119.504789] Stack:
[  119.504794]  ffff88106351dba8 ffffffff81766d5d ffff88106351db58
ffff88106351dfd8
[  119.504798]  ffff88106351dba8 ffff881066ebfb90 ffff881000000000
ffffffff00000003
[  119.504803]  ffff88106351db98 ffffffff81aa9220 ffff88106351dc98
ffff88046c3a94d0
[  119.504804] Call Trace:
[  119.504812]  [<ffffffff81766d5d>] __mutex_lock_slowpath+0x5d/0x1d0
[  119.504817]  [<ffffffff817667dd>] mutex_lock+0x1d/0x40
[  119.504823]  [<ffffffff81156ba5>] sysfs_dentry_revalidate+0x35/0x100
[  119.504828]  [<ffffffff810ff389>] lookup_fast+0x299/0x2e0
[  119.504833]  [<ffffffff810ff586>] ? __inode_permission+0x46/0x70
[  119.504838]  [<ffffffff810ff79a>] link_path_walk+0x19a/0x810
[  119.504843]  [<ffffffff810ff994>] link_path_walk+0x394/0x810
[  119.504848]  [<ffffffff810b9b8f>] ? release_pages+0x18f/0x1e0
[  119.504853]  [<ffffffff81103094>] path_openat.isra.72+0x94/0x460
[  119.504859]  [<ffffffff810cbf84>] ? tlb_flush_mmu+0x54/0x90
[  119.504864]  [<ffffffff810d2c79>] ? unmap_region+0xd9/0x120
[  119.504869]  [<ffffffff8110368c>] do_filp_open+0x3c/0x90
[  119.504875]  [<ffffffff8110ee52>] ? __alloc_fd+0x42/0x100
[  119.504882]  [<ffffffff810f3a3f>] do_sys_open+0xef/0x1d0
[  119.504888]  [<ffffffff8113a036>] compat_SyS_open+0x16/0x20
[  119.504893]  [<ffffffff8176b598>] sysenter_dispatch+0x7/0x25
[  119.504938] Code: f0 48 c7 06 00 00 00 00 48 89 e5 48 87 07 48 85 c0
75 09 c7 46 08 01 00 00 00 5d c3 48 89 30 8b 46 08 85 c0 75 f4 f3 90 8b
46 08 <85> c0 74 f7 5d c3 0f 1f 44 00 00 48 8b 16 55 48 89 e5 48 85 d2 [
 119.504942] NMI backtrace for cpu 45
[  119.504946] CPU: 45 PID: 0 Comm: swapper/45 Not tainted 3.10.32-64bit #1
[  119.504949] Hardware name: Dell Inc. PowerEdge R910/0P658H, BIOS
1.0.1 02/19/2010
[  119.504952] task: ffff88046dcd6b00 ti: ffff88046dcf6000 task.ti:
ffff88046dcf6000
[  119.504960] RIP: 0010:[<ffffffff8102318a>]  [<ffffffff8102318a>]
mwait_idle_with_hints+0x5a/0x70
[  119.504962] RSP: 0018:ffff88046dcf7dd8  EFLAGS: 00000046
[  119.504965] RAX: 0000000000000000 RBX: ffff88106d07d46c RCX:
0000000000000001
[  119.504967] RDX: 0000000000000000 RSI: 0000000000000001 RDI:
0000000000000000
[  119.504970] RBP: ffff88046dcf7dd8 R08: ffff88046dcf7fd8 R09:
000000000000001c
[  119.504972] R10: 0000000000000166 R11: 00000000016f1e50 R12:
0000000000000001
[  119.504975] R13: ffff88106d07d400 R14: 0000000000000001 R15:
ffffffffa0004830
[  119.504978] FS:  0000000000000000(0000) GS:ffff88086fd60000(0000)
knlGS:0000000000000000
[  119.504981] CS:  0010 DS: 0000 ES: 0000 CR0: 000000008005003b
[  119.504984] CR2: 00000000f7746000 CR3: 000000046cb57000 CR4:
00000000000007e0
[  119.504986] DR0: 0000000000000000 DR1: 0000000000000000 DR2:
0000000000000000
[  119.504989] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7:
0000000000000400
[  119.504989] Stack:
[  119.504994]  ffff88046dcf7de8 ffffffff810231cd ffff88046dcf7df8
ffffffffa0001515
[  119.504999]  ffff88046dcf7e28 ffffffffa000164a 00000000000000f5
ffff88106c669000
[  119.505003]  ffffffffa00047c0 0000001aaf4aa38f ffff88046dcf7e88
ffffffff8164e83a
[  119.505004] Call Trace:
[  119.505010]  [<ffffffff810231cd>]
acpi_processor_ffh_cstate_enter+0x2d/0x30
[  119.505021]  [<ffffffffa0001515>] acpi_idle_do_entry+0x10/0x2b
[processor]
[  119.505028]  [<ffffffffa000164a>] acpi_idle_enter_c1+0x5c/0x81
[processor]
[  119.505035]  [<ffffffff8164e83a>] cpuidle_enter_state+0x4a/0xe0
[  119.505041]  [<ffffffff8164e96e>] cpuidle_idle_call+0x9e/0x160
[  119.505046]  [<ffffffff8106092d>] ? __atomic_notifier_call_chain+0xd/0x10
[  119.505052]  [<ffffffff8100af49>] arch_cpu_idle+0x9/0x30
[  119.505057]  [<ffffffff81073241>] cpu_startup_entry+0x91/0x180
[  119.505063]  [<ffffffff81b4be73>] start_secondary+0x1a0/0x1a4


Should these patches be merged into kernel 3.10 line?
And maybe also to 3.12? Or is there any other better solution?

Best regards,
Alex




diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 74826b1e2529..3c6d5aec501a 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2621,18 +2621,19 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			Format: <bool>  (1/Y/y=enable, 0/N/n=disable)
 			default: disabled
 
-	printk.offload_chars=
+	printk.offload_chars=	[KNL]
 			Printing to console can be relatively slow especially
 			in case of serial console. When there is intensive
 			printing happening from several cpus (as is the case
 			during boot), a cpu can be spending significant time
 			(seconds or more) doing printing. To avoid softlockups,
 			lost interrupts, and similar problems other cpus
-			will take over printing after the currently printing
-			cpu has printed 'printk.offload_chars' characters.
-			Higher value means possibly longer interrupt and other
-			latencies but lower overhead of printing due to handing
-			over of printing.
+			will take over printing (if CONFIG_PRINTK_OFFLOAD=y)
+			after the currently printing cpu has printed
+			'printk.offload_chars' characters. Higher value means
+			possibly longer interrupt and other latencies but
+			lower overhead of printing due to handing over of
+			printing.
 			Format: <number> (0 = disabled)
 			default: 1000
 
diff --git a/init/Kconfig b/init/Kconfig
index 009a797dd242..45aa7368d92f 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1343,6 +1343,20 @@ config PRINTK
 	  very difficult to diagnose system problems, saying N here is
 	  strongly discouraged.
 
+config PRINTK_OFFLOAD
+	default y
+	bool "Enable support for offloading printing to different CPU"
+	depends on PRINTK
+	help
+	  Printing to console can be relatively slow especially in case of
+	  serial console. On large machines when there is intensive printing
+	  happening from several cpus (as is the case during boot), a cpu can
+	  be spending significant time (seconds or more) doing printing. To
+	  avoid softlockups, lost interrupts, and similar problems other cpus
+	  will take over printing after the currently printing cpu has printed
+	  certain number of characters (tunable via 'printk.offload_chars'
+	  kernel parameter).
+
 config BUG
 	bool "BUG() support" if EXPERT
 	default y
diff --git a/kernel/printk.c b/kernel/printk.c
index 1c0577383af5..c3ad3b834f68 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -103,6 +103,7 @@ enum {
 };
 static long printk_handover_state;
 
+#ifdef CONFIG_PRINTK_OFFLOAD
 /*
  * Number of kernel threads for offloading printing. We need at least two so
  * that they can hand over printing from one to another one and thus switch
@@ -116,6 +117,7 @@ static DEFINE_MUTEX(printk_kthread_mutex);
 
 /* Wait queue printing kthreads sleep on when idle */
 static DECLARE_WAIT_QUEUE_HEAD(print_queue); 
+#endif	/* CONFIG_PRINTK_OFFLOAD */
 
 #ifdef CONFIG_LOCKDEP
 static struct lockdep_map console_lock_dep_map = {
@@ -284,6 +286,7 @@ static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN);
 static char *log_buf = __log_buf;
 static u32 log_buf_len = __LOG_BUF_LEN;
 
+#ifdef CONFIG_PRINTK_OFFLOAD
 static int offload_chars_set(const char *val, const struct kernel_param *kp);
 static struct kernel_param_ops offload_chars_ops = {
 	.set = offload_chars_set,
@@ -302,6 +305,7 @@ module_param_cb(offload_chars, &offload_chars_ops, &printk_offload_chars,
 		   S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(offload_chars, "offload printing to console to a different"
 	" cpu after this number of characters");
+#endif
 
 /* human readable text of the record */
 static char *log_text(const struct printk_log *msg)
@@ -2021,6 +2025,7 @@ int console_trylock(void)
 }
 EXPORT_SYMBOL(console_trylock);
 
+#ifdef CONFIG_PRINTK_OFFLOAD
 /*
  * This is a version of console_lock() which spins to acquire console_sem.
  * It is only for use by threads that take care of flushing printk buffer so
@@ -2052,6 +2057,7 @@ static int console_lock_try_spin(void)
 	mutex_acquire(&console_lock_dep_map, 0, 0, _RET_IP_);
 	return 1;
 }
+#endif
 
 int is_console_locked(void)
 {
@@ -2087,6 +2093,7 @@ out:
 	raw_spin_unlock_irqrestore(&logbuf_lock, flags);
 }
 
+#ifdef CONFIG_PRINTK_OFFLOAD
 /*
  * Returns true iff there is other cpu waiting to take over printing. This
  * function also takes are of setting PRINTK_HANDOVER_B if we want to hand over
@@ -2113,6 +2120,7 @@ static bool cpu_stop_printing(int printed_chars)
 	}
 	return false;
 }
+#endif
 
 /**
  * console_unlock - unlock the console system
@@ -2155,10 +2163,12 @@ again:
 		size_t len;
 		int level;
 
+#ifdef CONFIG_PRINTK_OFFLOAD
 		if (cpu_stop_printing(printed_chars)) {
 			hand_over = true;
 			break;
 		}
+#endif
 
 		raw_spin_lock_irqsave(&logbuf_lock, flags);
 		if (seen_seq != log_next_seq) {
@@ -2576,6 +2586,7 @@ int unregister_console(struct console *console)
 }
 EXPORT_SYMBOL(unregister_console);
 
+#ifdef CONFIG_PRINTK_OFFLOAD
 /* Kthread which takes over printing from a CPU which asks for help */
 static int printing_task(void *arg)
 {
@@ -2638,6 +2649,7 @@ static int offload_chars_set(const char *val, const struct kernel_param *kp)
 	mutex_unlock(&printk_kthread_mutex);
 	return 0;
 }
+#endif	/* CONFIG_PRINTK_OFFLOAD */
 
 static int __init printk_late_init(void)
 {
@@ -2650,9 +2662,11 @@ static int __init printk_late_init(void)
 	}
 	hotcpu_notifier(console_cpu_notify, 0);
 
+#ifdef CONFIG_PRINTK_OFFLOAD
 	mutex_lock(&printk_kthread_mutex);
 	printk_start_offload_kthreads();
 	mutex_unlock(&printk_kthread_mutex);
+#endif
 
 	return 0;
 }
-- 
1.8.1.4

  


diff --git a/kernel/printk.c b/kernel/printk/printk.c
index bd7ee2a9f960..7a8ffd89875c 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -249,9 +249,6 @@ static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN);
 static char *log_buf = __log_buf;
 static u32 log_buf_len = __LOG_BUF_LEN;
 
-/* cpu currently holding logbuf_lock */
-static volatile unsigned int logbuf_cpu = UINT_MAX;
-
 /* human readable text of the record */
 static char *log_text(const struct printk_log *msg)
 {
@@ -1332,36 +1329,22 @@ static inline int can_use_console(unsigned int cpu)
  * messages from a 'printk'. Return true (and with the
  * console_lock held, and 'console_locked' set) if it
  * is successful, false otherwise.
- *
- * This gets called with the 'logbuf_lock' spinlock held and
- * interrupts disabled. It should return with 'lockbuf_lock'
- * released but interrupts still disabled.
  */
 static int console_trylock_for_printk(unsigned int cpu)
-	__releases(&logbuf_lock)
 {
-	int retval = 0, wake = 0;
-
-	if (console_trylock()) {
-		retval = 1;
-
-		/*
-		 * If we can't use the console, we need to release
-		 * the console semaphore by hand to avoid flushing
-		 * the buffer. We need to hold the console semaphore
-		 * in order to do this test safely.
-		 */
-		if (!can_use_console(cpu)) {
-			console_locked = 0;
-			wake = 1;
-			retval = 0;
-		}
-	}
-	logbuf_cpu = UINT_MAX;
-	raw_spin_unlock(&logbuf_lock);
-	if (wake)
+	if (!console_trylock())
+		return 0;
+	/*
+	 * If we can't use the console, we need to release the console
+	 * semaphore by hand to avoid flushing the buffer. We need to hold the
+	 * console semaphore in order to do this test safely.
+	 */
+	if (!can_use_console(cpu)) {
+		console_locked = 0;
 		up(&console_sem);
-	return retval;
+		return 0;
+	}
+	return 1;
 }
 
 int printk_delay_msec __read_mostly;
@@ -1494,6 +1477,9 @@ asmlinkage int vprintk_emit(int facility, int level,
 	unsigned long flags;
 	int this_cpu;
 	int printed_len = 0;
+	/* cpu currently holding logbuf_lock in this function */
+	static volatile unsigned int logbuf_cpu = UINT_MAX;
+
 
 	boot_delay_msec(level);
 	printk_delay();
@@ -1609,13 +1595,12 @@ asmlinkage int vprintk_emit(int facility, int level,
 	}
 	printed_len += text_len;
 
+	logbuf_cpu = UINT_MAX;
+	raw_spin_unlock(&logbuf_lock);
 	/*
 	 * Try to acquire and then immediately release the console semaphore.
 	 * The release will print out buffers and wake up /dev/kmsg and syslog()
 	 * users.
-	 *
-	 * The console_trylock_for_printk() function will release 'logbuf_lock'
-	 * regardless of whether it actually gets the console semaphore or not.
 	 */
 	if (console_trylock_for_printk(this_cpu))
 		console_unlock();
-- 
1.8.1.4

  


diff --git a/include/linux/console.h b/include/linux/console.h
index 7571a16bd653..c61c169f85b3 100644
--- a/include/linux/console.h
+++ b/include/linux/console.h
@@ -150,6 +150,7 @@ extern int console_trylock(void);
 extern void console_unlock(void);
 extern void console_conditional_schedule(void);
 extern void console_unblank(void);
+extern void console_flush(void);
 extern struct tty_driver *console_device(int *);
 extern void console_stop(struct console *);
 extern void console_start(struct console *);
diff --git a/kernel/printk.c b/kernel/printk.c
index 8d981b2b5bb1..1c0577383af5 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -2306,6 +2306,28 @@ struct tty_driver *console_device(int *index)
 }
 
 /*
+ * Wait until all messages accumulated in the printk buffer are printed to
+ * console. Note that as soon as this function returns, new messages may be
+ * added to the printk buffer by other CPUs.
+ */
+void console_flush(void)
+{
+	bool retry;
+	unsigned long flags;
+
+	while (1) {
+		raw_spin_lock_irqsave(&logbuf_lock, flags);
+		retry = console_seq != log_next_seq;
+		raw_spin_unlock_irqrestore(&logbuf_lock, flags);
+		if (!retry || console_suspended)
+			break;
+		/* Cycle console_sem to wait for outstanding printing */
+		console_lock();
+		console_unlock();
+	}
+}
+
+/*
  * Prevent further output on the passed console device so that (for example)
  * serial drivers can disable console output before suspending a port, and can
  * re-enable output afterwards.
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 84571e09c907..14ac740e0c7f 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -21,5 +21,6 @@
 #include <linux/smpboot.h>
 #include <linux/atomic.h>
+#include <linux/console.h>
 
 /*
  * Structure to determine completion condition and record errors.  May
@@ -574,6 +575,14 @@ int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
 		return ret;
 	}
 
+	/*
+	 * If there are lots of outstanding messages, printing them can take a
+	 * long time and all cpus would be spinning waiting for the printing to
+	 * finish thus triggering NMI watchdog, RCU lockups etc. Wait for the
+	 * printing here to avoid these.
+	 */
+	console_flush();
+
 	/* Set the initial state and stop all online cpus. */
 	set_state(&msdata, MULTI_STOP_PREPARE);
 	return stop_cpus(cpu_online_mask, multi_cpu_stop, &msdata);
-- 
1.8.1.4

  


diff --git a/kernel/printk.c b/kernel/printk.c
index cb7e06850eb5..8d981b2b5bb1 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -109,6 +109,10 @@ static long printk_handover_state;
  * CPUs.
  */
 #define PRINTING_TASKS 2
+/* Pointers to printing kthreads */
+static struct task_struct *printing_kthread[PRINTING_TASKS];
+/* Serialization of changes to printk_offload_chars and kthread creation */
+static DEFINE_MUTEX(printk_kthread_mutex);
 
 /* Wait queue printing kthreads sleep on when idle */
 static DECLARE_WAIT_QUEUE_HEAD(print_queue); 
@@ -280,6 +284,12 @@ static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN);
 static char *log_buf = __log_buf;
 static u32 log_buf_len = __LOG_BUF_LEN;
 
+static int offload_chars_set(const char *val, const struct kernel_param *kp);
+static struct kernel_param_ops offload_chars_ops = {
+	.set = offload_chars_set,
+	.get = param_get_uint,
+};
+
 /*
  * How many characters can we print in one call of printk before asking
  * other cpus to continue printing. 0 means infinity. Tunable via
@@ -288,7 +298,7 @@ static u32 log_buf_len = __LOG_BUF_LEN;
  */
 static unsigned int __read_mostly printk_offload_chars = 1000;
 
-module_param_named(offload_chars, printk_offload_chars, uint,
+module_param_cb(offload_chars, &offload_chars_ops, &printk_offload_chars,
 		   S_IRUGO | S_IWUSR);
 MODULE_PARM_DESC(offload_chars, "offload printing to console to a different"
 	" cpu after this number of characters");
@@ -2571,32 +2581,56 @@ static int printing_task(void *arg)
 	return 0;
 }
 
-static int __init printk_late_init(void)
+static void printk_start_offload_kthreads(void)
 {
-	struct console *con;
 	int i;
 	struct task_struct *task;
 
-	for_each_console(con) {
-		if (!keep_bootcon && con->flags & CON_BOOT) {
-			printk(KERN_INFO "turn off boot console %s%d\n",
-				con->name, con->index);
-			unregister_console(con);
-		}
-	}
-	hotcpu_notifier(console_cpu_notify, 0);
-
-	/* Does any handover of printing have any sence? */
-	if (num_possible_cpus() <= 1)
-		return 0;
-
+	/* Does handover of printing make any sense? */
+	if (printk_offload_chars == 0 || num_possible_cpus() <= 1)
+		return;
 	for (i = 0; i < PRINTING_TASKS; i++) {
+		if (printing_kthread[i])
+			continue;
 		task = kthread_run(printing_task, NULL, "print/%d", i);
 		if (IS_ERR(task)) {
 			pr_err("printk: Cannot create printing thread: %ld\n",
 			       PTR_ERR(task));
 		}
+		printing_kthread[i] = task;
 	}
+}
+
+static int offload_chars_set(const char *val, const struct kernel_param *kp)
+{
+	int ret;
+
+	/* Protect against parallel change of printk_offload_chars */
+	mutex_lock(&printk_kthread_mutex);
+	ret = param_set_uint(val, kp);
+	if (ret) {
+		mutex_unlock(&printk_kthread_mutex);
+		return ret;
+	}
+	printk_start_offload_kthreads();
+	mutex_unlock(&printk_kthread_mutex);
+	return 0;
+}
+
+static int __init printk_late_init(void)
+{
+	struct console *con;
+
+	for_each_console(con) {
+		if (!keep_bootcon && con->flags & CON_BOOT) {
+			unregister_console(con);
+		}
+	}
+	hotcpu_notifier(console_cpu_notify, 0);
+
+	mutex_lock(&printk_kthread_mutex);
+	printk_start_offload_kthreads();
+	mutex_unlock(&printk_kthread_mutex);
 
 	return 0;
 }
-- 
1.8.1.4

  


diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 4dae9cbe9259..bd7ee2a9f960 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -206,8 +206,7 @@ struct printk_log {
 };
 
 /*
- * The logbuf_lock protects kmsg buffer, indices, counters. It is also
- * used in interesting ways to provide interlocking in console_unlock();
+ * The logbuf_lock protects kmsg buffer, indices, counters.
  */
 static DEFINE_RAW_SPINLOCK(logbuf_lock);
 
-- 
1.8.1.4

  


diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 7116fda7077f..74826b1e2529 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2621,6 +2621,21 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			Format: <bool>  (1/Y/y=enable, 0/N/n=disable)
 			default: disabled
 
+	printk.offload_chars=
+			Printing to console can be relatively slow especially
+			in case of serial console. When there is intensive
+			printing happening from several cpus (as is the case
+			during boot), a cpu can be spending significant time
+			(seconds or more) doing printing. To avoid softlockups,
+			lost interrupts, and similar problems other cpus
+			will take over printing after the currently printing
+			cpu has printed 'printk.offload_chars' characters.
+			Higher value means possibly longer interrupt and other
+			latencies but lower overhead of printing due to handing
+			over of printing.
+			Format: <number> (0 = disabled)
+			default: 1000
+
 	printk.time=	Show timing data prefixed to each printk message line
 			Format: <bool>  (1/Y/y=enable, 0/N/n=disable)
 
diff --git a/kernel/printk.c b/kernel/printk.c
index 91c554e027c5..cb7e06850eb5 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -45,6 +45,7 @@
 #include <linux/poll.h>
 #include <linux/irq_work.h>
 #include <linux/utsname.h>
+#include <linux/kthread.h>
 
 #include <asm/uaccess.h>
 
@@ -87,6 +88,31 @@ static DEFINE_SEMAPHORE(console_sem);
 struct console *console_drivers;
 EXPORT_SYMBOL_GPL(console_drivers);
 
+/* State machine for handing over printing */
+enum {
+	/*
+	 * Set by the holder of console_sem if currently printing task wants to
+	 * hand over printing. Cleared before console_sem is released.
+	 */
+	PRINTK_HANDOVER_B,
+	/*
+	 * Set if there's someone spinning on console_sem to take over printing.
+	 * Cleared after acquiring console_sem.
+	 */
+	PRINTK_CONSOLE_SPIN_B,
+};
+static long printk_handover_state;
+
+/*
+ * Number of kernel threads for offloading printing. We need at least two so
+ * that they can hand over printing from one to another one and thus switch
+ * CPUs.
+ */
+#define PRINTING_TASKS 2
+
+/* Wait queue printing kthreads sleep on when idle */
+static DECLARE_WAIT_QUEUE_HEAD(print_queue); 
+
 #ifdef CONFIG_LOCKDEP
 static struct lockdep_map console_lock_dep_map = {
 	.name = "console_lock"
@@ -254,6 +280,19 @@ static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN);
 static char *log_buf = __log_buf;
 static u32 log_buf_len = __LOG_BUF_LEN;
 
+/*
+ * How many characters can we print in one call of printk before asking
+ * other cpus to continue printing. 0 means infinity. Tunable via
+ * printk.offload_chars kernel parameter. Our default 1000 means about
+ * 0.1s maximum latency due to printing.
+ */
+static unsigned int __read_mostly printk_offload_chars = 1000;
+
+module_param_named(offload_chars, printk_offload_chars, uint,
+		   S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(offload_chars, "offload printing to console to a different"
+	" cpu after this number of characters");
+
 /* human readable text of the record */
 static char *log_text(const struct printk_log *msg)
 {
@@ -1942,6 +1981,7 @@ void console_lock(void)
 	if (console_suspended)
 		return;
 	console_locked = 1;
+	printk_handover_state = 0;
 	console_may_schedule = 1;
 	mutex_acquire(&console_lock_dep_map, 0, 0, _RET_IP_);
 }
@@ -1964,12 +2004,45 @@ int console_trylock(void)
 		return 0;
 	}
 	console_locked = 1;
+	printk_handover_state = 0;
 	console_may_schedule = 0;
 	mutex_acquire(&console_lock_dep_map, 0, 1, _RET_IP_);
 	return 1;
 }
 EXPORT_SYMBOL(console_trylock);
 
+/*
+ * This is a version of console_lock() which spins to acquire console_sem.
+ * It is only for use by threads that take care of flushing printk buffer so
+ * that they can be sure they are not preempted while waiting for console_sem.
+ *
+ * The function returns 1 if we acquired console_sem, 0 if we failed (either
+ * someone else is already spinning, someone acquired console_sem, or console
+ * is suspended).
+ */
+static int console_lock_try_spin(void)
+{
+	WARN_ON_ONCE(preemptible());
+	/* Someone already spinning? Don't waste cpu time... */
+	if (test_and_set_bit(PRINTK_CONSOLE_SPIN_B, &printk_handover_state))
+		return 0;
+	while (down_trylock(&console_sem)) {
+		/* Someone else took console_sem? */
+		if (!test_bit(PRINTK_CONSOLE_SPIN_B, &printk_handover_state))
+			return 0;
+		cpu_relax();
+	}
+	printk_handover_state = 0;
+	if (console_suspended) {
+		up(&console_sem);
+		return 0;
+	}
+	console_locked = 1;
+	console_may_schedule = 0;
+	mutex_acquire(&console_lock_dep_map, 0, 0, _RET_IP_);
+	return 1;
+}
+
 int is_console_locked(void)
 {
 	return console_locked;
@@ -2004,15 +2077,44 @@ out:
 	raw_spin_unlock_irqrestore(&logbuf_lock, flags);
 }
 
+/*
+ * Returns true iff there is other cpu waiting to take over printing. This
+ * function also takes are of setting PRINTK_HANDOVER_B if we want to hand over
+ * printing to some other cpu.
+ */
+static bool cpu_stop_printing(int printed_chars)
+{
+	/* Oops? Print everything now to maximize chances user will see it */
+	if (oops_in_progress)
+		return false;
+	if (!printk_offload_chars || printed_chars < printk_offload_chars)
+		return false;
+	/* Someone is spinning on console_sem? Give away to him. */
+	if (test_bit(PRINTK_CONSOLE_SPIN_B, &printk_handover_state))
+		return true;
+	if (!test_bit(PRINTK_HANDOVER_B, &printk_handover_state)) {
+		set_bit(PRINTK_HANDOVER_B, &printk_handover_state);
+		/*
+		 * Paired with barrier in prepare_to_wait_exclusive() in
+		 * printing_task()
+		 */
+		smp_mb();
+		wake_up(&print_queue);
+	}
+	return false;
+}
+
 /**
  * console_unlock - unlock the console system
  *
  * Releases the console_lock which the caller holds on the console system
  * and the console driver list.
  *
- * While the console_lock was held, console output may have been buffered
- * by printk().  If this is the case, console_unlock(); emits
- * the output prior to releasing the lock.
+ * While the console_lock was held, console output may have been buffered by
+ * printk(). If this is the case, console_unlock() emits the output prior to
+ * releasing the lock. However we need not write all the data in the buffer if
+ * we would hog the CPU for too long. In such case we try to hand over printing
+ * to a different cpu.
  *
  * If there is output waiting, we wake /dev/kmsg and syslog() users.
  *
@@ -2025,6 +2127,8 @@ void console_unlock(void)
 	unsigned long flags;
 	bool wake_klogd = false;
 	bool retry;
+	bool hand_over = false;
+	int printed_chars = 0;
 
 	if (console_suspended) {
 		up(&console_sem);
@@ -2041,6 +2145,11 @@ again:
 		size_t len;
 		int level;
 
+		if (cpu_stop_printing(printed_chars)) {
+			hand_over = true;
+			break;
+		}
+
 		raw_spin_lock_irqsave(&logbuf_lock, flags);
 		if (seen_seq != log_next_seq) {
 			wake_klogd = true;
@@ -2054,8 +2163,10 @@ again:
 			console_prev = 0;
 		}
 skip:
-		if (console_seq == log_next_seq)
+		if (console_seq == log_next_seq) {
+			raw_spin_unlock(&logbuf_lock);
 			break;
+		}
 
 		msg = log_from_idx(console_idx);
 		if (msg->flags & LOG_NOCONS) {
@@ -2086,31 +2197,39 @@ skip:
 		stop_critical_timings();	/* don't trace print latency */
 		call_console_drivers(level, text, len);
 		start_critical_timings();
+		printed_chars += len;
 		local_irq_restore(flags);
 	}
-	console_locked = 0;
-	mutex_release(&console_lock_dep_map, 1, _RET_IP_);
 
 	/* Release the exclusive_console once it is used */
 	if (unlikely(exclusive_console))
 		exclusive_console = NULL;
 
-	raw_spin_unlock(&logbuf_lock);
-
+	/* Save modification of printk_handover_state in the common fast path */
+	if (test_bit(PRINTK_HANDOVER_B, &printk_handover_state))
+		clear_bit(PRINTK_HANDOVER_B, &printk_handover_state);
+	console_locked = 0;
+	mutex_release(&console_lock_dep_map, 1, _RET_IP_);
 	up(&console_sem);
 
 	/*
-	 * Someone could have filled up the buffer again, so re-check if there's
-	 * something to flush. In case we cannot trylock the console_sem again,
-	 * there's a new owner and the console_unlock() from them will do the
-	 * flush, no worries.
+	 * Subtlety: We have interrupts disabled iff hand_over == false (to
+	 * save one cli/sti pair in the fast path.
 	 */
-	raw_spin_lock(&logbuf_lock);
-	retry = console_seq != log_next_seq;
-	raw_spin_unlock_irqrestore(&logbuf_lock, flags);
+	if (!hand_over) {
+		/*
+		 * Someone could have filled up the buffer again, so re-check
+		 * if there's something to flush. In case we cannot trylock the
+		 * console_sem again, there's a new owner and the
+		 * console_unlock() from them will do the flush, no worries.
+		 */
+		raw_spin_lock(&logbuf_lock);
+		retry = console_seq != log_next_seq;
+		raw_spin_unlock_irqrestore(&logbuf_lock, flags);
 
-	if (retry && console_trylock())
-		goto again;
+		if (retry && console_trylock())
+			goto again;
+	}
 
 	if (wake_klogd)
 		wake_up_klogd();
@@ -2148,6 +2267,7 @@ void console_unblank(void)
 		console_lock();
 
 	console_locked = 1;
+	printk_handover_state = 0;
 	console_may_schedule = 0;
 	for_each_console(c)
 		if ((c->flags & CON_ENABLED) && c->unblank)
@@ -2424,9 +2544,38 @@ int unregister_console(struct console *console)
 }
 EXPORT_SYMBOL(unregister_console);
 
+/* Kthread which takes over printing from a CPU which asks for help */
+static int printing_task(void *arg)
+{
+	DEFINE_WAIT(wait);
+
+	while (1) {
+		prepare_to_wait_exclusive(&print_queue, &wait,
+					  TASK_INTERRUPTIBLE);
+		if (!test_bit(PRINTK_HANDOVER_B, &printk_handover_state))
+			schedule();
+		finish_wait(&print_queue, &wait);
+		/*
+		 * We don't want to be scheduled away once we got the CPU (that
+		 * would be especially problematic if we hold console_sem at
+		 * that moment since noone else could print to console). So
+		 * disable preemption and spin on console_sem. We shouldn't
+		 * spin for long since printing CPU drops console_sem as soon
+		 * as it notices there is someone spinning on it.
+		 */
+		preempt_disable();
+		if (console_lock_try_spin())
+			console_unlock();
+		preempt_enable();
+	}
+	return 0;
+}
+
 static int __init printk_late_init(void)
 {
 	struct console *con;
+	int i;
+	struct task_struct *task;
 
 	for_each_console(con) {
 		if (!keep_bootcon && con->flags & CON_BOOT) {
@@ -2434,6 +2583,19 @@ static int __init printk_late_init(void)
 		}
 	}
 	hotcpu_notifier(console_cpu_notify, 0);
+
+	/* Does any handover of printing have any sence? */
+	if (num_possible_cpus() <= 1)
+		return 0;
+
+	for (i = 0; i < PRINTING_TASKS; i++) {
+		task = kthread_run(printing_task, NULL, "print/%d", i);
+		if (IS_ERR(task)) {
+			pr_err("printk: Cannot create printing thread: %ld\n",
+			       PTR_ERR(task));
+		}
+	}
+
 	return 0;
 }
 late_initcall(printk_late_init);
-- 
1.8.1.4

  


diff --git a/kernel/printk.c b/kernel/printk.c
index 7a8ffd89875c..56649edfae9c 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -1314,10 +1314,9 @@ static int have_callable_console(void)
 /*
  * Can we actually use the console at this time on this cpu?
  *
- * Console drivers may assume that per-cpu resources have
- * been allocated. So unless they're explicitly marked as
- * being able to cope (CON_ANYTIME) don't call them until
- * this CPU is officially up.
+ * Console drivers may assume that per-cpu resources have been allocated. So
+ * unless they're explicitly marked as being able to cope (CON_ANYTIME) don't
+ * call them until this CPU is officially up.
  */
 static inline int can_use_console(unsigned int cpu)
 {
@@ -1330,8 +1329,10 @@ static inline int can_use_console(unsigned int cpu)
  * console_lock held, and 'console_locked' set) if it
  * is successful, false otherwise.
  */
-static int console_trylock_for_printk(unsigned int cpu)
+static int console_trylock_for_printk(void)
 {
+	unsigned int cpu = smp_processor_id();
+
 	if (!console_trylock())
 		return 0;
 	/*
@@ -1501,7 +1502,8 @@ asmlinkage int vprintk_emit(int facility, int level,
 		 */
 		if (!oops_in_progress && !lockdep_recursing(current)) {
 			recursion_bug = 1;
-			goto out_restore_irqs;
+			local_irq_restore(flags);
+			return 0;
 		}
 		zap_locks();
 	}
@@ -1597,17 +1599,22 @@ asmlinkage int vprintk_emit(int facility, int level,
 
 	logbuf_cpu = UINT_MAX;
 	raw_spin_unlock(&logbuf_lock);
+	lockdep_on();
+	local_irq_restore(flags);
+
+	/*
+	 * Disable preemption to avoid being preempted while holding
+	 * console_sem which would prevent anyone from printing to console
+	 */
+	preempt_disable();
 	/*
 	 * Try to acquire and then immediately release the console semaphore.
 	 * The release will print out buffers and wake up /dev/kmsg and syslog()
 	 * users.
 	 */
-	if (console_trylock_for_printk(this_cpu))
+	if (console_trylock_for_printk())
 		console_unlock();
-
-	lockdep_on();
-out_restore_irqs:
-	local_irq_restore(flags);
+	preempt_enable();
 
 	return printed_len;
 }
-- 
1.8.1.4

  


diff --git a/kernel/printk.c b/kernel/printk.c
index 56649edfae9c..91c554e027c5 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -68,6 +68,9 @@ int console_printk[4] = {
 	DEFAULT_CONSOLE_LOGLEVEL,	/* default_console_loglevel */
 };
 
+/* Deferred messaged from sched code are marked by this special level */
+#define SCHED_MESSAGE_LOGLEVEL -2
+
 /*
  * Low level drivers may need that to know if they can schedule in
  * their unblank() callback or not. So let's export it.
@@ -206,7 +209,9 @@ struct printk_log {
 };
 
 /*
- * The logbuf_lock protects kmsg buffer, indices, counters.
+ * The logbuf_lock protects kmsg buffer, indices, counters.  This can be taken
+ * within the scheduler's rq lock. It must be released before calling
+ * console_unlock() or anything else that might wake up a process.
  */
 static DEFINE_RAW_SPINLOCK(logbuf_lock);
 
@@ -1473,14 +1478,19 @@ asmlinkage int vprintk_emit(int facility, int level,
 	static int recursion_bug;
 	static char textbuf[LOG_LINE_MAX];
 	char *text = textbuf;
-	size_t text_len;
+	size_t text_len = 0;
 	enum log_flags lflags = 0;
 	unsigned long flags;
 	int this_cpu;
 	int printed_len = 0;
+	bool in_sched = false;
 	/* cpu currently holding logbuf_lock in this function */
 	static volatile unsigned int logbuf_cpu = UINT_MAX;
 
+	if (level == SCHED_MESSAGE_LOGLEVEL) {
+		level = -1;
+		in_sched = true;
+	}
 
 	boot_delay_msec(level);
 	printk_delay();
@@ -1527,7 +1537,12 @@ asmlinkage int vprintk_emit(int facility, int level,
 	 * The printf needs to come first; we need the syslog
 	 * prefix which might be passed-in as a parameter.
 	 */
-	text_len = vscnprintf(text, sizeof(textbuf), fmt, args);
+	if (in_sched)
+		text_len = scnprintf(text, sizeof(textbuf),
+				     KERN_WARNING "[sched_delayed] ");
+
+	text_len += vscnprintf(text + text_len,
+			       sizeof(textbuf) - text_len, fmt, args);
 
 	/* mark and strip a trailing newline */
 	if (text_len && text[text_len-1] == '\n') {
@@ -1602,6 +1617,10 @@ asmlinkage int vprintk_emit(int facility, int level,
 	lockdep_on();
 	local_irq_restore(flags);
 
+	/* If called from the scheduler, we can not call up(). */
+	if (in_sched)
+		return printed_len;
+
 	/*
 	 * Disable preemption to avoid being preempted while holding
 	 * console_sem which would prevent anyone from printing to console
@@ -2423,21 +2442,19 @@ late_initcall(printk_late_init);
 /*
  * Delayed printk version, for scheduler-internal messages:
  */
-#define PRINTK_BUF_SIZE		512
-
 #define PRINTK_PENDING_WAKEUP	0x01
-#define PRINTK_PENDING_SCHED	0x02
+#define PRINTK_PENDING_OUTPUT	0x02
 
 static DEFINE_PER_CPU(int, printk_pending);
-static DEFINE_PER_CPU(char [PRINTK_BUF_SIZE], printk_sched_buf);
 
 static void wake_up_klogd_work_func(struct irq_work *irq_work)
 {
 	int pending = __this_cpu_xchg(printk_pending, 0);
 
-	if (pending & PRINTK_PENDING_SCHED) {
-		char *buf = __get_cpu_var(printk_sched_buf);
-		printk(KERN_WARNING "[sched_delayed] %s", buf);
+	if (pending & PRINTK_PENDING_OUTPUT) {
+		/* If trylock fails, someone else is doing the printing */
+		if (console_trylock())
+			console_unlock();
 	}
 
 	if (pending & PRINTK_PENDING_WAKEUP)
@@ -2461,21 +2478,15 @@ void wake_up_klogd(void)
 
 int printk_sched(const char *fmt, ...)
 {
-	unsigned long flags;
 	va_list args;
-	char *buf;
 	int r;
 
-	local_irq_save(flags);
-	buf = __get_cpu_var(printk_sched_buf);
-
 	va_start(args, fmt);
-	r = vsnprintf(buf, PRINTK_BUF_SIZE, fmt, args);
+	r = vprintk_emit(0, SCHED_MESSAGE_LOGLEVEL, NULL, 0, fmt, args);
 	va_end(args);
 
-	__this_cpu_or(printk_pending, PRINTK_PENDING_SCHED);
+	__this_cpu_or(printk_pending, PRINTK_PENDING_OUTPUT);
 	irq_work_queue(&__get_cpu_var(wake_up_klogd_work));
-	local_irq_restore(flags);
 
 	return r;
 }
-- 
1.8.1.4

  



[Index of Archives]     [Linux Kernel]     [Kernel Development Newbies]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite Hiking]     [Linux Kernel]     [Linux SCSI]