On (12/22/16 23:01), Tetsuo Handa wrote: > > On (12/22/16 19:27), Tetsuo Handa wrote: > > > Thank you. I tried "[PATCHv6 0/7] printk: use printk_safe to handle printk() > > > recursive calls" at https://lkml.org/lkml/2016/12/21/232 on top of linux.git > > > as of commit 52bce91165e5f2db "splice: reinstate SIGPIPE/EPIPE handling", but > > > it turned out that your patch set does not solve this problem. > > > > > > I was assuming that sending to consoles from printk() is offloaded to a kernel > > > thread dedicated for that purpose, but your patch set does not do it. > > > > sorry, seems that I didn't deliver the information properly. > > > > https://gitlab.com/senozhatsky/linux-next-ss/commits/printk-safe-deferred > > > > there are 2 patch sets. the first one is printk-safe. the second one > > is async printk. > > > > 9 patches in total (as of now). > > > > can you access it? > > "404 The page you're looking for could not be found." > > Anonymous access not supported? oops... hm, dunno, it says : Visibility Level (?) : : Public : The project can be cloned without any authentication. I'll switch to github then may be. attached 9 patches. NOTE: not the final version. -ss
>From 7ad848869a18ceca63e85a248e83c669ec5daa84 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky <sergey.senozhatsky@xxxxxxxxx> Date: Tue, 20 Dec 2016 23:54:42 +0900 Subject: [PATCH 1/9] printk: use vprintk_func in vprintk() vprintk(), just like printk(), better be using per-cpu printk_func instead of direct vprintk_emit() call. Just in case if vprintk() will ever be called from NMI, or from any other context that can deadlock in printk(). Signed-off-by: Sergey Senozhatsky <sergey.senozhatsky@xxxxxxxxx> Reviewed-by: Steven Rostedt <rostedt@xxxxxxxxxxx> Reviewed-by: Petr Mladek <pmladek@xxxxxxxx> --- kernel/printk/printk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index e2cdd87e7a63..265e815730f9 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -1803,7 +1803,7 @@ EXPORT_SYMBOL(vprintk_emit); asmlinkage int vprintk(const char *fmt, va_list args) { - return vprintk_emit(0, LOGLEVEL_DEFAULT, NULL, 0, fmt, args); + return vprintk_func(fmt, args); } EXPORT_SYMBOL(vprintk); -- 2.11.0
>From c0827cb43c638856c05e1adf388dcb0b2bd56b4d Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky <sergey.senozhatsky@xxxxxxxxx> Date: Tue, 20 Dec 2016 23:54:47 +0900 Subject: [PATCH 2/9] printk: rename nmi.c and exported api A preparation patch for printk_safe work. No functional change. - rename nmi.c to print_safe.c - rename exported functions to have a `printk_safe' prefix. Signed-off-by: Sergey Senozhatsky <sergey.senozhatsky@xxxxxxxxx> Acked-by: Steven Rostedt <rostedt@xxxxxxxxxxx> Reviewed-by: Petr Mladek <pmladek@xxxxxxxx> --- arch/arm/kernel/smp.c | 4 +- include/linux/hardirq.h | 4 +- include/linux/printk.h | 20 +++++----- init/Kconfig | 16 ++++---- init/main.c | 2 +- kernel/kexec_core.c | 2 +- kernel/panic.c | 4 +- kernel/printk/Makefile | 2 +- kernel/printk/{nmi.c => printk_safe.c} | 72 +++++++++++++++++----------------- lib/nmi_backtrace.c | 2 +- 10 files changed, 65 insertions(+), 63 deletions(-) rename kernel/printk/{nmi.c => printk_safe.c} (77%) diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 7dd14e8395e6..8b056e14158e 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -644,11 +644,11 @@ void handle_IPI(int ipinr, struct pt_regs *regs) break; case IPI_CPU_BACKTRACE: - printk_nmi_enter(); + printk_safe_nmi_enter(); irq_enter(); nmi_cpu_backtrace(regs); irq_exit(); - printk_nmi_exit(); + printk_safe_nmi_exit(); break; default: diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index c683996110b1..14840aafce3f 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -61,7 +61,7 @@ extern void irq_exit(void); #define nmi_enter() \ do { \ - printk_nmi_enter(); \ + printk_safe_nmi_enter(); \ lockdep_off(); \ ftrace_nmi_enter(); \ BUG_ON(in_nmi()); \ @@ -78,7 +78,7 @@ extern void irq_exit(void); preempt_count_sub(NMI_OFFSET + HARDIRQ_OFFSET); \ ftrace_nmi_exit(); \ lockdep_on(); \ - printk_nmi_exit(); \ + printk_safe_nmi_exit(); \ } while (0) #endif /* LINUX_HARDIRQ_H */ diff --git a/include/linux/printk.h b/include/linux/printk.h index 3472cc6b7a60..67df41eca51c 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -147,17 +147,17 @@ void early_printk(const char *s, ...) { } #endif #ifdef CONFIG_PRINTK_NMI -extern void printk_nmi_init(void); -extern void printk_nmi_enter(void); -extern void printk_nmi_exit(void); -extern void printk_nmi_flush(void); -extern void printk_nmi_flush_on_panic(void); +extern void printk_safe_init(void); +extern void printk_safe_nmi_enter(void); +extern void printk_safe_nmi_exit(void); +extern void printk_safe_flush(void); +extern void printk_safe_flush_on_panic(void); #else -static inline void printk_nmi_init(void) { } -static inline void printk_nmi_enter(void) { } -static inline void printk_nmi_exit(void) { } -static inline void printk_nmi_flush(void) { } -static inline void printk_nmi_flush_on_panic(void) { } +static inline void printk_safe_init(void) { } +static inline void printk_safe_nmi_enter(void) { } +static inline void printk_safe_nmi_exit(void) { } +static inline void printk_safe_flush(void) { } +static inline void printk_safe_flush_on_panic(void) { } #endif /* PRINTK_NMI */ #ifdef CONFIG_PRINTK diff --git a/init/Kconfig b/init/Kconfig index 223b734abccd..760b7d0bc9d7 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -875,17 +875,19 @@ config LOG_CPU_MAX_BUF_SHIFT 13 => 8 KB for each CPU 12 => 4 KB for each CPU -config NMI_LOG_BUF_SHIFT - int "Temporary per-CPU NMI log buffer size (12 => 4KB, 13 => 8KB)" +config PRINTK_SAFE_LOG_BUF_SHIFT + int "Temporary per-CPU printk log buffer size (12 => 4KB, 13 => 8KB)" range 10 21 default 13 - depends on PRINTK_NMI + depends on PRINTK help - Select the size of a per-CPU buffer where NMI messages are temporary - stored. They are copied to the main log buffer in a safe context - to avoid a deadlock. The value defines the size as a power of 2. + Select the size of an alternate printk per-CPU buffer where messages + printed from usafe contexts are temporary stored. One example would + be NMI messages, another one - printk recursion. The messages are + copied to the main log buffer in a safe context to avoid a deadlock. + The value defines the size as a power of 2. - NMI messages are rare and limited. The largest one is when + Those messages are rare and limited. The largest one is when a backtrace is printed. It usually fits into 4KB. Select 8KB if you want to be on the safe side. diff --git a/init/main.c b/init/main.c index b8f88013462d..b08305a7138c 100644 --- a/init/main.c +++ b/init/main.c @@ -579,7 +579,7 @@ asmlinkage __visible void __init start_kernel(void) timekeeping_init(); time_init(); sched_clock_postinit(); - printk_nmi_init(); + printk_safe_init(); perf_event_init(); profile_init(); call_function_init(); diff --git a/kernel/kexec_core.c b/kernel/kexec_core.c index 5617cc412444..14bb9eb76665 100644 --- a/kernel/kexec_core.c +++ b/kernel/kexec_core.c @@ -916,7 +916,7 @@ void crash_kexec(struct pt_regs *regs) old_cpu = atomic_cmpxchg(&panic_cpu, PANIC_CPU_INVALID, this_cpu); if (old_cpu == PANIC_CPU_INVALID) { /* This is the 1st CPU which comes here, so go ahead. */ - printk_nmi_flush_on_panic(); + printk_safe_flush_on_panic(); __crash_kexec(regs); /* diff --git a/kernel/panic.c b/kernel/panic.c index c51edaa04fce..8c8efcd310e7 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -188,7 +188,7 @@ void panic(const char *fmt, ...) * Bypass the panic_cpu check and call __crash_kexec directly. */ if (!_crash_kexec_post_notifiers) { - printk_nmi_flush_on_panic(); + printk_safe_flush_on_panic(); __crash_kexec(NULL); /* @@ -213,7 +213,7 @@ void panic(const char *fmt, ...) atomic_notifier_call_chain(&panic_notifier_list, 0, buf); /* Call flush even twice. It tries harder with a single online CPU */ - printk_nmi_flush_on_panic(); + printk_safe_flush_on_panic(); kmsg_dump(KMSG_DUMP_PANIC); /* diff --git a/kernel/printk/Makefile b/kernel/printk/Makefile index abb0042a427b..607928119f26 100644 --- a/kernel/printk/Makefile +++ b/kernel/printk/Makefile @@ -1,3 +1,3 @@ obj-y = printk.o -obj-$(CONFIG_PRINTK_NMI) += nmi.o +obj-$(CONFIG_PRINTK_NMI) += printk_safe.o obj-$(CONFIG_A11Y_BRAILLE_CONSOLE) += braille.o diff --git a/kernel/printk/nmi.c b/kernel/printk/printk_safe.c similarity index 77% rename from kernel/printk/nmi.c rename to kernel/printk/printk_safe.c index f011aaef583c..09e5552f5b58 100644 --- a/kernel/printk/nmi.c +++ b/kernel/printk/printk_safe.c @@ -1,5 +1,5 @@ /* - * nmi.c - Safe printk in NMI context + * printk_safe.c - Safe printk in NMI context * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -39,18 +39,18 @@ * were handled or when IRQs are blocked. */ DEFINE_PER_CPU(printk_func_t, printk_func) = vprintk_default; -static int printk_nmi_irq_ready; +static int printk_safe_irq_ready; atomic_t nmi_message_lost; -#define NMI_LOG_BUF_LEN ((1 << CONFIG_NMI_LOG_BUF_SHIFT) - \ +#define SAFE_LOG_BUF_LEN ((1 << CONFIG_PRINTK_SAFE_LOG_BUF_SHIFT) - \ sizeof(atomic_t) - sizeof(struct irq_work)) -struct nmi_seq_buf { +struct printk_safe_seq_buf { atomic_t len; /* length of written data */ struct irq_work work; /* IRQ work that flushes the buffer */ - unsigned char buffer[NMI_LOG_BUF_LEN]; + unsigned char buffer[SAFE_LOG_BUF_LEN]; }; -static DEFINE_PER_CPU(struct nmi_seq_buf, nmi_print_seq); +static DEFINE_PER_CPU(struct printk_safe_seq_buf, nmi_print_seq); /* * Safe printk() for NMI context. It uses a per-CPU buffer to @@ -58,9 +58,9 @@ static DEFINE_PER_CPU(struct nmi_seq_buf, nmi_print_seq); * one writer running. But the buffer might get flushed from another * CPU, so we need to be careful. */ -static int vprintk_nmi(const char *fmt, va_list args) +static int vprintk_safe_nmi(const char *fmt, va_list args) { - struct nmi_seq_buf *s = this_cpu_ptr(&nmi_print_seq); + struct printk_safe_seq_buf *s = this_cpu_ptr(&nmi_print_seq); int add = 0; size_t len; @@ -91,7 +91,7 @@ static int vprintk_nmi(const char *fmt, va_list args) goto again; /* Get flushed in a more safe context. */ - if (add && printk_nmi_irq_ready) { + if (add && printk_safe_irq_ready) { /* Make sure that IRQ work is really initialized. */ smp_rmb(); irq_work_queue(&s->work); @@ -100,7 +100,7 @@ static int vprintk_nmi(const char *fmt, va_list args) return add; } -static void printk_nmi_flush_line(const char *text, int len) +static void printk_safe_flush_line(const char *text, int len) { /* * The buffers are flushed in NMI only on panic. The messages must @@ -111,11 +111,10 @@ static void printk_nmi_flush_line(const char *text, int len) printk_deferred("%.*s", len, text); else printk("%.*s", len, text); - } /* printk part of the temporary buffer line by line */ -static int printk_nmi_flush_buffer(const char *start, size_t len) +static int printk_safe_flush_buffer(const char *start, size_t len) { const char *c, *end; bool header; @@ -127,7 +126,7 @@ static int printk_nmi_flush_buffer(const char *start, size_t len) /* Print line by line. */ while (c < end) { if (*c == '\n') { - printk_nmi_flush_line(start, c - start + 1); + printk_safe_flush_line(start, c - start + 1); start = ++c; header = true; continue; @@ -140,7 +139,7 @@ static int printk_nmi_flush_buffer(const char *start, size_t len) continue; } - printk_nmi_flush_line(start, c - start); + printk_safe_flush_line(start, c - start); start = c++; header = true; continue; @@ -154,8 +153,8 @@ static int printk_nmi_flush_buffer(const char *start, size_t len) if (start < end && !header) { static const char newline[] = KERN_CONT "\n"; - printk_nmi_flush_line(start, end - start); - printk_nmi_flush_line(newline, strlen(newline)); + printk_safe_flush_line(start, end - start); + printk_safe_flush_line(newline, strlen(newline)); } return len; @@ -165,11 +164,12 @@ static int printk_nmi_flush_buffer(const char *start, size_t len) * Flush data from the associated per_CPU buffer. The function * can be called either via IRQ work or independently. */ -static void __printk_nmi_flush(struct irq_work *work) +static void __printk_safe_flush(struct irq_work *work) { static raw_spinlock_t read_lock = __RAW_SPIN_LOCK_INITIALIZER(read_lock); - struct nmi_seq_buf *s = container_of(work, struct nmi_seq_buf, work); + struct printk_safe_seq_buf *s = + container_of(work, struct printk_safe_seq_buf, work); unsigned long flags; size_t len; int i; @@ -194,9 +194,9 @@ static void __printk_nmi_flush(struct irq_work *work) * buffer size. */ if ((i && i >= len) || len > sizeof(s->buffer)) { - const char *msg = "printk_nmi_flush: internal error\n"; + const char *msg = "printk_safe_flush: internal error\n"; - printk_nmi_flush_line(msg, strlen(msg)); + printk_safe_flush_line(msg, strlen(msg)); len = 0; } @@ -205,7 +205,7 @@ static void __printk_nmi_flush(struct irq_work *work) /* Make sure that data has been written up to the @len */ smp_rmb(); - i += printk_nmi_flush_buffer(s->buffer + i, len - i); + i += printk_safe_flush_buffer(s->buffer + i, len - i); /* * Check that nothing has got added in the meantime and truncate @@ -221,31 +221,31 @@ static void __printk_nmi_flush(struct irq_work *work) } /** - * printk_nmi_flush - flush all per-cpu nmi buffers. + * printk_safe_flush - flush all per-cpu nmi buffers. * * The buffers are flushed automatically via IRQ work. This function * is useful only when someone wants to be sure that all buffers have * been flushed at some point. */ -void printk_nmi_flush(void) +void printk_safe_flush(void) { int cpu; for_each_possible_cpu(cpu) - __printk_nmi_flush(&per_cpu(nmi_print_seq, cpu).work); + __printk_safe_flush(&per_cpu(nmi_print_seq, cpu).work); } /** - * printk_nmi_flush_on_panic - flush all per-cpu nmi buffers when the system + * printk_safe_flush_on_panic - flush all per-cpu nmi buffers when the system * goes down. * - * Similar to printk_nmi_flush() but it can be called even in NMI context when + * Similar to printk_safe_flush() but it can be called even in NMI context when * the system goes down. It does the best effort to get NMI messages into * the main ring buffer. * * Note that it could try harder when there is only one CPU online. */ -void printk_nmi_flush_on_panic(void) +void printk_safe_flush_on_panic(void) { /* * Make sure that we could access the main ring buffer. @@ -259,33 +259,33 @@ void printk_nmi_flush_on_panic(void) raw_spin_lock_init(&logbuf_lock); } - printk_nmi_flush(); + printk_safe_flush(); } -void __init printk_nmi_init(void) +void __init printk_safe_init(void) { int cpu; for_each_possible_cpu(cpu) { - struct nmi_seq_buf *s = &per_cpu(nmi_print_seq, cpu); + struct printk_safe_seq_buf *s = &per_cpu(nmi_print_seq, cpu); - init_irq_work(&s->work, __printk_nmi_flush); + init_irq_work(&s->work, __printk_safe_flush); } /* Make sure that IRQ works are initialized before enabling. */ smp_wmb(); - printk_nmi_irq_ready = 1; + printk_safe_irq_ready = 1; /* Flush pending messages that did not have scheduled IRQ works. */ - printk_nmi_flush(); + printk_safe_flush(); } -void printk_nmi_enter(void) +void printk_safe_nmi_enter(void) { - this_cpu_write(printk_func, vprintk_nmi); + this_cpu_write(printk_func, vprintk_safe_nmi); } -void printk_nmi_exit(void) +void printk_safe_nmi_exit(void) { this_cpu_write(printk_func, vprintk_default); } diff --git a/lib/nmi_backtrace.c b/lib/nmi_backtrace.c index 75554754eadf..5f7999eacad5 100644 --- a/lib/nmi_backtrace.c +++ b/lib/nmi_backtrace.c @@ -77,7 +77,7 @@ void nmi_trigger_cpumask_backtrace(const cpumask_t *mask, * Force flush any remote buffers that might be stuck in IRQ context * and therefore could not run their irq_work. */ - printk_nmi_flush(); + printk_safe_flush(); clear_bit_unlock(0, &backtrace_flag); put_cpu(); -- 2.11.0
>From 3b5c708ee3313aa09b1bac924b07aa4ae0cdbddb Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky <sergey.senozhatsky@xxxxxxxxx> Date: Tue, 20 Dec 2016 23:55:39 +0900 Subject: [PATCH 3/9] printk: introduce per-cpu safe_print seq buffer This patch extends the idea of NMI per-cpu buffers to regions that may cause recursive printk() calls and possible deadlocks. Namely, printk() can't handle printk calls from schedule code or printk() calls from lock debugging code (spin_dump() for instance); because those may be called with `sem->lock' already taken or any other `critical' locks (p->pi_lock, etc.). An example of deadlock can be vprintk_emit() console_unlock() up() << raw_spin_lock_irqsave(&sem->lock, flags); wake_up_process() try_to_wake_up() ttwu_queue() ttwu_activate() activate_task() enqueue_task() enqueue_task_fair() cfs_rq_of() task_of() WARN_ON_ONCE(!entity_is_task(se)) vprintk_emit() console_trylock() down_trylock() raw_spin_lock_irqsave(&sem->lock, flags) ^^^^ deadlock and some other cases. Just like in NMI implementation, the solution uses a per-cpu `printk_func' pointer to 'redirect' printk() calls to a 'safe' callback, that store messages in a per-cpu buffer and flushes them back to logbuf buffer later. Usage example: printk() printk_safe_enter(flags) // // any printk() call from here will endup in vprintk_safe(), // that stores messages in a special per-CPU buffer. // printk_safe_exit(flags) The 'redirection' mechanism, though, has been reworked, as suggested by Petr Mladek. Instead of using a per-cpu @print_func callback we now keep a per-cpu printk-context variable and call either default or nmi vprintk function depending on its value. printk_nmi_entrer/exit and printk_safe_enter/exit, thus, just set/celar corresponding bits in printk-context functions. The patch only adds printk_safe support, we don't use it yet. Signed-off-by: Sergey Senozhatsky <sergey.senozhatsky@xxxxxxxxx> --- include/linux/printk.h | 21 +++++-- kernel/printk/Makefile | 2 +- kernel/printk/internal.h | 56 ++++++++++------- kernel/printk/printk.c | 3 - kernel/printk/printk_safe.c | 143 ++++++++++++++++++++++++++++++++++---------- 5 files changed, 162 insertions(+), 63 deletions(-) diff --git a/include/linux/printk.h b/include/linux/printk.h index 67df41eca51c..f7d291cf61d9 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -147,17 +147,11 @@ void early_printk(const char *s, ...) { } #endif #ifdef CONFIG_PRINTK_NMI -extern void printk_safe_init(void); extern void printk_safe_nmi_enter(void); extern void printk_safe_nmi_exit(void); -extern void printk_safe_flush(void); -extern void printk_safe_flush_on_panic(void); #else -static inline void printk_safe_init(void) { } static inline void printk_safe_nmi_enter(void) { } static inline void printk_safe_nmi_exit(void) { } -static inline void printk_safe_flush(void) { } -static inline void printk_safe_flush_on_panic(void) { } #endif /* PRINTK_NMI */ #ifdef CONFIG_PRINTK @@ -209,6 +203,9 @@ void __init setup_log_buf(int early); __printf(1, 2) void dump_stack_set_arch_desc(const char *fmt, ...); void dump_stack_print_info(const char *log_lvl); void show_regs_print_info(const char *log_lvl); +extern void printk_safe_init(void); +extern void printk_safe_flush(void); +extern void printk_safe_flush_on_panic(void); #else static inline __printf(1, 0) int vprintk(const char *s, va_list args) @@ -268,6 +265,18 @@ static inline void dump_stack_print_info(const char *log_lvl) static inline void show_regs_print_info(const char *log_lvl) { } + +static inline void printk_safe_init(void) +{ +} + +static inline void printk_safe_flush(void) +{ +} + +static inline void printk_safe_flush_on_panic(void) +{ +} #endif extern asmlinkage void dump_stack(void) __cold; diff --git a/kernel/printk/Makefile b/kernel/printk/Makefile index 607928119f26..4a2ffc39eb95 100644 --- a/kernel/printk/Makefile +++ b/kernel/printk/Makefile @@ -1,3 +1,3 @@ obj-y = printk.o -obj-$(CONFIG_PRINTK_NMI) += printk_safe.o +obj-$(CONFIG_PRINTK) += printk_safe.o obj-$(CONFIG_A11Y_BRAILLE_CONSOLE) += braille.o diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h index 7fd2838fa417..97cee4f482f4 100644 --- a/kernel/printk/internal.h +++ b/kernel/printk/internal.h @@ -16,26 +16,8 @@ */ #include <linux/percpu.h> -typedef __printf(1, 0) int (*printk_func_t)(const char *fmt, va_list args); - -int __printf(1, 0) vprintk_default(const char *fmt, va_list args); - #ifdef CONFIG_PRINTK_NMI -extern raw_spinlock_t logbuf_lock; - -/* - * printk() could not take logbuf_lock in NMI context. Instead, - * it temporary stores the strings into a per-CPU buffer. - * The alternative implementation is chosen transparently - * via per-CPU variable. - */ -DECLARE_PER_CPU(printk_func_t, printk_func); -static inline __printf(1, 0) int vprintk_func(const char *fmt, va_list args) -{ - return this_cpu_read(printk_func)(fmt, args); -} - extern atomic_t nmi_message_lost; static inline int get_nmi_message_lost(void) { @@ -44,14 +26,42 @@ static inline int get_nmi_message_lost(void) #else /* CONFIG_PRINTK_NMI */ -static inline __printf(1, 0) int vprintk_func(const char *fmt, va_list args) -{ - return vprintk_default(fmt, args); -} - static inline int get_nmi_message_lost(void) { return 0; } #endif /* CONFIG_PRINTK_NMI */ + +#ifdef CONFIG_PRINTK + +#define PRINTK_SAFE_CONTEXT_MASK 0x7fffffff +#define PRINTK_NMI_CONTEXT_MASK 0x80000000 + +extern raw_spinlock_t logbuf_lock; + +__printf(1, 0) int vprintk_default(const char *fmt, va_list args); +__printf(1, 0) int vprintk_func(const char *fmt, va_list args); +void __printk_safe_enter(void); +void __printk_safe_exit(void); + +#define printk_safe_enter(flags) \ + do { \ + local_irq_save(flags); \ + __printk_safe_enter(); \ + } while (0) + +#define printk_safe_exit(flags) \ + do { \ + __printk_safe_exit(); \ + local_irq_restore(flags); \ + } while (0) + +#else + +__printf(1, 0) int vprintk_func(const char *fmt, va_list args) { return 0; } + +#define printk_safe_enter(f) ((void)(f)) +#define printk_safe_exit(f) ((void)(f)) + +#endif /* CONFIG_PRINTK */ diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 265e815730f9..335b0fcb538e 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -1902,9 +1902,6 @@ static size_t msg_print_text(const struct printk_log *msg, bool syslog, char *buf, size_t size) { return 0; } static bool suppress_message_printing(int level) { return false; } -/* Still needs to be defined for users */ -DEFINE_PER_CPU(printk_func_t, printk_func); - #endif /* CONFIG_PRINTK */ #ifdef CONFIG_EARLY_PRINTK diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c index 09e5552f5b58..a170c0911252 100644 --- a/kernel/printk/printk_safe.c +++ b/kernel/printk/printk_safe.c @@ -1,5 +1,5 @@ /* - * printk_safe.c - Safe printk in NMI context + * printk_safe.c - Safe printk for printk-deadlock-prone contexts * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -32,15 +32,14 @@ * is later flushed into the main ring buffer via IRQ work. * * The alternative implementation is chosen transparently - * via @printk_func per-CPU variable. + * by examinig current printk() context mask stored in @printk_context + * per-CPU variable. * * The implementation allows to flush the strings also from another CPU. * There are situations when we want to make sure that all buffers * were handled or when IRQs are blocked. */ -DEFINE_PER_CPU(printk_func_t, printk_func) = vprintk_default; static int printk_safe_irq_ready; -atomic_t nmi_message_lost; #define SAFE_LOG_BUF_LEN ((1 << CONFIG_PRINTK_SAFE_LOG_BUF_SHIFT) - \ sizeof(atomic_t) - sizeof(struct irq_work)) @@ -50,32 +49,41 @@ struct printk_safe_seq_buf { struct irq_work work; /* IRQ work that flushes the buffer */ unsigned char buffer[SAFE_LOG_BUF_LEN]; }; + +static DEFINE_PER_CPU(struct printk_safe_seq_buf, safe_print_seq); +static DEFINE_PER_CPU(int, printk_context); + +#ifdef CONFIG_PRINTK_NMI static DEFINE_PER_CPU(struct printk_safe_seq_buf, nmi_print_seq); +atomic_t nmi_message_lost; +#endif /* - * Safe printk() for NMI context. It uses a per-CPU buffer to - * store the message. NMIs are not nested, so there is always only - * one writer running. But the buffer might get flushed from another - * CPU, so we need to be careful. + * Add a message to per-CPU context-dependent buffer. NMI and printk-safe + * have dedicated buffers, because otherwise printk-safe preempted by + * NMI-printk would have overwritten the NMI messages. + * + * The messages are fushed from irq work (of from panic()), possibly, + * from other CPU, concurrently with printk_safe_log_store(). Should this + * happen, printk_safe_log_store() will notice the buffer->len mismatch + * and repeat the write. */ -static int vprintk_safe_nmi(const char *fmt, va_list args) +static int printk_safe_log_store(struct printk_safe_seq_buf *s, + const char *fmt, va_list args) { - struct printk_safe_seq_buf *s = this_cpu_ptr(&nmi_print_seq); - int add = 0; + int add; size_t len; again: len = atomic_read(&s->len); /* The trailing '\0' is not counted into len. */ - if (len >= sizeof(s->buffer) - 1) { - atomic_inc(&nmi_message_lost); - return 0; - } + if (len >= sizeof(s->buffer) - 1) + return -ENOSPC; /* - * Make sure that all old data have been read before the buffer was - * reseted. This is not needed when we just append data. + * Make sure that all old data have been read before the buffer + * was reset. This is not needed when we just append data. */ if (!len) smp_rmb(); @@ -161,7 +169,7 @@ static int printk_safe_flush_buffer(const char *start, size_t len) } /* - * Flush data from the associated per_CPU buffer. The function + * Flush data from the associated per-CPU buffer. The function * can be called either via IRQ work or independently. */ static void __printk_safe_flush(struct irq_work *work) @@ -231,8 +239,12 @@ void printk_safe_flush(void) { int cpu; - for_each_possible_cpu(cpu) + for_each_possible_cpu(cpu) { +#ifdef CONFIG_PRINTK_NMI __printk_safe_flush(&per_cpu(nmi_print_seq, cpu).work); +#endif + __printk_safe_flush(&per_cpu(safe_print_seq, cpu).work); + } } /** @@ -262,14 +274,95 @@ void printk_safe_flush_on_panic(void) printk_safe_flush(); } +#ifdef CONFIG_PRINTK_NMI +/* + * Safe printk() for NMI context. It uses a per-CPU buffer to + * store the message. NMIs are not nested, so there is always only + * one writer running. But the buffer might get flushed from another + * CPU, so we need to be careful. + */ +static int vprintk_safe_nmi(const char *fmt, va_list args) +{ + struct printk_safe_seq_buf *s = this_cpu_ptr(&nmi_print_seq); + int add; + + add = printk_safe_log_store(s, fmt, args); + if (add == -ENOSPC) { + atomic_inc(&nmi_message_lost); + add = 0; + } + + return add; +} + +void printk_safe_nmi_enter(void) +{ + this_cpu_or(printk_context, PRINTK_NMI_CONTEXT_MASK); +} + +void printk_safe_nmi_exit(void) +{ + this_cpu_and(printk_context, ~PRINTK_NMI_CONTEXT_MASK); +} + +#else + +static int vprintk_safe_nmi(const char *fmt, va_list args) +{ + return 0; +} + +#endif /* CONFIG_PRINTK_NMI */ + +/* + * Lock-less printk(), to avoid deadlocks should the printk() recurse + * into itself. It uses a per-CPU buffer to store the message, just like + * NMI. + */ +static int vprintk_safe(const char *fmt, va_list args) +{ + struct printk_safe_seq_buf *s = this_cpu_ptr(&safe_print_seq); + + return printk_safe_log_store(s, fmt, args); +} + +/* Can be preempted by NMI. */ +void __printk_safe_enter(void) +{ + this_cpu_inc(printk_context); +} + +/* Can be preempted by NMI. */ +void __printk_safe_exit(void) +{ + this_cpu_dec(printk_context); +} + +__printf(1, 0) int vprintk_func(const char *fmt, va_list args) +{ + if (this_cpu_read(printk_context) & PRINTK_NMI_CONTEXT_MASK) + return vprintk_safe_nmi(fmt, args); + + if (this_cpu_read(printk_context) & PRINTK_SAFE_CONTEXT_MASK) + return vprintk_safe(fmt, args); + + return vprintk_default(fmt, args); +} + void __init printk_safe_init(void) { int cpu; for_each_possible_cpu(cpu) { - struct printk_safe_seq_buf *s = &per_cpu(nmi_print_seq, cpu); + struct printk_safe_seq_buf *s; + + s = &per_cpu(safe_print_seq, cpu); + init_irq_work(&s->work, __printk_safe_flush); +#ifdef CONFIG_PRINTK_NMI + s = &per_cpu(nmi_print_seq, cpu); init_irq_work(&s->work, __printk_safe_flush); +#endif } /* Make sure that IRQ works are initialized before enabling. */ @@ -279,13 +372,3 @@ void __init printk_safe_init(void) /* Flush pending messages that did not have scheduled IRQ works. */ printk_safe_flush(); } - -void printk_safe_nmi_enter(void) -{ - this_cpu_write(printk_func, vprintk_safe_nmi); -} - -void printk_safe_nmi_exit(void) -{ - this_cpu_write(printk_func, vprintk_default); -} -- 2.11.0
>From 36b64a58886c95155ffdef1d59775a3f33f6f2d8 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky <sergey.senozhatsky@xxxxxxxxx> Date: Tue, 20 Dec 2016 23:56:15 +0900 Subject: [PATCH 4/9] printk: always use deferred printk when flush printk_safe lines Always use printk_deferred() in printk_safe_flush_line(). Flushing can be done from NMI or printk_safe contexts (when we are in panic), so we can't call console drivers, yet still want to store the messages in the logbuf buffer. Therefore we use a deferred printk version. Signed-off-by: Sergey Senozhatsky <sergey.senozhatsky@xxxxxxxxx> Suggested-by: Petr Mladek <pmladek@xxxxxxxx> Reviewed-by: Petr Mladek <pmladek@xxxxxxxx> --- kernel/printk/printk_safe.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c index a170c0911252..57c67f14728c 100644 --- a/kernel/printk/printk_safe.c +++ b/kernel/printk/printk_safe.c @@ -111,14 +111,12 @@ static int printk_safe_log_store(struct printk_safe_seq_buf *s, static void printk_safe_flush_line(const char *text, int len) { /* - * The buffers are flushed in NMI only on panic. The messages must - * go only into the ring buffer at this stage. Consoles will get - * explicitly called later when a crashdump is not generated. + * Avoid any console drivers calls from here, because we may be + * in NMI or printk_safe context (when in panic). The messages + * must go only into the ring buffer at this stage. Consoles will + * get explicitly called later when a crashdump is not generated. */ - if (in_nmi()) - printk_deferred("%.*s", len, text); - else - printk("%.*s", len, text); + printk_deferred("%.*s", len, text); } /* printk part of the temporary buffer line by line */ -- 2.11.0
>From 623dc222fb13e34ccc135a313e782265f37605aa Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky <sergey.senozhatsky@xxxxxxxxx> Date: Tue, 20 Dec 2016 23:56:15 +0900 Subject: [PATCH 5/9] printk: report lost messages in printk safe/nmi contexts Account lost messages in pritk-safe and printk-safe-nmi contexts and report those numbers during printk_safe_flush(). The patch also moves lost message counter to struct `printk_safe_seq_buf' instead of having dedicated static counters - this simplifies the code. Signed-off-by: Sergey Senozhatsky <sergey.senozhatsky@xxxxxxxxx> Reviewed-by: Petr Mladek <pmladek@xxxxxxxx> --- kernel/printk/internal.h | 17 ----------------- kernel/printk/printk.c | 10 ---------- kernel/printk/printk_safe.c | 31 +++++++++++++++++++------------ 3 files changed, 19 insertions(+), 39 deletions(-) diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h index 97cee4f482f4..d2456020120c 100644 --- a/kernel/printk/internal.h +++ b/kernel/printk/internal.h @@ -16,23 +16,6 @@ */ #include <linux/percpu.h> -#ifdef CONFIG_PRINTK_NMI - -extern atomic_t nmi_message_lost; -static inline int get_nmi_message_lost(void) -{ - return atomic_xchg(&nmi_message_lost, 0); -} - -#else /* CONFIG_PRINTK_NMI */ - -static inline int get_nmi_message_lost(void) -{ - return 0; -} - -#endif /* CONFIG_PRINTK_NMI */ - #ifdef CONFIG_PRINTK #define PRINTK_SAFE_CONTEXT_MASK 0x7fffffff diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 335b0fcb538e..6ec61ae0d122 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -1677,7 +1677,6 @@ asmlinkage int vprintk_emit(int facility, int level, unsigned long flags; int this_cpu; int printed_len = 0; - int nmi_message_lost; bool in_sched = false; /* cpu currently holding logbuf_lock in this function */ static unsigned int logbuf_cpu = UINT_MAX; @@ -1728,15 +1727,6 @@ asmlinkage int vprintk_emit(int facility, int level, strlen(recursion_msg)); } - nmi_message_lost = get_nmi_message_lost(); - if (unlikely(nmi_message_lost)) { - text_len = scnprintf(textbuf, sizeof(textbuf), - "BAD LUCK: lost %d message(s) from NMI context!", - nmi_message_lost); - printed_len += log_store(0, 2, LOG_PREFIX|LOG_NEWLINE, 0, - NULL, 0, textbuf, text_len); - } - /* * The printf needs to come first; we need the syslog * prefix which might be passed-in as a parameter. diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c index 57c67f14728c..cd5160112374 100644 --- a/kernel/printk/printk_safe.c +++ b/kernel/printk/printk_safe.c @@ -42,10 +42,13 @@ static int printk_safe_irq_ready; #define SAFE_LOG_BUF_LEN ((1 << CONFIG_PRINTK_SAFE_LOG_BUF_SHIFT) - \ - sizeof(atomic_t) - sizeof(struct irq_work)) + sizeof(atomic_t) - \ + sizeof(atomic_t) - \ + sizeof(struct irq_work)) struct printk_safe_seq_buf { atomic_t len; /* length of written data */ + atomic_t message_lost; struct irq_work work; /* IRQ work that flushes the buffer */ unsigned char buffer[SAFE_LOG_BUF_LEN]; }; @@ -55,7 +58,6 @@ static DEFINE_PER_CPU(int, printk_context); #ifdef CONFIG_PRINTK_NMI static DEFINE_PER_CPU(struct printk_safe_seq_buf, nmi_print_seq); -atomic_t nmi_message_lost; #endif /* @@ -78,8 +80,10 @@ static int printk_safe_log_store(struct printk_safe_seq_buf *s, len = atomic_read(&s->len); /* The trailing '\0' is not counted into len. */ - if (len >= sizeof(s->buffer) - 1) - return -ENOSPC; + if (len >= sizeof(s->buffer) - 1) { + atomic_inc(&s->message_lost); + return 0; + } /* * Make sure that all old data have been read before the buffer @@ -166,6 +170,14 @@ static int printk_safe_flush_buffer(const char *start, size_t len) return len; } +static void report_message_lost(struct printk_safe_seq_buf *s) +{ + int lost = atomic_xchg(&s->message_lost, 0); + + if (lost) + printk_deferred("Lost %d message(s)!\n", lost); +} + /* * Flush data from the associated per-CPU buffer. The function * can be called either via IRQ work or independently. @@ -222,6 +234,8 @@ static void __printk_safe_flush(struct irq_work *work) if (atomic_cmpxchg(&s->len, len, 0) != len) goto more; + report_message_lost(s); + out: raw_spin_unlock_irqrestore(&read_lock, flags); } @@ -282,15 +296,8 @@ void printk_safe_flush_on_panic(void) static int vprintk_safe_nmi(const char *fmt, va_list args) { struct printk_safe_seq_buf *s = this_cpu_ptr(&nmi_print_seq); - int add; - - add = printk_safe_log_store(s, fmt, args); - if (add == -ENOSPC) { - atomic_inc(&nmi_message_lost); - add = 0; - } - return add; + return printk_safe_log_store(s, fmt, args); } void printk_safe_nmi_enter(void) -- 2.11.0
>From 40b6dd9b1fc4398f5eb3beab016755b2abf4bb25 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky <sergey.senozhatsky@xxxxxxxxx> Date: Tue, 20 Dec 2016 23:56:15 +0900 Subject: [PATCH 6/9] printk: use printk_safe buffers in printk Use printk_safe per-CPU buffers in printk recursion-prone blocks: -- around logbuf_lock protected sections in vprintk_emit() and console_unlock() -- around down_trylock_console_sem() and up_console_sem() Note that this solution addresses deadlocks caused by printk() recursive calls only. That is vprintk_emit() and console_unlock(). Another thing to note is that we now keep lockdep enabled in printk, because we are protected against the printk recursion caused by lockdep in vprintk_emit() by the printk-safe mechanism - we first switch to per-CPU buffers and only then access the deadlock-prone locks. Examples: 1) printk() from logbuf_lock spin_lock section Assume the following code: printk() raw_spin_lock(&logbuf_lock); WARN_ON(1); raw_spin_unlock(&logbuf_lock); which now produces: ------------[ cut here ]------------ WARNING: CPU: 0 PID: 366 at kernel/printk/printk.c:1811 vprintk_emit+0x1cd/0x438 CPU: 0 PID: 366 Comm: bash Call Trace: [<ffffffff81045658>] warn_slowpath_null+0x1d/0x1f [<ffffffff81091527>] vprintk_emit+0x1cd/0x438 [<ffffffff810918fe>] vprintk_default+0x1d/0x1f [<ffffffff810fdf6a>] printk+0x48/0x50 [..] [<ffffffff813caaaa>] entry_SYSCALL_64_fastpath+0x18/0xad ---[ end trace ]--- 2) printk() from semaphore sem->lock spin_lock section Assume the following code printk() console_trylock() down_trylock() raw_spin_lock_irqsave(&sem->lock, flags); WARN_ON(1); raw_spin_unlock_irqrestore(&sem->lock, flags); which now produces: ------------[ cut here ]------------ WARNING: CPU: 1 PID: 363 at kernel/locking/semaphore.c:141 down_trylock+0x3d/0x62 CPU: 1 PID: 363 Comm: bash Call Trace: [<ffffffff81045658>] warn_slowpath_null+0x1d/0x1f [<ffffffff810838df>] down_trylock+0x3d/0x62 [<ffffffff8109177e>] ? vprintk_emit+0x3f9/0x414 [<ffffffff810905cb>] console_trylock+0x31/0xeb [<ffffffff8109177e>] vprintk_emit+0x3f9/0x414 [<ffffffff81091905>] vprintk_default+0x1d/0x1f [<ffffffff810fdf71>] printk+0x48/0x50 [..] [<ffffffff813caaaa>] entry_SYSCALL_64_fastpath+0x18/0xad ---[ end trace ]--- 3) printk() from console_unlock() Assume the following code: printk() console_unlock() raw_spin_lock(&logbuf_lock); WARN_ON(1); raw_spin_unlock(&logbuf_lock); which now produces: ------------[ cut here ]------------ WARNING: CPU: 1 PID: 329 at kernel/printk/printk.c:2384 console_unlock+0x12d/0x559 CPU: 1 PID: 329 Comm: bash Call Trace: [<ffffffff8103d6ca>] warn_slowpath_null+0x18/0x1a [<ffffffff8107f917>] console_unlock+0x12d/0x559 [<ffffffff810772c4>] ? trace_hardirqs_on_caller+0x16d/0x189 [<ffffffff810772ed>] ? trace_hardirqs_on+0xd/0xf [<ffffffff810800a6>] vprintk_emit+0x363/0x374 [<ffffffff81080219>] vprintk_default+0x18/0x1a [<ffffffff810c7f77>] printk+0x43/0x4b [..] [<ffffffff814c2d6a>] entry_SYSCALL_64_fastpath+0x18/0xad ---[ end trace ]--- 4) printk() from try_to_wake_up() Assume the following code: printk() console_unlock() up() try_to_wake_up() raw_spin_lock_irqsave(&p->pi_lock, flags); WARN_ON(1); raw_spin_unlock_irqrestore(&p->pi_lock, flags); which now produces: ------------[ cut here ]------------ WARNING: CPU: 3 PID: 363 at kernel/sched/core.c:2028 try_to_wake_up+0x7f/0x4f7 CPU: 3 PID: 363 Comm: bash Call Trace: [<ffffffff81045658>] warn_slowpath_null+0x1d/0x1f [<ffffffff8106c8d0>] try_to_wake_up+0x7f/0x4f7 [<ffffffff8106cd5d>] wake_up_process+0x15/0x17 [<ffffffff813c82c6>] __up.isra.0+0x56/0x63 [<ffffffff810839a3>] up+0x32/0x42 [<ffffffff8108f624>] __up_console_sem+0x37/0x55 [<ffffffff810910ff>] console_unlock+0x21e/0x4c2 [<ffffffff810917bf>] vprintk_emit+0x41c/0x462 [<ffffffff81091971>] vprintk_default+0x1d/0x1f [<ffffffff810fdfdd>] printk+0x48/0x50 [..] [<ffffffff813cab2a>] entry_SYSCALL_64_fastpath+0x18/0xad ---[ end trace ]--- 5) printk() from call_console_drivers() Assume the following code: printk() console_unlock() call_console_drivers() ... WARN_ON(1); which now produces: ------------[ cut here ]------------ WARNING: CPU: 2 PID: 305 at kernel/printk/printk.c:1604 call_console_drivers+0x3a/0xb0 CPU: 2 PID: 305 Comm: bash Call Trace: [<ffffffff810399c0>] warn_slowpath_null+0x18/0x1a [<ffffffff8106b218>] call_console_drivers.isra.6.constprop.16+0x3a/0xb0 [<ffffffff8106cc46>] console_unlock+0x471/0x48e [<ffffffff8106ce57>] vprintk_emit+0x1f4/0x206 [<ffffffff8106cfcb>] vprintk_default+0x18/0x1a [<ffffffff8106af66>] vprintk_func+0x6e/0x70 [<ffffffff810abbe3>] printk+0x3e/0x46 [..] [<ffffffff8145a860>] entry_SYSCALL_64_fastpath+0x13/0x94 ---[ end trace ]--- Signed-off-by: Sergey Senozhatsky <sergey.senozhatsky@xxxxxxxxx> Reviewed-by: Petr Mladek <pmladek@xxxxxxxx> --- kernel/printk/printk.c | 42 +++++++++++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 15 deletions(-) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 6ec61ae0d122..c188219e596f 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -213,17 +213,31 @@ static int nr_ext_console_drivers; static int __down_trylock_console_sem(unsigned long ip) { - if (down_trylock(&console_sem)) + int lock_failed; + unsigned long flags; + + printk_safe_enter(flags); + lock_failed = down_trylock(&console_sem); + printk_safe_exit(flags); + + if (lock_failed) return 1; mutex_acquire(&console_lock_dep_map, 0, 1, ip); return 0; } #define down_trylock_console_sem() __down_trylock_console_sem(_RET_IP_) -#define up_console_sem() do { \ - mutex_release(&console_lock_dep_map, 1, _RET_IP_);\ - up(&console_sem);\ -} while (0) +static void __up_console_sem(unsigned long ip) +{ + unsigned long flags; + + mutex_release(&console_lock_dep_map, 1, ip); + + printk_safe_enter(flags); + up(&console_sem); + printk_safe_exit(flags); +} +#define up_console_sem() __up_console_sem(_RET_IP_) /* * This is used for debugging the mess that is the VT code by @@ -1689,7 +1703,7 @@ asmlinkage int vprintk_emit(int facility, int level, boot_delay_msec(level); printk_delay(); - local_irq_save(flags); + printk_safe_enter(flags); this_cpu = smp_processor_id(); /* @@ -1705,13 +1719,12 @@ asmlinkage int vprintk_emit(int facility, int level, */ if (!oops_in_progress && !lockdep_recursing(current)) { recursion_bug = true; - local_irq_restore(flags); + printk_safe_exit(flags); return 0; } zap_locks(); } - lockdep_off(); /* This stops the holder of console_sem just where we want him */ raw_spin_lock(&logbuf_lock); logbuf_cpu = this_cpu; @@ -1771,12 +1784,10 @@ asmlinkage int vprintk_emit(int facility, int level, logbuf_cpu = UINT_MAX; raw_spin_unlock(&logbuf_lock); - lockdep_on(); - local_irq_restore(flags); + printk_safe_exit(flags); /* If called from the scheduler, we can not call up(). */ if (!in_sched) { - lockdep_off(); /* * Try to acquire and then immediately release the console * semaphore. The release will print out buffers and wake up @@ -1784,7 +1795,6 @@ asmlinkage int vprintk_emit(int facility, int level, */ if (console_trylock()) console_unlock(); - lockdep_on(); } return printed_len; @@ -2209,7 +2219,8 @@ void console_unlock(void) size_t len; int level; - raw_spin_lock_irqsave(&logbuf_lock, flags); + printk_safe_enter(flags); + raw_spin_lock(&logbuf_lock); if (seen_seq != log_next_seq) { wake_klogd = true; seen_seq = log_next_seq; @@ -2259,7 +2270,7 @@ void console_unlock(void) stop_critical_timings(); /* don't trace print latency */ call_console_drivers(level, ext_text, ext_len, text, len); start_critical_timings(); - local_irq_restore(flags); + printk_safe_exit(flags); if (do_cond_resched) cond_resched(); @@ -2282,7 +2293,8 @@ void console_unlock(void) */ raw_spin_lock(&logbuf_lock); retry = console_seq != log_next_seq; - raw_spin_unlock_irqrestore(&logbuf_lock, flags); + raw_spin_unlock(&logbuf_lock); + printk_safe_exit(flags); if (retry && console_trylock()) goto again; -- 2.11.0
>From 016f6c58a457e2b0c36dcc876d1a89790452e0ea Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky <sergey.senozhatsky@xxxxxxxxx> Date: Tue, 20 Dec 2016 23:56:15 +0900 Subject: [PATCH 7/9] printk: remove zap_locks() function We use printk-safe now which makes printk-recursion detection code in vprintk_emit() unreachable. The tricky thing here is that, apart from detecting and reporting printk recursions, that code also used to zap_locks() in case of panic() from the same CPU. However, zap_locks() does not look to be needed anymore: 1) Since commit 08d78658f393 ("panic: release stale console lock to always get the logbuf printed out") panic flushing of `logbuf' to console ignores the state of `console_sem' by doing panic() console_trylock(); console_unlock(); 2) Since commit cf9b1106c81c ("printk/nmi: flush NMI messages on the system panic") panic attempts to zap the `logbuf_lock' spin_lock to successfully flush nmi messages to `logbuf'. Basically, it seems that we either already do what zap_locks() used to do but in other places or we ignore the state of the lock. The only reaming difference is that we don't re-init the console semaphore in printk_safe_flush_on_panic(), but this is not necessary because we don't call console drivers from printk_safe_flush_on_panic() due to the fact that we are using a deferred printk() version (as was suggested by Petr Mladek). Signed-off-by: Sergey Senozhatsky <sergey.senozhatsky@xxxxxxxxx> Reviewed-by: Petr Mladek <pmladek@xxxxxxxx> --- kernel/printk/printk.c | 61 -------------------------------------------------- 1 file changed, 61 deletions(-) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index c188219e596f..128b4d2161fd 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -1552,28 +1552,6 @@ static void call_console_drivers(int level, } } -/* - * Zap console related locks when oopsing. - * To leave time for slow consoles to print a full oops, - * only zap at most once every 30 seconds. - */ -static void zap_locks(void) -{ - static unsigned long oops_timestamp; - - if (time_after_eq(jiffies, oops_timestamp) && - !time_after(jiffies, oops_timestamp + 30 * HZ)) - return; - - oops_timestamp = jiffies; - - debug_locks_off(); - /* If a crash is occurring, make sure we can't deadlock */ - raw_spin_lock_init(&logbuf_lock); - /* And make sure that we print immediately */ - sema_init(&console_sem, 1); -} - int printk_delay_msec __read_mostly; static inline void printk_delay(void) @@ -1683,17 +1661,13 @@ asmlinkage int vprintk_emit(int facility, int level, const char *dict, size_t dictlen, const char *fmt, va_list args) { - static bool recursion_bug; static char textbuf[LOG_LINE_MAX]; char *text = textbuf; size_t text_len = 0; enum log_flags lflags = 0; unsigned long flags; - int this_cpu; int printed_len = 0; bool in_sched = false; - /* cpu currently holding logbuf_lock in this function */ - static unsigned int logbuf_cpu = UINT_MAX; if (level == LOGLEVEL_SCHED) { level = LOGLEVEL_DEFAULT; @@ -1704,42 +1678,8 @@ asmlinkage int vprintk_emit(int facility, int level, printk_delay(); printk_safe_enter(flags); - this_cpu = smp_processor_id(); - - /* - * Ouch, printk recursed into itself! - */ - if (unlikely(logbuf_cpu == this_cpu)) { - /* - * If a crash is occurring during printk() on this CPU, - * then try to get the crash message out but make sure - * we can't deadlock. Otherwise just return to avoid the - * recursion and return - but flag the recursion so that - * it can be printed at the next appropriate moment: - */ - if (!oops_in_progress && !lockdep_recursing(current)) { - recursion_bug = true; - printk_safe_exit(flags); - return 0; - } - zap_locks(); - } - /* This stops the holder of console_sem just where we want him */ raw_spin_lock(&logbuf_lock); - logbuf_cpu = this_cpu; - - if (unlikely(recursion_bug)) { - static const char recursion_msg[] = - "BUG: recent printk recursion!"; - - recursion_bug = false; - /* emit KERN_CRIT message */ - printed_len += log_store(0, 2, LOG_PREFIX|LOG_NEWLINE, 0, - NULL, 0, recursion_msg, - strlen(recursion_msg)); - } - /* * The printf needs to come first; we need the syslog * prefix which might be passed-in as a parameter. @@ -1782,7 +1722,6 @@ asmlinkage int vprintk_emit(int facility, int level, printed_len += log_output(facility, level, lflags, dict, dictlen, text, text_len); - logbuf_cpu = UINT_MAX; raw_spin_unlock(&logbuf_lock); printk_safe_exit(flags); -- 2.11.0
>From 42f3bac9de6eb4c72871386ff78f5e8a56f3ea16 Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky <sergey.senozhatsky@xxxxxxxxx> Date: Wed, 21 Dec 2016 00:15:04 +0900 Subject: [PATCH 8/9] printk: turn printk into printk_deferred With the printk-safe per-CPU buffer we addressed the problem of printk deadlocks caused by printk recursion: printk -> spin_lock -> WARN -> printk -> spin_lock There are, however, more scenarios that can lead to deadlock in printk. The main problem is that printk depends on two types of locks: internal locks (logbuf lock, console semaphore) and external locks (scheduler locks, console drivers, etc.). We can control internal locks, and this is what printk-safe does, but we don't control the external ones: printk may be called with one of those locks already locked by that CPU. The solution is to avoid taking any external locks from printk, but acquire only internal locks. This is, basically, what deferred printk does - it acquires only logbuf spin_lock to store the message in the logbuf, while the printing is performed later from a safe context or even on another CPU. This patch turns all printk calls into printk_deferred calls. In other words, every printk call now adds a per-CPU irq_work to per-CPU raised list, which, basically, implies that we need a) initialized per-CPU zones and b) initialized IRQs. We have printk_safe_init(), which we call from start_kernel() when both a) and b) are done so we know that we can start queuing printk-safe irq_works. We can use the very same flag in vprintk_emit() as well: rename it to `printk_safe_init_done' and make it visible to vprintk_emit(). vprintk_emit(), thus, does not queue any irq work as long as `printk_safe_init_done' is `false'. This does not lead to any message loss, but just delays a bit the time when we start printing messages to the console. If panic() happens somewhere before printk_safe_init() then console_flush_on_panic() will take care of the logbuf messages. Signed-off-by: Sergey Senozhatsky <sergey.senozhatsky@xxxxxxxxx> --- kernel/printk/internal.h | 1 + kernel/printk/printk.c | 103 +++++++++++++++++++------------------------- kernel/printk/printk_safe.c | 6 +-- 3 files changed, 48 insertions(+), 62 deletions(-) diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h index d2456020120c..9a47325890ca 100644 --- a/kernel/printk/internal.h +++ b/kernel/printk/internal.h @@ -22,6 +22,7 @@ #define PRINTK_NMI_CONTEXT_MASK 0x80000000 extern raw_spinlock_t logbuf_lock; +extern bool printk_safe_init_done; __printf(1, 0) int vprintk_default(const char *fmt, va_list args); __printf(1, 0) int vprintk_func(const char *fmt, va_list args); diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 128b4d2161fd..fee5e0ea4a62 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -401,6 +401,40 @@ static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN); static char *log_buf = __log_buf; static u32 log_buf_len = __LOG_BUF_LEN; +#define PRINTK_PENDING_WAKEUP 0x01 +#define PRINTK_PENDING_OUTPUT 0x02 + +static DEFINE_PER_CPU(int, printk_pending); + +static void wake_up_klogd_work_func(struct irq_work *irq_work) +{ + int pending = __this_cpu_xchg(printk_pending, 0); + + if (pending & PRINTK_PENDING_OUTPUT) { + /* If trylock fails, someone else is doing the printing */ + if (console_trylock()) + console_unlock(); + } + + if (pending & PRINTK_PENDING_WAKEUP) + wake_up_interruptible(&log_wait); +} + +static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) = { + .func = wake_up_klogd_work_func, + .flags = IRQ_WORK_LAZY, +}; + +void wake_up_klogd(void) +{ + preempt_disable(); + if (waitqueue_active(&log_wait)) { + this_cpu_or(printk_pending, PRINTK_PENDING_WAKEUP); + irq_work_queue(this_cpu_ptr(&wake_up_klogd_work)); + } + preempt_enable(); +} + /* Return log buffer address */ char *log_buf_addr_get(void) { @@ -1667,12 +1701,6 @@ asmlinkage int vprintk_emit(int facility, int level, enum log_flags lflags = 0; unsigned long flags; int printed_len = 0; - bool in_sched = false; - - if (level == LOGLEVEL_SCHED) { - level = LOGLEVEL_DEFAULT; - in_sched = true; - } boot_delay_msec(level); printk_delay(); @@ -1723,18 +1751,17 @@ asmlinkage int vprintk_emit(int facility, int level, printed_len += log_output(facility, level, lflags, dict, dictlen, text, text_len); raw_spin_unlock(&logbuf_lock); - printk_safe_exit(flags); - /* If called from the scheduler, we can not call up(). */ - if (!in_sched) { - /* - * Try to acquire and then immediately release the console - * semaphore. The release will print out buffers and wake up - * /dev/kmsg and syslog() users. - */ - if (console_trylock()) - console_unlock(); + /* + * We are about to add a per-CPU irq_work structure to the per-CPU + * lazy list, so what we check here in the first place is whether + * per-CPU areas have been set up. + */ + if (printk_safe_init_done) { + __this_cpu_or(printk_pending, PRINTK_PENDING_OUTPUT); + irq_work_queue(this_cpu_ptr(&wake_up_klogd_work)); } + printk_safe_exit(flags); return printed_len; } @@ -2623,57 +2650,15 @@ static int __init printk_late_init(void) late_initcall(printk_late_init); #if defined CONFIG_PRINTK -/* - * Delayed printk version, for scheduler-internal messages: - */ -#define PRINTK_PENDING_WAKEUP 0x01 -#define PRINTK_PENDING_OUTPUT 0x02 - -static DEFINE_PER_CPU(int, printk_pending); - -static void wake_up_klogd_work_func(struct irq_work *irq_work) -{ - int pending = __this_cpu_xchg(printk_pending, 0); - - if (pending & PRINTK_PENDING_OUTPUT) { - /* If trylock fails, someone else is doing the printing */ - if (console_trylock()) - console_unlock(); - } - - if (pending & PRINTK_PENDING_WAKEUP) - wake_up_interruptible(&log_wait); -} - -static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) = { - .func = wake_up_klogd_work_func, - .flags = IRQ_WORK_LAZY, -}; - -void wake_up_klogd(void) -{ - preempt_disable(); - if (waitqueue_active(&log_wait)) { - this_cpu_or(printk_pending, PRINTK_PENDING_WAKEUP); - irq_work_queue(this_cpu_ptr(&wake_up_klogd_work)); - } - preempt_enable(); -} - int printk_deferred(const char *fmt, ...) { va_list args; int r; - preempt_disable(); va_start(args, fmt); - r = vprintk_emit(0, LOGLEVEL_SCHED, NULL, 0, fmt, args); + r = vprintk_emit(0, LOGLEVEL_DEFAULT, NULL, 0, fmt, args); va_end(args); - __this_cpu_or(printk_pending, PRINTK_PENDING_OUTPUT); - irq_work_queue(this_cpu_ptr(&wake_up_klogd_work)); - preempt_enable(); - return r; } diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c index cd5160112374..bb8fd72d817d 100644 --- a/kernel/printk/printk_safe.c +++ b/kernel/printk/printk_safe.c @@ -39,7 +39,7 @@ * There are situations when we want to make sure that all buffers * were handled or when IRQs are blocked. */ -static int printk_safe_irq_ready; +bool printk_safe_init_done; #define SAFE_LOG_BUF_LEN ((1 << CONFIG_PRINTK_SAFE_LOG_BUF_SHIFT) - \ sizeof(atomic_t) - \ @@ -103,7 +103,7 @@ static int printk_safe_log_store(struct printk_safe_seq_buf *s, goto again; /* Get flushed in a more safe context. */ - if (add && printk_safe_irq_ready) { + if (add && printk_safe_init_done) { /* Make sure that IRQ work is really initialized. */ smp_rmb(); irq_work_queue(&s->work); @@ -372,7 +372,7 @@ void __init printk_safe_init(void) /* Make sure that IRQ works are initialized before enabling. */ smp_wmb(); - printk_safe_irq_ready = 1; + printk_safe_init_done = true; /* Flush pending messages that did not have scheduled IRQ works. */ printk_safe_flush(); -- 2.11.0
>From 1ab619d1d880c537dc670d599ecac02486fe2fae Mon Sep 17 00:00:00 2001 From: Sergey Senozhatsky <sergey.senozhatsky@xxxxxxxxx> Date: Tue, 20 Dec 2016 23:57:36 +0900 Subject: [PATCH 9/9] printk: make printing asynchronous We moved console_unlock() from vprintk_emit() to printk irq_work in order to make printk() deadlock and recursion safe. The problem with the console_unlock() function is that it's time unbound. Once the CPU acquires the console_sem it spins in console_unlock() printing the kernel messages as long as the are messages in the logbuf, while other CPUs are free to append new messages to the logbuf. Things do get worse when console_unlock() has to print messages on a slow serial console. This, in the worst case, can lockup the system. Luckily, not every system printk-s enough messages to cause any problems, yet we still need to take care of those systems that actually do. This patch introduces a dedicated printk kthread: instead of doing console_unlock() from irq_work context we wake_up() a special kthread to do the actual printing to console. This has the advantage that printing always happens from a schedulable contex, so we can cond_resched() between the lines, if necessary, and thus we don't lockup any particular CPU. So the printing now looks as follows: CPUx printk() vprintk_emit() irq_work_queue() IRQ irq_work_run_list() wake_up_klogd_work_func() wake_up_process(printk_kthread) * printk_kthread console_lock() console_unlock() call_console_drivers() * When system becomes unstable (at least one emergency level message appears in the logbuf) we can't rely on scheduler to wake up printing kthread and, thus, we switch back to old wake_up_klogd_work_func() behavior and attempt to acquire the console semaphore and console_unlock() from irq_work context. Another thing to notice is that wake_up_klogd_work_func() behaviour can be forcibly changed to the previous one, by providing a `printk.synchronous=1' boot param. The patch has evolved and changed a lot, but "credit where credit's due" - the list of people (not in any particular order) who commented or/and tested previous versions of the patch: Viresh Kumar, Tetsuo Handa, Byungchul Park. Signed-off-by: Jan Kara <jack@xxxxxxx> Signed-off-by: Sergey Senozhatsky <sergey.senozhatsky@xxxxxxxxx> --- Documentation/admin-guide/kernel-parameters.txt | 9 +++ kernel/printk/printk.c | 92 ++++++++++++++++++++++++- 2 files changed, 98 insertions(+), 3 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 21e2d8863705..93b415f0f887 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3096,6 +3096,15 @@ printk.time= Show timing data prefixed to each printk message line Format: <bool> (1/Y/y=enable, 0/N/n=disable) + printk.synchronous= + This parameter controls whether kernel messages must + be printed to console synchronously or asynchronously. + Asynchronous printing avoids kernel stalling behind + slow serial console and thus avoids interrupt timeouts + and lockups. printk operates in synchronous mode during + early boot and also switches back to synchronous mode + if the system becomes unstable (emergency). + processor.max_cstate= [HW,ACPI] Limit processor to maximum C-state max_cstate=9 overrides any DMI blacklist limit. diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index fee5e0ea4a62..fe72f7e098c4 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -45,6 +45,7 @@ #include <linux/utsname.h> #include <linux/ctype.h> #include <linux/uio.h> +#include <linux/kthread.h> #include <asm/uaccess.h> #include <asm/sections.h> @@ -401,19 +402,46 @@ static char __log_buf[__LOG_BUF_LEN] __aligned(LOG_ALIGN); static char *log_buf = __log_buf; static u32 log_buf_len = __LOG_BUF_LEN; +/* Control whether printing to console must be synchronous. */ +static bool __read_mostly printk_sync; +module_param_named(synchronous, printk_sync, bool, 0444); +MODULE_PARM_DESC(synchronous, "make printing to console synchronous"); + +/* + * Force sync printk mode during suspend/kexec, regardless whether + * console_suspend_enabled permits console suspend. + */ +static bool __read_mostly force_printk_sync; +/* Printing kthread for async printk */ +static struct task_struct *printk_kthread; +/* When `true' printing thread has messages to print */ +static bool printk_kthread_need_flush_console; + #define PRINTK_PENDING_WAKEUP 0x01 #define PRINTK_PENDING_OUTPUT 0x02 static DEFINE_PER_CPU(int, printk_pending); +static inline bool can_printk_async(void) +{ + return !printk_sync && printk_kthread && !force_printk_sync; +} + static void wake_up_klogd_work_func(struct irq_work *irq_work) { int pending = __this_cpu_xchg(printk_pending, 0); if (pending & PRINTK_PENDING_OUTPUT) { - /* If trylock fails, someone else is doing the printing */ - if (console_trylock()) - console_unlock(); + if (can_printk_async()) { + wake_up_process(printk_kthread); + } else { + /* + * If trylock fails, someone else + * is doing the printing. + */ + if (console_trylock()) + console_unlock(); + } } if (pending & PRINTK_PENDING_WAKEUP) @@ -1745,6 +1773,13 @@ asmlinkage int vprintk_emit(int facility, int level, if (level == LOGLEVEL_DEFAULT) level = default_message_loglevel; + /* The system is unstable, switch to sync printk mode */ + if (level == LOGLEVEL_EMERG) + printk_sync = true; + + if (can_printk_async()) + printk_kthread_need_flush_console = true; + if (dict) lflags |= LOG_PREFIX|LOG_NEWLINE; @@ -1867,6 +1902,7 @@ static void call_console_drivers(int level, static size_t msg_print_text(const struct printk_log *msg, bool syslog, char *buf, size_t size) { return 0; } static bool suppress_message_printing(int level) { return false; } +static bool __read_mostly force_printk_sync; #endif /* CONFIG_PRINTK */ @@ -2003,6 +2039,8 @@ MODULE_PARM_DESC(console_suspend, "suspend console during suspend" */ void suspend_console(void) { + force_printk_sync = true; + if (!console_suspend_enabled) return; printk("Suspending console(s) (use no_console_suspend to debug)\n"); @@ -2013,6 +2051,8 @@ void suspend_console(void) void resume_console(void) { + force_printk_sync = false; + if (!console_suspend_enabled) return; down_console_sem(); @@ -2650,6 +2690,52 @@ static int __init printk_late_init(void) late_initcall(printk_late_init); #if defined CONFIG_PRINTK +static int printk_kthread_func(void *data) +{ + while (1) { + set_current_state(TASK_INTERRUPTIBLE); + if (!printk_kthread_need_flush_console) + schedule(); + + __set_current_state(TASK_RUNNING); + /* + * Avoid an infinite loop when console_unlock() cannot + * access consoles, e.g. because console_suspended is + * true. schedule(), someone else will print the messages + * from resume_console(). + */ + printk_kthread_need_flush_console = false; + + console_lock(); + console_unlock(); + } + + return 0; +} + +/* + * Init async printk via late_initcall, after core/arch/device/etc. + * initialization. + */ +static int __init init_printk_kthread(void) +{ + struct task_struct *thread; + + if (printk_sync) + return 0; + + thread = kthread_run(printk_kthread_func, NULL, "printk"); + if (IS_ERR(thread)) { + pr_err("printk: unable to create printing thread\n"); + printk_sync = true; + return PTR_ERR(thread); + } + + printk_kthread = thread; + return 0; +} +late_initcall(init_printk_kthread); + int printk_deferred(const char *fmt, ...) { va_list args; -- 2.11.0