3.16.7-ckt2 -stable review patch. If anyone has any objections, please let me know. ------------------ From: Rabin Vincent <rabin@xxxxxx> commit e30f53aad2202b5526c40c36d8eeac8bf290bde5 upstream. On a !PREEMPT kernel, attempting to use trace-cmd results in a soft lockup: # trace-cmd record -e raw_syscalls:* -F false NMI watchdog: BUG: soft lockup - CPU#0 stuck for 22s! [trace-cmd:61] ... Call Trace: [<ffffffff8105b580>] ? __wake_up_common+0x90/0x90 [<ffffffff81092e25>] wait_on_pipe+0x35/0x40 [<ffffffff810936e3>] tracing_buffers_splice_read+0x2e3/0x3c0 [<ffffffff81093300>] ? tracing_stats_read+0x2a0/0x2a0 [<ffffffff812d10ab>] ? _raw_spin_unlock+0x2b/0x40 [<ffffffff810dc87b>] ? do_read_fault+0x21b/0x290 [<ffffffff810de56a>] ? handle_mm_fault+0x2ba/0xbd0 [<ffffffff81095c80>] ? trace_event_buffer_lock_reserve+0x40/0x80 [<ffffffff810951e2>] ? trace_buffer_lock_reserve+0x22/0x60 [<ffffffff81095c80>] ? trace_event_buffer_lock_reserve+0x40/0x80 [<ffffffff8112415d>] do_splice_to+0x6d/0x90 [<ffffffff81126971>] SyS_splice+0x7c1/0x800 [<ffffffff812d1edd>] tracesys_phase2+0xd3/0xd8 The problem is this: tracing_buffers_splice_read() calls ring_buffer_wait() to wait for data in the ring buffers. The buffers are not empty so ring_buffer_wait() returns immediately. But tracing_buffers_splice_read() calls ring_buffer_read_page() with full=1, meaning it only wants to read a full page. When the full page is not available, tracing_buffers_splice_read() tries to wait again with ring_buffer_wait(), which again returns immediately, and so on. Fix this by adding a "full" argument to ring_buffer_wait() which will make ring_buffer_wait() wait until the writer has left the reader's page, i.e. until full-page reads will succeed. Link: http://lkml.kernel.org/r/1415645194-25379-1-git-send-email-rabin@xxxxxx Fixes: b1169cc69ba9 ("tracing: Remove mock up poll wait function") Signed-off-by: Rabin Vincent <rabin@xxxxxx> Signed-off-by: Steven Rostedt <rostedt@xxxxxxxxxxx> Signed-off-by: Luis Henriques <luis.henriques@xxxxxxxxxxxxx> --- include/linux/ring_buffer.h | 2 +- kernel/trace/ring_buffer.c | 81 ++++++++++++++++++++++++++++++--------------- kernel/trace/trace.c | 23 ++++--------- 3 files changed, 62 insertions(+), 44 deletions(-) diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 49a4d6f59108..e2c13cd863bd 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -97,7 +97,7 @@ __ring_buffer_alloc(unsigned long size, unsigned flags, struct lock_class_key *k __ring_buffer_alloc((size), (flags), &__key); \ }) -int ring_buffer_wait(struct ring_buffer *buffer, int cpu); +int ring_buffer_wait(struct ring_buffer *buffer, int cpu, bool full); int ring_buffer_poll_wait(struct ring_buffer *buffer, int cpu, struct file *filp, poll_table *poll_table); diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 51862982e1e9..80516c7cee86 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -538,16 +538,18 @@ static void rb_wake_up_waiters(struct irq_work *work) * ring_buffer_wait - wait for input to the ring buffer * @buffer: buffer to wait on * @cpu: the cpu buffer to wait on + * @full: wait until a full page is available, if @cpu != RING_BUFFER_ALL_CPUS * * If @cpu == RING_BUFFER_ALL_CPUS then the task will wake up as soon * as data is added to any of the @buffer's cpu buffers. Otherwise * it will wait for data to be added to a specific cpu buffer. */ -int ring_buffer_wait(struct ring_buffer *buffer, int cpu) +int ring_buffer_wait(struct ring_buffer *buffer, int cpu, bool full) { - struct ring_buffer_per_cpu *cpu_buffer; + struct ring_buffer_per_cpu *uninitialized_var(cpu_buffer); DEFINE_WAIT(wait); struct rb_irq_work *work; + int ret = 0; /* * Depending on what the caller is waiting for, either any @@ -564,36 +566,61 @@ int ring_buffer_wait(struct ring_buffer *buffer, int cpu) } - prepare_to_wait(&work->waiters, &wait, TASK_INTERRUPTIBLE); + while (true) { + prepare_to_wait(&work->waiters, &wait, TASK_INTERRUPTIBLE); - /* - * The events can happen in critical sections where - * checking a work queue can cause deadlocks. - * After adding a task to the queue, this flag is set - * only to notify events to try to wake up the queue - * using irq_work. - * - * We don't clear it even if the buffer is no longer - * empty. The flag only causes the next event to run - * irq_work to do the work queue wake up. The worse - * that can happen if we race with !trace_empty() is that - * an event will cause an irq_work to try to wake up - * an empty queue. - * - * There's no reason to protect this flag either, as - * the work queue and irq_work logic will do the necessary - * synchronization for the wake ups. The only thing - * that is necessary is that the wake up happens after - * a task has been queued. It's OK for spurious wake ups. - */ - work->waiters_pending = true; + /* + * The events can happen in critical sections where + * checking a work queue can cause deadlocks. + * After adding a task to the queue, this flag is set + * only to notify events to try to wake up the queue + * using irq_work. + * + * We don't clear it even if the buffer is no longer + * empty. The flag only causes the next event to run + * irq_work to do the work queue wake up. The worse + * that can happen if we race with !trace_empty() is that + * an event will cause an irq_work to try to wake up + * an empty queue. + * + * There's no reason to protect this flag either, as + * the work queue and irq_work logic will do the necessary + * synchronization for the wake ups. The only thing + * that is necessary is that the wake up happens after + * a task has been queued. It's OK for spurious wake ups. + */ + work->waiters_pending = true; + + if (signal_pending(current)) { + ret = -EINTR; + break; + } + + if (cpu == RING_BUFFER_ALL_CPUS && !ring_buffer_empty(buffer)) + break; + + if (cpu != RING_BUFFER_ALL_CPUS && + !ring_buffer_empty_cpu(buffer, cpu)) { + unsigned long flags; + bool pagebusy; + + if (!full) + break; + + raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); + pagebusy = cpu_buffer->reader_page == cpu_buffer->commit_page; + raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); + + if (!pagebusy) + break; + } - if ((cpu == RING_BUFFER_ALL_CPUS && ring_buffer_empty(buffer)) || - (cpu != RING_BUFFER_ALL_CPUS && ring_buffer_empty_cpu(buffer, cpu))) schedule(); + } finish_wait(&work->waiters, &wait); - return 0; + + return ret; } /** diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 291397e66669..bbf1381c2201 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1099,13 +1099,14 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu) } #endif /* CONFIG_TRACER_MAX_TRACE */ -static int wait_on_pipe(struct trace_iterator *iter) +static int wait_on_pipe(struct trace_iterator *iter, bool full) { /* Iterators are static, they should be filled or empty */ if (trace_buffer_iter(iter, iter->cpu_file)) return 0; - return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file); + return ring_buffer_wait(iter->trace_buffer->buffer, iter->cpu_file, + full); } #ifdef CONFIG_FTRACE_STARTUP_TEST @@ -4412,15 +4413,12 @@ static int tracing_wait_pipe(struct file *filp) mutex_unlock(&iter->mutex); - ret = wait_on_pipe(iter); + ret = wait_on_pipe(iter, false); mutex_lock(&iter->mutex); if (ret) return ret; - - if (signal_pending(current)) - return -EINTR; } return 1; @@ -5343,16 +5341,12 @@ tracing_buffers_read(struct file *filp, char __user *ubuf, goto out_unlock; } mutex_unlock(&trace_types_lock); - ret = wait_on_pipe(iter); + ret = wait_on_pipe(iter, false); mutex_lock(&trace_types_lock); if (ret) { size = ret; goto out_unlock; } - if (signal_pending(current)) { - size = -EINTR; - goto out_unlock; - } goto again; } size = 0; @@ -5558,14 +5552,11 @@ tracing_buffers_splice_read(struct file *file, loff_t *ppos, goto out; } mutex_unlock(&trace_types_lock); - ret = wait_on_pipe(iter); + ret = wait_on_pipe(iter, true); mutex_lock(&trace_types_lock); if (ret) goto out; - if (signal_pending(current)) { - ret = -EINTR; - goto out; - } + goto again; } -- 2.1.0 -- To unsubscribe from this list: send the line "unsubscribe stable" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html