Dear RT folks!

I'm pleased to announce the v4.14.18-rt15 patch set.

Changes since v4.14.18-rt14:

  - Remove the unused mutex brd_mutex; it was causing a "defined but not
    used" warning. Reported by Dan Murphy, patch by Mikulas Patocka.

  - Update the "tracing: Inter-event (e.g. latency) support" patch to the
    version that was merged in the end. The significant change is probably
    the inclusion of commit a0e3a18f4baf ("ring-buffer: Bring back context
    level recursive checks").

Known issues
  - A warning triggered in "rcu_note_context_switch" originated from
    SyS_timer_gettime(). The issue was always there; it is now visible.
    Reported by Grygorii Strashko and Daniel Wagner.

The delta patch against v4.14.18-rt14 is appended below and can be found here:

    https://cdn.kernel.org/pub/linux/kernel/projects/rt/4.14/incr/patch-4.14.18-rt14-rt15.patch.xz

You can get this release via the git tree at:

    git://git.kernel.org/pub/scm/linux/kernel/git/rt/linux-rt-devel.git v4.14.18-rt15

The RT patch against v4.14.18 can be found here:

    https://cdn.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patch-4.14.18-rt15.patch.xz

The split quilt queue is available at:

    https://cdn.kernel.org/pub/linux/kernel/projects/rt/4.14/older/patches-4.14.18-rt15.tar.xz

Sebastian

diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 2d7178f7754e..c1cf87718c2e 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -60,7 +60,6 @@ struct brd_device {
 /*
  * Look up and return a brd's page for a given sector.
  */
-static DEFINE_MUTEX(brd_mutex);
 static struct page *brd_lookup_page(struct brd_device *brd, sector_t sector)
 {
 	pgoff_t idx;
diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index 47f9791971a4..ea8c1dd830f7 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -120,6 +120,9 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer,
 int ring_buffer_write(struct ring_buffer *buffer,
 		      unsigned long length, void *data);
 
+void ring_buffer_nest_start(struct ring_buffer *buffer);
+void ring_buffer_nest_end(struct ring_buffer *buffer);
+
 struct ring_buffer_event *
 ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts,
 		 unsigned long *lost_events);
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index c4d554286334..76fe2d01d801 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -478,6 +478,7 @@ struct ring_buffer_per_cpu {
 	struct buffer_page		*reader_page;
 	unsigned long			lost_events;
 	unsigned long			last_overrun;
+	unsigned long			nest;
 	local_t				entries_bytes;
 	local_t				entries;
 	local_t				overrun;
@@ -2585,29 +2586,58 @@ rb_wakeups(struct ring_buffer *buffer, struct ring_buffer_per_cpu *cpu_buffer)
  * The lock and unlock are done within a preempt disable section.
  * The current_context per_cpu variable can only be modified
  * by the current task between lock and unlock. But it can
- * be modified more than once via an interrupt. There are four
- * different contexts that we need to consider.
+ * be modified more than once via an interrupt. To pass this
+ * information from the lock to the unlock without having to
+ * access the 'in_interrupt()' functions again (which do show
+ * a bit of overhead in something as critical as function tracing),
+ * we use a bitmask trick.
  *
- * Normal context.
- * SoftIRQ context
- * IRQ context
- * NMI context
+ *  bit 0 = NMI context
+ *  bit 1 = IRQ context
+ *  bit 2 = SoftIRQ context
+ *  bit 3 = normal context.
  *
- * If for some reason the ring buffer starts to recurse, we only allow
- * that to happen at most 6 times (one for each context, plus possibly
- * two levels of synthetic event generation). If it happens 7 times,
- * then we consider this a recusive loop and do not let it go further.
+ * This works because this is the order of contexts that can
+ * preempt other contexts. A SoftIRQ never preempts an IRQ
+ * context.
+ *
+ * When the context is determined, the corresponding bit is
+ * checked and set (if it was set, then a recursion of that context
+ * happened).
+ *
+ * On unlock, we need to clear this bit. To do so, just subtract
+ * 1 from the current_context and AND it to itself.
+ *
+ * (binary)
+ *  101 - 1 = 100
+ *  101 & 100 = 100 (clearing bit zero)
+ *
+ *  1010 - 1 = 1001
+ *  1010 & 1001 = 1000 (clearing bit 1)
+ *
+ * The least significant bit can be cleared this way, and it
+ * just so happens that it is the same bit corresponding to
+ * the current context.
  */
 static __always_inline int
 trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer)
 {
-	if (cpu_buffer->current_context >= 6)
+	unsigned int val = cpu_buffer->current_context;
+	unsigned long pc = preempt_count();
+	int bit;
+
+	if (!(pc & (NMI_MASK | HARDIRQ_MASK | SOFTIRQ_OFFSET)))
+		bit = RB_CTX_NORMAL;
+	else
+		bit = pc & NMI_MASK ? RB_CTX_NMI :
+			pc & HARDIRQ_MASK ? RB_CTX_IRQ : RB_CTX_SOFTIRQ;
+
+	if (unlikely(val & (1 << (bit + cpu_buffer->nest))))
 		return 1;
 
-	cpu_buffer->current_context++;
-	/* Interrupts must see this update */
-	barrier();
+	val |= (1 << (bit + cpu_buffer->nest));
+	cpu_buffer->current_context = val;
 
 	return 0;
 }
@@ -2615,9 +2645,57 @@ trace_recursive_lock(struct ring_buffer_per_cpu *cpu_buffer)
 static __always_inline void
 trace_recursive_unlock(struct ring_buffer_per_cpu *cpu_buffer)
 {
-	/* Don't let the dec leak out */
-	barrier();
-	cpu_buffer->current_context--;
+	cpu_buffer->current_context &=
+		cpu_buffer->current_context - (1 << cpu_buffer->nest);
+}
+
+/* The recursive locking above uses 4 bits */
+#define NESTED_BITS 4
+
+/**
+ * ring_buffer_nest_start - Allow to trace while nested
+ * @buffer: The ring buffer to modify
+ *
+ * The ring buffer has a safety mechanism to prevent recursion.
+ * But there may be a case where a trace needs to be done while
+ * tracing something else. In this case, calling this function
+ * will allow this function to nest within a currently active
+ * ring_buffer_lock_reserve().
+ *
+ * Call this function before calling another ring_buffer_lock_reserve() and
+ * call ring_buffer_nest_end() after the nested ring_buffer_unlock_commit().
+ */
+void ring_buffer_nest_start(struct ring_buffer *buffer)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+	int cpu;
+
+	/* Enabled by ring_buffer_nest_end() */
+	preempt_disable_notrace();
+	cpu = raw_smp_processor_id();
+	cpu_buffer = buffer->buffers[cpu];
+	/* This is the shift value for the above recursive locking */
+	cpu_buffer->nest += NESTED_BITS;
+}
+
+/**
+ * ring_buffer_nest_end - Allow to trace while nested
+ * @buffer: The ring buffer to modify
+ *
+ * Must be called after ring_buffer_nest_start() and after the
+ * ring_buffer_unlock_commit().
+ */
+void ring_buffer_nest_end(struct ring_buffer *buffer)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+	int cpu;
+
+	/* disabled by ring_buffer_nest_start() */
+	cpu = raw_smp_processor_id();
+	cpu_buffer = buffer->buffers[cpu];
+	/* This is the shift value for the above recursive locking */
+	cpu_buffer->nest -= NESTED_BITS;
+	preempt_enable_notrace();
 }
 
 /**
diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c
index e47b56632367..49afef3cc384 100644
--- a/kernel/trace/trace_events_hist.c
+++ b/kernel/trace/trace_events_hist.c
@@ -640,6 +640,7 @@ static notrace void trace_event_raw_event_synth(void *__data,
 	struct trace_event_file *trace_file = __data;
 	struct synth_trace_event *entry;
 	struct trace_event_buffer fbuffer;
+	struct ring_buffer *buffer;
 	struct synth_event *event;
 	unsigned int i, n_u64;
 	int fields_size = 0;
@@ -651,10 +652,17 @@ static notrace void trace_event_raw_event_synth(void *__data,
 
 	fields_size = event->n_u64 * sizeof(u64);
 
+	/*
+	 * Avoid ring buffer recursion detection, as this event
+	 * is being performed within another event.
+	 */
+	buffer = trace_file->tr->trace_buffer.buffer;
+	ring_buffer_nest_start(buffer);
+
 	entry = trace_event_buffer_reserve(&fbuffer, trace_file,
 					   sizeof(*entry) + fields_size);
 	if (!entry)
-		return;
+		goto out;
 
 	for (i = 0, n_u64 = 0; i < event->n_fields; i++) {
 		if (event->fields[i]->is_string) {
@@ -670,6 +678,8 @@ static notrace void trace_event_raw_event_synth(void *__data,
 	}
 
 	trace_event_buffer_commit(&fbuffer);
+out:
+	ring_buffer_nest_end(buffer);
 }
 
 static void free_synth_event_print_fmt(struct trace_event_call *call)
diff --git a/localversion-rt b/localversion-rt
index 08b3e75841ad..18777ec0c27d 100644
--- a/localversion-rt
+++ b/localversion-rt
@@ -1 +1 @@
--rt14
+-rt15
--
To unsubscribe from this list: send the line "unsubscribe linux-rt-users" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html
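
As an aside, the recursion check and the NESTED_BITS shift from the
ring_buffer.c hunk above can be exercised in plain user space. Below is a
minimal sketch, not kernel code: the demo_* names are made up for
illustration, and the context bit is passed in explicitly instead of being
derived from preempt_count(); only the bit manipulation mirrors
trace_recursive_lock()/trace_recursive_unlock() and what the nest_start/end
helpers do with the shift.

/*
 * Userspace sketch of the ring buffer recursion-check bitmask.
 * bit 0 = NMI, bit 1 = IRQ, bit 2 = SoftIRQ, bit 3 = normal context,
 * shifted up by 'nest' while nesting is explicitly allowed.
 */
#include <stdio.h>

#define NESTED_BITS 4

struct demo_buffer {
	unsigned int current_context;	/* one bit per active context */
	unsigned int nest;		/* extra shift while nesting is allowed */
};

/* Return 1 if this context is already writing (recursion), else claim it. */
static int demo_recursive_lock(struct demo_buffer *b, int bit)
{
	unsigned int val = b->current_context;

	if (val & (1u << (bit + b->nest)))
		return 1;

	b->current_context = val | (1u << (bit + b->nest));
	return 0;
}

/*
 * Subtracting (1 << nest) and ANDing clears the lowest bit set at or
 * above the nest level -- the bit the matching lock set.
 */
static void demo_recursive_unlock(struct demo_buffer *b)
{
	b->current_context &= b->current_context - (1u << b->nest);
}

int main(void)
{
	struct demo_buffer b = { 0, 0 };

	printf("normal ctx lock:   %d\n", demo_recursive_lock(&b, 3)); /* 0 */
	printf("same ctx again:    %d\n", demo_recursive_lock(&b, 3)); /* 1 */

	/* what ring_buffer_nest_start() does: move to the next group of 4 bits */
	b.nest += NESTED_BITS;
	printf("nested event lock: %d\n", demo_recursive_lock(&b, 3)); /* 0 */
	demo_recursive_unlock(&b);
	b.nest -= NESTED_BITS;		/* ring_buffer_nest_end() */

	demo_recursive_unlock(&b);
	printf("final state:       0x%x\n", b.current_context); /* 0x0 */
	return 0;
}

Compiled and run, this prints 0, 1, 0 and a final state of 0x0: the second
attempt from the same context is rejected as recursion, while the attempt
made after shifting by NESTED_BITS is allowed and both bits are cleared
again by the paired unlocks.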