On Wed, 8 Feb 2017 11:24:59 -0600 Tom Zanussi <tom.zanussi@xxxxxxxxxxxxxxx> wrote: > Replace the unused RINGBUF_TYPE_TIME_STAMP ring buffer type with > RINGBUF_TYPE_TIME_EXTEND_ABS, which forces extended time_deltas for > all events. Hmm, I could probably have this be used for nested commits :-/ > > Having time_deltas that aren't dependent on previous events in the > ring buffer makes it feasible to use the ring_buffer_event timetamps > in a more random-access way, to be used for purposes other than serial > event printing. > > To set/reset this mode, use tracing_set_timestamp_abs(). > > Signed-off-by: Tom Zanussi <tom.zanussi@xxxxxxxxxxxxxxx> > --- > include/linux/ring_buffer.h | 12 ++++- > kernel/trace/ring_buffer.c | 109 ++++++++++++++++++++++++++++++++------------ > kernel/trace/trace.c | 25 +++++++++- > kernel/trace/trace.h | 2 + > 4 files changed, 117 insertions(+), 31 deletions(-) > > diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h > index b6d4568..c3a1064 100644 > --- a/include/linux/ring_buffer.h > +++ b/include/linux/ring_buffer.h > @@ -36,6 +36,12 @@ struct ring_buffer_event { > * array[0] = time delta (28 .. 59) > * size = 8 bytes > * > + * @RINGBUF_TYPE_TIME_EXTEND_ABS: > + * Extend the time delta, but interpret it as > + * absolute, not relative > + * array[0] = time delta (28 .. 59) > + * size = 8 bytes > + * > * @RINGBUF_TYPE_TIME_STAMP: Sync time stamp with external clock I guess you need to nuke this comment too. > * array[0] = tv_nsec > * array[1..2] = tv_sec > @@ -56,12 +62,12 @@ enum ring_buffer_type { > RINGBUF_TYPE_DATA_TYPE_LEN_MAX = 28, > RINGBUF_TYPE_PADDING, > RINGBUF_TYPE_TIME_EXTEND, > - /* FIXME: RINGBUF_TYPE_TIME_STAMP not implemented */ > - RINGBUF_TYPE_TIME_STAMP, > + RINGBUF_TYPE_TIME_EXTEND_ABS, > }; > > unsigned ring_buffer_event_length(struct ring_buffer_event *event); > void *ring_buffer_event_data(struct ring_buffer_event *event); > +u64 ring_buffer_event_time_stamp(struct ring_buffer_event *event); > > /* > * ring_buffer_discard_commit will remove an event that has not > @@ -180,6 +186,8 @@ void ring_buffer_normalize_time_stamp(struct ring_buffer *buffer, > int cpu, u64 *ts); > void ring_buffer_set_clock(struct ring_buffer *buffer, > u64 (*clock)(void)); > +void ring_buffer_set_time_stamp_abs(struct ring_buffer *buffer, bool abs); > +bool ring_buffer_time_stamp_abs(struct ring_buffer *buffer); > > size_t ring_buffer_page_len(void *page); > > diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c > index a85739e..c9c9a83 100644 > --- a/kernel/trace/ring_buffer.c > +++ b/kernel/trace/ring_buffer.c > @@ -41,6 +41,8 @@ int ring_buffer_print_entry_header(struct trace_seq *s) > RINGBUF_TYPE_PADDING); > trace_seq_printf(s, "\ttime_extend : type == %d\n", > RINGBUF_TYPE_TIME_EXTEND); > + trace_seq_printf(s, "\ttime_extend_abs : type == %d\n", > + RINGBUF_TYPE_TIME_EXTEND_ABS); > trace_seq_printf(s, "\tdata max type_len == %d\n", > RINGBUF_TYPE_DATA_TYPE_LEN_MAX); > > @@ -186,11 +188,9 @@ static void rb_event_set_padding(struct ring_buffer_event *event) > return event->array[0] + RB_EVNT_HDR_SIZE; > > case RINGBUF_TYPE_TIME_EXTEND: > + case RINGBUF_TYPE_TIME_EXTEND_ABS: > return RB_LEN_TIME_EXTEND; > > - case RINGBUF_TYPE_TIME_STAMP: > - return RB_LEN_TIME_STAMP; > - > case RINGBUF_TYPE_DATA: > return rb_event_data_length(event); > default: > @@ -209,7 +209,8 @@ static void rb_event_set_padding(struct ring_buffer_event *event) > { > unsigned len = 0; > > - if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) { > + if (event->type_len == RINGBUF_TYPE_TIME_EXTEND || > + event->type_len == RINGBUF_TYPE_TIME_EXTEND_ABS) { Hmm, we could micro-optimize this with: event->type_len > RINGBUF_TYPE_PADDING But it would require comments and/or a wrapper to define it so people in the future know what it is doing. > /* time extends include the data event after it */ > len = RB_LEN_TIME_EXTEND; > event = skip_time_extend(event); > @@ -231,7 +232,8 @@ unsigned ring_buffer_event_length(struct ring_buffer_event *event) > { > unsigned length; > > - if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) > + if (event->type_len == RINGBUF_TYPE_TIME_EXTEND || > + event->type_len == RINGBUF_TYPE_TIME_EXTEND_ABS) > event = skip_time_extend(event); > > length = rb_event_length(event); > @@ -248,7 +250,8 @@ unsigned ring_buffer_event_length(struct ring_buffer_event *event) > static __always_inline void * > rb_event_data(struct ring_buffer_event *event) > { > - if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) > + if (event->type_len == RINGBUF_TYPE_TIME_EXTEND || > + event->type_len == RINGBUF_TYPE_TIME_EXTEND_ABS) > event = skip_time_extend(event); > BUG_ON(event->type_len > RINGBUF_TYPE_DATA_TYPE_LEN_MAX); > /* If length is in len field, then array[0] has the data */ > @@ -483,6 +486,7 @@ struct ring_buffer { > u64 (*clock)(void); > > struct rb_irq_work irq_work; > + bool time_stamp_abs; > }; > > struct ring_buffer_iter { > @@ -1377,6 +1381,16 @@ void ring_buffer_set_clock(struct ring_buffer *buffer, > buffer->clock = clock; > } > > +void ring_buffer_set_time_stamp_abs(struct ring_buffer *buffer, bool abs) > +{ > + buffer->time_stamp_abs = abs; > +} > + > +bool ring_buffer_time_stamp_abs(struct ring_buffer *buffer) > +{ > + return buffer->time_stamp_abs; > +} > + > static void rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer); > > static inline unsigned long rb_page_entries(struct buffer_page *bpage) > @@ -2207,13 +2221,16 @@ static void rb_inc_iter(struct ring_buffer_iter *iter) > } > > /* Slow path, do not inline */ > -static noinline struct ring_buffer_event * > -rb_add_time_stamp(struct ring_buffer_event *event, u64 delta) > +static struct noinline ring_buffer_event * > +rb_add_time_stamp(struct ring_buffer_event *event, u64 delta, bool abs) > { > - event->type_len = RINGBUF_TYPE_TIME_EXTEND; > + if (abs) > + event->type_len = RINGBUF_TYPE_TIME_EXTEND_ABS; > + else > + event->type_len = RINGBUF_TYPE_TIME_EXTEND; > > - /* Not the first event on the page? */ > - if (rb_event_index(event)) { > + /* Not the first event on the page, or not delta? */ > + if (abs || rb_event_index(event)) { > event->time_delta = delta & TS_MASK; > event->array[0] = delta >> TS_SHIFT; > } else { > @@ -2256,7 +2273,9 @@ static inline bool rb_event_is_commit(struct ring_buffer_per_cpu *cpu_buffer, > * add it to the start of the resevered space. > */ > if (unlikely(info->add_timestamp)) { > - event = rb_add_time_stamp(event, delta); > + bool abs = ring_buffer_time_stamp_abs(cpu_buffer->buffer); > + > + event = rb_add_time_stamp(event, info->delta, abs); > length -= RB_LEN_TIME_EXTEND; > delta = 0; > } > @@ -2444,7 +2463,8 @@ static __always_inline void rb_end_commit(struct ring_buffer_per_cpu *cpu_buffer > > static inline void rb_event_discard(struct ring_buffer_event *event) > { > - if (event->type_len == RINGBUF_TYPE_TIME_EXTEND) > + if (event->type_len == RINGBUF_TYPE_TIME_EXTEND || > + event->type_len == RINGBUF_TYPE_TIME_EXTEND_ABS) > event = skip_time_extend(event); > > /* array[0] holds the actual length for the discarded event */ > @@ -2475,6 +2495,10 @@ static inline void rb_event_discard(struct ring_buffer_event *event) > { > u64 delta; > > + /* Ignore write_stamp if TIME_EXTEND_ABS */ > + if (event->type_len == RINGBUF_TYPE_TIME_EXTEND_ABS) > + return; > + Hmm, I don't trust this. This function does a bit of book keeping as well. > /* > * The event first in the commit queue updates the > * time stamp. > @@ -2492,8 +2516,7 @@ static inline void rb_event_discard(struct ring_buffer_event *event) > delta <<= TS_SHIFT; > delta += event->time_delta; > cpu_buffer->write_stamp += delta; > - } else > - cpu_buffer->write_stamp += event->time_delta; > + } And why is this removed? > } > } > > @@ -2674,7 +2697,7 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer, > * If this is the first commit on the page, then it has the same > * timestamp as the page itself. > */ > - if (!tail) > + if (!tail && !ring_buffer_time_stamp_abs(cpu_buffer->buffer)) > info->delta = 0; > > /* See if we shot pass the end of this buffer page */ > @@ -2752,8 +2775,11 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer, > /* make sure this diff is calculated here */ > barrier(); > > - /* Did the write stamp get updated already? */ > - if (likely(info.ts >= cpu_buffer->write_stamp)) { > + if (ring_buffer_time_stamp_abs(buffer)) { > + info.delta = info.ts; > + rb_handle_timestamp(cpu_buffer, &info); > + } else /* Did the write stamp get updated already? */ > + if (likely(info.ts >= cpu_buffer->write_stamp)) { OK, please break this patch up into two. Although, I may take it and start on it as well ;-) One with the implementation of the EXTEND_ABS, and the other with the setting of the flags. If we are going to implement the time stamp ext, I want to see if I can use it to fix other issues with the ring buffer. Actually, I'm thinking that we could keep the TIME_STAMP name, and just implement it as a full timestamp, not a delta. I believe that was what I wanted it for in the first place. -- Steve > info.delta = diff; > if (unlikely(test_time_stamp(info.delta))) > rb_handle_timestamp(cpu_buffer, &info); > @@ -3429,8 +3455,8 @@ int ring_buffer_iter_empty(struct ring_buffer_iter *iter) > cpu_buffer->read_stamp += delta; > return; > > - case RINGBUF_TYPE_TIME_STAMP: > - /* FIXME: not implemented */ > + case RINGBUF_TYPE_TIME_EXTEND_ABS: > + /* Ignore read_stamp if TIME_EXTEND_ABS */ > return; > > -- To unsubscribe from this list: send the line "unsubscribe linux-rt-users" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html