On Wed, 30 Aug 2017, Denis Plotnikov wrote: > The callback provides extended information about just read > clocksource value. > > It's going to be used in cases when detailed system information > needed for further time related values calculation, e.g in KVM > masterclock settings calculation. > > Signed-off-by: Denis Plotnikov <dplotnikov@xxxxxxxxxxxxx> > --- > include/linux/clocksource.h | 11 ++++++++++- > 1 file changed, 10 insertions(+), 1 deletion(-) > > diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h > index a78cb18..786a522 100644 > --- a/include/linux/clocksource.h > +++ b/include/linux/clocksource.h > @@ -48,7 +48,14 @@ struct module; > * 400-499: Perfect > * The ideal clocksource. A must-use where > * available. > - * @read: returns a cycle value, passes clocksource as argument > + * @read: returns a cycle value (might be not quite cycles: > + * see pvclock) passes clocksource as argument > + * @read_with_stamp: saves cycles value (got from timekeeper) and cycles > + * stamp (got from hardware counter value and used by > + * timekeeper to calculate the cycles value) to > + * corresponding input pointers return true if cycles > + * stamp holds real cycles and false if it holds some > + * time derivative value Neither the changelog nor this comment make any sense. Not to talk about the actual TSC side implementation which does the same as tsc_read() - it actually uses tsc_read() - but stores the same value twice and unconditionally returns true. I have no idea why you need this extra voodoo function if you can achieve the same thing with a simple property bit in clocksource->flags. Neither do I understand the rest of the magic you introduce in the snapshot code: > + if (clock->read_with_stamp) > + systime_snapshot->cycles_valid = > + clock->read_with_stamp( > + clock, &now, &systime_snapshot->cycles); > + else { > + systime_snapshot->cycles_valid = false; > + now = clock->read(clock); > + systime_snapshot->cycles = now; > + } The only difference between the two code pathes is the effect on systime_snapshot->cycles_valid. The explanation of that bit is not making much sense either. + * @cycles_valid: The flag is true when @cycles contain actual + * number of cycles instead some cycle derivative Why the heck would cycles be something different than what clock->read() returns? What you really want to convey is the information whether now = tk_clock_read(&tk->tkr_mono); is a value which you can use for your pvclock correlation or not. Unless I'm missing something important all of this can be achieved with a minimal and actually understandable patch. See below. Thanks, tglx 8<------------------ --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -1045,7 +1045,8 @@ static struct clocksource clocksource_ts .read = read_tsc, .mask = CLOCKSOURCE_MASK(64), .flags = CLOCK_SOURCE_IS_CONTINUOUS | - CLOCK_SOURCE_MUST_VERIFY, + CLOCK_SOURCE_MUST_VERIFY | + CLOCK_SOURCE_VALID_FOR_PVCLOCK_UPDATE, .archdata = { .vclock_mode = VCLOCK_TSC }, .resume = tsc_resume, .mark_unstable = tsc_cs_mark_unstable, --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -118,7 +118,9 @@ struct clocksource { #define CLOCK_SOURCE_VALID_FOR_HRES 0x20 #define CLOCK_SOURCE_UNSTABLE 0x40 #define CLOCK_SOURCE_SUSPEND_NONSTOP 0x80 -#define CLOCK_SOURCE_RESELECT 0x100 +#define CLOCK_SOURCE_VALID_FOR_PVCLOCK_UPDATE 0x100 + +#define CLOCK_SOURCE_RESELECT 0x200 /* simplify initialization of mask field */ #define CLOCKSOURCE_MASK(bits) GENMASK_ULL((bits) - 1, 0) --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -285,6 +285,8 @@ extern void ktime_get_raw_and_real_ts64( * @raw: Monotonic raw system time * @clock_was_set_seq: The sequence number of clock was set events * @cs_was_changed_seq: The sequence number of clocksource change events + * @valid_for_pvclock_update: @cycles is from a clocksource which + * can be used for pvclock updates */ struct system_time_snapshot { u64 cycles; @@ -292,6 +294,7 @@ struct system_time_snapshot { ktime_t raw; unsigned int clock_was_set_seq; u8 cs_was_changed_seq; + bool valid_for_pvclock_update; }; /* --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -948,7 +948,7 @@ time64_t __ktime_get_real_seconds(void) void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot) { struct timekeeper *tk = &tk_core.timekeeper; - unsigned long seq; + unsigned long seq, clk_flags; ktime_t base_raw; ktime_t base_real; u64 nsec_raw; @@ -960,6 +960,7 @@ void ktime_get_snapshot(struct system_ti do { seq = read_seqcount_begin(&tk_core.seq); now = tk_clock_read(&tk->tkr_mono); + clk_flags = tk->tkr_mono.clock->flags; systime_snapshot->cs_was_changed_seq = tk->cs_was_changed_seq; systime_snapshot->clock_was_set_seq = tk->clock_was_set_seq; base_real = ktime_add(tk->tkr_mono.base, @@ -970,6 +971,8 @@ void ktime_get_snapshot(struct system_ti } while (read_seqcount_retry(&tk_core.seq, seq)); systime_snapshot->cycles = now; + systime_snapshot->valid_for_pvclock_update = + clk_flags & CLOCK_SOURCE_VALID_FOR_PVCLOCK_UPDATE, systime_snapshot->real = ktime_add_ns(base_real, nsec_real); systime_snapshot->raw = ktime_add_ns(base_raw, nsec_raw); }