This is a note to let you know that I've just added the patch titled perf intel-pt: Improve sample timestamp to the 4.4-stable tree which can be found at: http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary The filename of the patch is: perf-intel-pt-improve-sample-timestamp.patch and it can be found in the queue-4.4 subdirectory. If you, or anyone else, feels it should not be added to the stable tree, please let <stable@xxxxxxxxxxxxxxx> know about it. >From 3f04d98e972b59706bd43d6cc75efac91f8fba50 Mon Sep 17 00:00:00 2001 From: Adrian Hunter <adrian.hunter@xxxxxxxxx> Date: Fri, 26 May 2017 11:17:03 +0300 Subject: perf intel-pt: Improve sample timestamp From: Adrian Hunter <adrian.hunter@xxxxxxxxx> commit 3f04d98e972b59706bd43d6cc75efac91f8fba50 upstream. The decoder uses its current timestamp in samples. Usually that is a timestamp that has already passed, but in some cases it is a timestamp for a branch that the decoder is walking towards, and consequently hasn't reached. Improve that situation by using the pkt_state to determine when to use the current or previous timestamp. Signed-off-by: Adrian Hunter <adrian.hunter@xxxxxxxxx> Cc: Andi Kleen <ak@xxxxxxxxxxxxxxx> Link: http://lkml.kernel.org/r/1495786658-18063-3-git-send-email-adrian.hunter@xxxxxxxxx Signed-off-by: Arnaldo Carvalho de Melo <acme@xxxxxxxxxx> Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx> --- tools/perf/util/intel-pt-decoder/intel-pt-decoder.c | 34 ++++++++++++++++++-- 1 file changed, 31 insertions(+), 3 deletions(-) --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -64,6 +64,25 @@ enum intel_pt_pkt_state { INTEL_PT_STATE_FUP_NO_TIP, }; +static inline bool intel_pt_sample_time(enum intel_pt_pkt_state pkt_state) +{ + switch (pkt_state) { + case INTEL_PT_STATE_NO_PSB: + case INTEL_PT_STATE_NO_IP: + case INTEL_PT_STATE_ERR_RESYNC: + case INTEL_PT_STATE_IN_SYNC: + case INTEL_PT_STATE_TNT: + return true; + case INTEL_PT_STATE_TIP: + case INTEL_PT_STATE_TIP_PGD: + case INTEL_PT_STATE_FUP: + case INTEL_PT_STATE_FUP_NO_TIP: + return false; + default: + return true; + }; +} + #ifdef INTEL_PT_STRICT #define INTEL_PT_STATE_ERR1 INTEL_PT_STATE_NO_PSB #define INTEL_PT_STATE_ERR2 INTEL_PT_STATE_NO_PSB @@ -98,6 +117,7 @@ struct intel_pt_decoder { uint64_t timestamp; uint64_t tsc_timestamp; uint64_t ref_timestamp; + uint64_t sample_timestamp; uint64_t ret_addr; uint64_t ctc_timestamp; uint64_t ctc_delta; @@ -140,6 +160,7 @@ struct intel_pt_decoder { unsigned int fup_tx_flags; unsigned int tx_flags; uint64_t timestamp_insn_cnt; + uint64_t sample_insn_cnt; uint64_t stuck_ip; int no_progress; int stuck_ip_prd; @@ -896,6 +917,7 @@ static int intel_pt_walk_insn(struct int decoder->tot_insn_cnt += insn_cnt; decoder->timestamp_insn_cnt += insn_cnt; + decoder->sample_insn_cnt += insn_cnt; decoder->period_insn_cnt += insn_cnt; if (err) { @@ -2035,7 +2057,7 @@ static int intel_pt_sync(struct intel_pt static uint64_t intel_pt_est_timestamp(struct intel_pt_decoder *decoder) { - uint64_t est = decoder->timestamp_insn_cnt << 1; + uint64_t est = decoder->sample_insn_cnt << 1; if (!decoder->cbr || !decoder->max_non_turbo_ratio) goto out; @@ -2043,7 +2065,7 @@ static uint64_t intel_pt_est_timestamp(s est *= decoder->max_non_turbo_ratio; est /= decoder->cbr; out: - return decoder->timestamp + est; + return decoder->sample_timestamp + est; } const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder) @@ -2099,11 +2121,17 @@ const struct intel_pt_state *intel_pt_de if (err) { decoder->state.err = intel_pt_ext_err(err); decoder->state.from_ip = decoder->ip; + decoder->sample_timestamp = decoder->timestamp; + decoder->sample_insn_cnt = decoder->timestamp_insn_cnt; } else { decoder->state.err = 0; + if (intel_pt_sample_time(decoder->pkt_state)) { + decoder->sample_timestamp = decoder->timestamp; + decoder->sample_insn_cnt = decoder->timestamp_insn_cnt; + } } - decoder->state.timestamp = decoder->timestamp; + decoder->state.timestamp = decoder->sample_timestamp; decoder->state.est_timestamp = intel_pt_est_timestamp(decoder); decoder->state.cr3 = decoder->cr3; decoder->state.tot_insn_cnt = decoder->tot_insn_cnt; Patches currently in stable-queue which might be from adrian.hunter@xxxxxxxxx are queue-4.4/perf-intel-pt-ensure-ip-is-zero-when-state-is-intel_pt_state_no_ip.patch queue-4.4/perf-intel-pt-move-decoder-error-setting-into-one-condition.patch queue-4.4/perf-intel-pt-clear-fup-flag-on-error.patch queue-4.4/perf-intel-pt-improve-sample-timestamp.patch queue-4.4/perf-intel-pt-fix-missing-stack-clear.patch