This is a note to let you know that I've just added the patch titled perf intel-pt: Improve sample timestamp to the 4.9-stable tree which can be found at: http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary The filename of the patch is: perf-intel-pt-improve-sample-timestamp.patch and it can be found in the queue-4.9 subdirectory. If you, or anyone else, feels it should not be added to the stable tree, please let <stable@xxxxxxxxxxxxxxx> know about it. >From 3f04d98e972b59706bd43d6cc75efac91f8fba50 Mon Sep 17 00:00:00 2001 From: Adrian Hunter <adrian.hunter@xxxxxxxxx> Date: Fri, 26 May 2017 11:17:03 +0300 Subject: perf intel-pt: Improve sample timestamp From: Adrian Hunter <adrian.hunter@xxxxxxxxx> commit 3f04d98e972b59706bd43d6cc75efac91f8fba50 upstream. The decoder uses its current timestamp in samples. Usually that is a timestamp that has already passed, but in some cases it is a timestamp for a branch that the decoder is walking towards, and consequently hasn't reached. Improve that situation by using the pkt_state to determine when to use the current or previous timestamp. Signed-off-by: Adrian Hunter <adrian.hunter@xxxxxxxxx> Cc: Andi Kleen <ak@xxxxxxxxxxxxxxx> Link: http://lkml.kernel.org/r/1495786658-18063-3-git-send-email-adrian.hunter@xxxxxxxxx Signed-off-by: Arnaldo Carvalho de Melo <acme@xxxxxxxxxx> Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx> --- tools/perf/util/intel-pt-decoder/intel-pt-decoder.c | 34 ++++++++++++++++++-- 1 file changed, 31 insertions(+), 3 deletions(-) --- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c @@ -64,6 +64,25 @@ enum intel_pt_pkt_state { INTEL_PT_STATE_FUP_NO_TIP, }; +static inline bool intel_pt_sample_time(enum intel_pt_pkt_state pkt_state) +{ + switch (pkt_state) { + case INTEL_PT_STATE_NO_PSB: + case INTEL_PT_STATE_NO_IP: + case INTEL_PT_STATE_ERR_RESYNC: + case INTEL_PT_STATE_IN_SYNC: + case INTEL_PT_STATE_TNT: + return true; + case INTEL_PT_STATE_TIP: + case INTEL_PT_STATE_TIP_PGD: + case INTEL_PT_STATE_FUP: + case INTEL_PT_STATE_FUP_NO_TIP: + return false; + default: + return true; + }; +} + #ifdef INTEL_PT_STRICT #define INTEL_PT_STATE_ERR1 INTEL_PT_STATE_NO_PSB #define INTEL_PT_STATE_ERR2 INTEL_PT_STATE_NO_PSB @@ -99,6 +118,7 @@ struct intel_pt_decoder { uint64_t timestamp; uint64_t tsc_timestamp; uint64_t ref_timestamp; + uint64_t sample_timestamp; uint64_t ret_addr; uint64_t ctc_timestamp; uint64_t ctc_delta; @@ -139,6 +159,7 @@ struct intel_pt_decoder { unsigned int fup_tx_flags; unsigned int tx_flags; uint64_t timestamp_insn_cnt; + uint64_t sample_insn_cnt; uint64_t stuck_ip; int no_progress; int stuck_ip_prd; @@ -898,6 +919,7 @@ static int intel_pt_walk_insn(struct int decoder->tot_insn_cnt += insn_cnt; decoder->timestamp_insn_cnt += insn_cnt; + decoder->sample_insn_cnt += insn_cnt; decoder->period_insn_cnt += insn_cnt; if (err) { @@ -2067,7 +2089,7 @@ static int intel_pt_sync(struct intel_pt static uint64_t intel_pt_est_timestamp(struct intel_pt_decoder *decoder) { - uint64_t est = decoder->timestamp_insn_cnt << 1; + uint64_t est = decoder->sample_insn_cnt << 1; if (!decoder->cbr || !decoder->max_non_turbo_ratio) goto out; @@ -2075,7 +2097,7 @@ static uint64_t intel_pt_est_timestamp(s est *= decoder->max_non_turbo_ratio; est /= decoder->cbr; out: - return decoder->timestamp + est; + return decoder->sample_timestamp + est; } const struct intel_pt_state *intel_pt_decode(struct intel_pt_decoder *decoder) @@ -2131,11 +2153,17 @@ const struct intel_pt_state *intel_pt_de if (err) { decoder->state.err = intel_pt_ext_err(err); decoder->state.from_ip = decoder->ip; + decoder->sample_timestamp = decoder->timestamp; + decoder->sample_insn_cnt = decoder->timestamp_insn_cnt; } else { decoder->state.err = 0; + if (intel_pt_sample_time(decoder->pkt_state)) { + decoder->sample_timestamp = decoder->timestamp; + decoder->sample_insn_cnt = decoder->timestamp_insn_cnt; + } } - decoder->state.timestamp = decoder->timestamp; + decoder->state.timestamp = decoder->sample_timestamp; decoder->state.est_timestamp = intel_pt_est_timestamp(decoder); decoder->state.cr3 = decoder->cr3; decoder->state.tot_insn_cnt = decoder->tot_insn_cnt; Patches currently in stable-queue which might be from adrian.hunter@xxxxxxxxx are queue-4.9/perf-intel-pt-ensure-ip-is-zero-when-state-is-intel_pt_state_no_ip.patch queue-4.9/perf-intel-pt-ensure-never-to-set-last_ip-when-packet-count-is-zero.patch queue-4.9/perf-intel-pt-use-fup-always-when-scanning-for-an-ip.patch queue-4.9/perf-intel-pt-fix-last_ip-usage.patch queue-4.9/perf-intel-pt-move-decoder-error-setting-into-one-condition.patch queue-4.9/perf-intel-pt-clear-fup-flag-on-error.patch queue-4.9/perf-intel-pt-improve-sample-timestamp.patch queue-4.9/perf-intel-pt-fix-missing-stack-clear.patch