On 2021/5/17 17:14, Peter Zijlstra wrote:
On Tue, May 11, 2021 at 10:42:05AM +0800, Like Xu wrote:
@@ -99,6 +109,7 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
bool exclude_kernel, bool intr,
bool in_tx, bool in_tx_cp)
{
+ struct kvm_pmu *pmu = vcpu_to_pmu(pmc->vcpu);
struct perf_event *event;
struct perf_event_attr attr = {
.type = type,
@@ -110,6 +121,7 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
.exclude_kernel = exclude_kernel,
.config = config,
};
+ bool pebs = test_bit(pmc->idx, (unsigned long *)&pmu->pebs_enable);
attr.sample_period = get_sample_period(pmc, pmc->counter);
@@ -124,9 +136,23 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
attr.sample_period = 0;
attr.config |= HSW_IN_TX_CHECKPOINTED;
}
+ if (pebs) {
+ /*
+ * The non-zero precision level of a guest event makes the ordinary
+ * guest event become a guest PEBS event and triggers the host
+ * PEBS PMI handler to determine whether the PEBS overflow PMI
+ * comes from the host counters or the guest.
+ *
+ * For most PEBS hardware events, the difference in the software
+ * precision levels of guest and host PEBS events will not affect
+ * the accuracy of the PEBS profiling result, because the "event IP"
+ * in the PEBS record is calibrated on the guest side.
+ */
+ attr.precise_ip = 1;
+ }
event = perf_event_create_kernel_counter(&attr, -1, current,
- intr ? kvm_perf_overflow_intr :
+ (intr || pebs) ? kvm_perf_overflow_intr :
kvm_perf_overflow, pmc);
How would pebs && !intr be possible?
I don't think it's possible.
Also, wouldn't this be more legible
when written like:
perf_overflow_handler_t ovf = kvm_perf_overflow;
...
if (intr)
ovf = kvm_perf_overflow_intr;
...
event = perf_event_create_kernel_counter(&attr, -1, current, ovf, pmc);
Please yell if you don't like this:
diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
index 711294babb97..a607f5a1b9cd 100644
--- a/arch/x86/kvm/pmu.c
+++ b/arch/x86/kvm/pmu.c
@@ -122,6 +122,8 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
.config = config,
};
bool pebs = test_bit(pmc->idx, (unsigned long *)&pmu->pebs_enable);
+ perf_overflow_handler_t ovf = (intr || pebs) ?
+ kvm_perf_overflow_intr : kvm_perf_overflow;
attr.sample_period = get_sample_period(pmc, pmc->counter);
@@ -151,9 +153,7 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
attr.precise_ip = 1;
}
- event = perf_event_create_kernel_counter(&attr, -1, current,
- (intr || pebs) ? kvm_perf_overflow_intr :
- kvm_perf_overflow, pmc);
+ event = perf_event_create_kernel_counter(&attr, -1, current, ovf, pmc);
if (IS_ERR(event)) {
pr_debug_ratelimited("kvm_pmu: event creation failed %ld for pmc->idx = %d\n",
PTR_ERR(event), pmc->idx);