1.) Background.

1.1) On arm64, run a virtual guest with QEMU, bind the guest to core 33,
     and run program "a" in the guest. The code of "a" is shown below:

----------------------------------------------------------
#include <stdio.h>

int main()
{
	unsigned long i = 0;

	for (;;) {		/* infinite busy loop to burn CPU cycles */
		i++;
	}

	printf("i:%lu\n", i);	/* never reached */
	return 0;
}
----------------------------------------------------------

1.2) Use the following perf command on the host:

 #perf stat -e cycles:G,cycles:H -C 33 -I 1000 sleep 1
 #          time             counts unit events
     1.000817400      3,299,471,572      cycles:G
     1.000817400          3,240,586      cycles:H

This result is correct; my CPU's frequency is 3.3 GHz.

1.3) Use the following perf command on the host:

 #perf stat -e cycles:G,cycles:H -C 33 -d -d -I 1000 sleep 1
 #          time             counts unit events
     1.000831480        153,634,097      cycles:G                                                   (70.03%)
     1.000831480      3,147,940,599      cycles:H                                                   (70.03%)
     1.000831480      1,143,598,527      L1-dcache-loads                                            (70.03%)
     1.000831480              9,986      L1-dcache-load-misses   #  0.00% of all L1-dcache accesses (70.03%)
     1.000831480    <not supported>      LLC-loads
     1.000831480    <not supported>      LLC-load-misses
     1.000831480        580,887,696      L1-icache-loads                                            (70.03%)
     1.000831480             77,855      L1-icache-load-misses   #  0.01% of all L1-icache accesses (70.03%)
     1.000831480      6,112,224,612      dTLB-loads                                                 (70.03%)
     1.000831480             16,222      dTLB-load-misses        #  0.00% of all dTLB cache accesses (69.94%)
     1.000831480        590,015,996      iTLB-loads                                                 (59.95%)
     1.000831480                505      iTLB-load-misses        #  0.00% of all iTLB cache accesses (59.95%)

This result is wrong: "cycles:G" should also be nearly 3.3G.

2.) Root cause.

There are only 7 counters on my arm64 platform:
	(one cycle counter) + (6 general event counters)

The command in 1.3) programs 10 hardware events. Since there are only 7
counters, the perf core triggers event multiplexing from the hrtimer:

	merge_sched_in()
		--> perf_mux_hrtimer_restart()
			--> perf_rotate_context()

(The "(70.03%)" annotations in 1.3) show the fraction of time each event
was actually scheduled because of this multiplexing.)

perf_rotate_context() does not restore the PMU registers the way
context_switch() does. In context_switch():

	kvm_sched_in()  --> kvm_vcpu_pmu_restore_guest()
	kvm_sched_out() --> kvm_vcpu_pmu_restore_host()

Because nothing restores the guest/host PMU state after a rotation, we
get the wrong result.

3.) About this patch.

3.1) Add arch_perf_rotate_pmu_set().
3.2) Add is_guest(), which checks from the hrtimer path whether the CPU
     is currently running a guest vCPU.
3.3) In arm64's arch_perf_rotate_pmu_set(), restore the PMU registers
     according to that context.

4.) Test result of this patch:

 #perf stat -e cycles:G,cycles:H -C 33 -d -d -I 1000 sleep 1
 #          time             counts unit events
     1.000817360      3,297,898,244      cycles:G                                                   (70.03%)
     1.000817360          2,719,941      cycles:H                                                   (70.03%)
     1.000817360            883,764      L1-dcache-loads                                            (70.03%)
     1.000817360             17,517      L1-dcache-load-misses   #  1.98% of all L1-dcache accesses (70.03%)
     1.000817360    <not supported>      LLC-loads
     1.000817360    <not supported>      LLC-load-misses
     1.000817360          1,033,816      L1-icache-loads                                            (70.03%)
     1.000817360            103,839      L1-icache-load-misses   # 10.04% of all L1-icache accesses (70.03%)
     1.000817360            982,401      dTLB-loads                                                 (70.03%)
     1.000817360             28,272      dTLB-load-misses        #  2.88% of all dTLB cache accesses (69.94%)
     1.000817360            972,072      iTLB-loads                                                 (59.95%)
     1.000817360                772      iTLB-load-misses        #  0.08% of all iTLB cache accesses (59.95%)

The result is correct: "cycles:G" is nearly 3.3G now.
Signed-off-by: Huang Shijie <shijie@xxxxxxxxxxxxxxxxxxxxxx>
---
 arch/arm64/kvm/pmu.c     | 8 ++++++++
 include/linux/kvm_host.h | 1 +
 kernel/events/core.c     | 5 +++++
 virt/kvm/kvm_main.c      | 9 +++++++++
 4 files changed, 23 insertions(+)

diff --git a/arch/arm64/kvm/pmu.c b/arch/arm64/kvm/pmu.c
index 121f1a14c829..a6815c3f0c4e 100644
--- a/arch/arm64/kvm/pmu.c
+++ b/arch/arm64/kvm/pmu.c
@@ -210,6 +210,14 @@ void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu)
 	kvm_vcpu_pmu_disable_el0(events_guest);
 }
 
+void arch_perf_rotate_pmu_set(void)
+{
+	if (is_guest())
+		kvm_vcpu_pmu_restore_guest(NULL);
+	else
+		kvm_vcpu_pmu_restore_host(NULL);
+}
+
 /*
  * With VHE, keep track of the PMUSERENR_EL0 value for the host EL0 on the pCPU
  * where PMUSERENR_EL0 for the guest is loaded, since PMUSERENR_EL0 is switched
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 9d3ac7720da9..e350cbc8190f 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -931,6 +931,7 @@ void kvm_destroy_vcpus(struct kvm *kvm);
 
 void vcpu_load(struct kvm_vcpu *vcpu);
 void vcpu_put(struct kvm_vcpu *vcpu);
+bool is_guest(void);
 
 #ifdef __KVM_HAVE_IOAPIC
 void kvm_arch_post_irq_ack_notifier_list_update(struct kvm *kvm);
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 6fd9272eec6e..fe78f9d17eba 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4229,6 +4229,10 @@ ctx_event_to_rotate(struct perf_event_pmu_context *pmu_ctx)
 	return event;
 }
 
+void __weak arch_perf_rotate_pmu_set(void)
+{
+}
+
 static bool perf_rotate_context(struct perf_cpu_pmu_context *cpc)
 {
 	struct perf_cpu_context *cpuctx = this_cpu_ptr(&perf_cpu_context);
@@ -4282,6 +4286,7 @@ static bool perf_rotate_context(struct perf_cpu_pmu_context *cpc)
 	if (task_event || (task_epc && cpu_event))
 		__pmu_ctx_sched_in(task_epc->ctx, pmu);
 
+	arch_perf_rotate_pmu_set();
 	perf_pmu_enable(pmu);
 	perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
 
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index dfbaafbe3a00..a77d336552be 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -218,6 +218,15 @@ void vcpu_load(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(vcpu_load);
 
+/* Are we running in guest context on this CPU? */
+bool is_guest(void)
+{
+	struct kvm_vcpu *vcpu;
+
+	vcpu = __this_cpu_read(kvm_running_vcpu);
+	return !!vcpu;
+}
+
 void vcpu_put(struct kvm_vcpu *vcpu)
 {
 	preempt_disable();
-- 
2.39.2
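
As a side note for reviewers unfamiliar with the new hook: below is a
minimal stand-alone userspace sketch (illustration only, not part of the
patch) of the check-and-restore logic that arch_perf_rotate_pmu_set()
and is_guest() add. The struct, the "per-CPU" pointer and the two
restore helpers are placeholders standing in for kvm_running_vcpu,
kvm_vcpu_pmu_restore_guest() and kvm_vcpu_pmu_restore_host().

----------------------------------------------------------
#include <stdio.h>
#include <stdbool.h>
#include <stddef.h>

struct kvm_vcpu { int dummy; };		/* opaque stand-in for the real struct */

/* stand-in for the per-CPU kvm_running_vcpu pointer */
static struct kvm_vcpu *kvm_running_vcpu;

/* mirrors the new is_guest(): a loaded vCPU means guest context */
static bool is_guest(void)
{
	return kvm_running_vcpu != NULL;
}

/* placeholders for kvm_vcpu_pmu_restore_guest()/kvm_vcpu_pmu_restore_host() */
static void restore_guest_pmu(void) { printf("restore guest PMU event set\n"); }
static void restore_host_pmu(void)  { printf("restore host PMU event set\n"); }

/* mirrors the arm64 arch_perf_rotate_pmu_set() added by the patch:
 * on every event rotation, restore the PMU state that matches the
 * current (guest or host) context.
 */
static void rotate_pmu_set(void)
{
	if (is_guest())
		restore_guest_pmu();
	else
		restore_host_pmu();
}

int main(void)
{
	struct kvm_vcpu vcpu = { 0 };

	/* host context: no vCPU loaded on this CPU */
	rotate_pmu_set();

	/* guest context: a vCPU is loaded (think vcpu_load()) */
	kvm_running_vcpu = &vcpu;
	rotate_pmu_set();

	return 0;
}
----------------------------------------------------------

The __weak default added to kernel/events/core.c keeps the rotation path
unchanged on architectures that do not provide their own
arch_perf_rotate_pmu_set().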