On 11/03/2014 12:17 PM, Radim Krčmář wrote:
> 2014-10-31 12:05-0400, Wei Huang:
>> This patch implemented vPMU for AMD platform. The design piggybacks
>> on the existing Intel structs (kvm_pmu and kvm_pmc), but only uses
>> the parts of generic counters. The kvm_pmu_ops interface is also
>> initialized in this patch.
>>
>> Signed-off-by: Wei Huang <wei@xxxxxxxxxx>
>> ---
>>  arch/x86/kvm/pmu_amd.c | 332 ++++++++++++++++++++++++++++++++++++++++++++++---
>>  1 file changed, 318 insertions(+), 14 deletions(-)
>>
>> diff --git a/arch/x86/kvm/pmu_amd.c b/arch/x86/kvm/pmu_amd.c
>> index 6d6e1c3..19cfe26 100644
>> --- a/arch/x86/kvm/pmu_amd.c
>> +++ b/arch/x86/kvm/pmu_amd.c
>> @@ -16,58 +16,362 @@
>>  #include <linux/types.h>
>>  #include <linux/kvm_host.h>
>>  #include <linux/perf_event.h>
>> -#include <asm/perf_event.h>
>>  #include "x86.h"
>>  #include "cpuid.h"
>>  #include "lapic.h"
>>
>> -void amd_pmu_cpuid_update(struct kvm_vcpu *vcpu)
>> +/* duplicated from amd_perfmon_event_map, K7 and above should work */
>> +static struct kvm_event_hw_type_mapping amd_event_mapping[] = {
>> +        [0] = { 0x76, 0x00, PERF_COUNT_HW_CPU_CYCLES },
>> +        [1] = { 0xc0, 0x00, PERF_COUNT_HW_INSTRUCTIONS },
>> +        [2] = { 0x80, 0x00, PERF_COUNT_HW_CACHE_REFERENCES },
>> +        [3] = { 0x81, 0x00, PERF_COUNT_HW_CACHE_MISSES },
>
>> +        [4] = { 0xc4, 0x00, PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
>> +        [5] = { 0xc5, 0x00, PERF_COUNT_HW_BRANCH_MISSES },
>
> amd_perfmon_event_map has 0xc2 and 0xc3.

Sorry, will fix.

>
>> +        [6] = { 0xd0, 0x00, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND },
>> +        [7] = { 0xd1, 0x00, PERF_COUNT_HW_STALLED_CYCLES_BACKEND },
>> +};
>> +
>> +static inline bool pmc_is_gp(struct kvm_pmc *pmc)
>>  {
>> +        return pmc->type == KVM_PMC_GP;
>>  }
>
> What is the benefit of having the same implementation in both files?
>

No benefit. In fact, I can remove it from the AMD code entirely because it
is always true for AMD. Will fix.

> I'd prefer to have it in pmu.h and I'll try to note all functions that
> look identical. As always, you can ignore anything in parentheses,
> there are only few real issues with this patch.
>
>>
>> -int amd_pmu_check_pmc(struct kvm_vcpu *vcpu, unsigned pmc)
>> +static inline struct kvm_pmc *get_gp_pmc(struct kvm_pmu *pmu, u32 msr,
>> +                                         u32 base)
>
> (pmu.h)

I will try to merge the common ones into pmu.h in the next re-spin. The same
applies to the rest of your (pmu.h and pmu.c) notes below.

>
>>  {
>> -        return 1;
>> +        if (msr >= base && msr < base + pmu->nr_arch_gp_counters)
>> +                return &pmu->gp_counters[msr - base];
>> +
>> +        return NULL;
>>  }
>>
>> -int amd_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data)
>> +static inline u64 pmc_bitmask(struct kvm_pmc *pmc)
>
> (pmu.h)
>
>>  {
>> -        return 1;
>> +        struct kvm_pmu *pmu = &pmc->vcpu->arch.pmu;
>> +
>> +        return pmu->counter_bitmask[pmc->type];
>>  }
>>
>> -int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>> +static struct kvm_pmc *global_idx_to_pmc(struct kvm_pmu *pmu, int idx)
>>  {
>> -        return 1;
>> +        return get_gp_pmc(pmu, MSR_K7_EVNTSEL0 + idx, MSR_K7_EVNTSEL0);
>>  }
>>
>> -int amd_pmu_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data)
>> +void amd_deliver_pmi(struct kvm_vcpu *vcpu)
>
> static.
>
> (pmu.c, rename to deliver_pmi and reuse for intel.)
>

Same here. I will try to merge them as much as I can.
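For concreteness, here is a rough sketch of the kind of shared helpers I would
move into pmu.h as part of that merge (lifted verbatim from this patch; the
exact placement and final names are still open until I do the re-spin):

/* candidate pmu.h helpers, shared by pmu_intel.c and pmu_amd.c (sketch) */
static inline bool pmc_is_gp(struct kvm_pmc *pmc)
{
        return pmc->type == KVM_PMC_GP;
}

static inline u64 pmc_bitmask(struct kvm_pmc *pmc)
{
        struct kvm_pmu *pmu = &pmc->vcpu->arch.pmu;

        return pmu->counter_bitmask[pmc->type];
}

static inline struct kvm_pmc *get_gp_pmc(struct kvm_pmu *pmu, u32 msr,
                                         u32 base)
{
        if (msr >= base && msr < base + pmu->nr_arch_gp_counters)
                return &pmu->gp_counters[msr - base];

        return NULL;
}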
>>  {
>> -        return 1;
>> +        if (vcpu->arch.apic)
>> +                kvm_apic_local_deliver(vcpu->arch.apic, APIC_LVTPC);
>>  }
>>
>> -bool amd_is_pmu_msr(struct kvm_vcpu *vcpu, u32 msr)
>> +static void trigger_pmi(struct irq_work *irq_work)
>
> (pmu.c)
>
>>  {
>> -        return 0;
>> +        struct kvm_pmu *pmu = container_of(irq_work, struct kvm_pmu,
>> +                        irq_work);
>> +        struct kvm_vcpu *vcpu = container_of(pmu, struct kvm_vcpu,
>> +                        arch.pmu);
>> +
>> +        amd_deliver_pmi(vcpu);
>>  }
>>
>> -void amd_deliver_pmi(struct kvm_vcpu *vcpu)
>> +static u64 read_pmc(struct kvm_pmc *pmc)
>
> (pmu.c)
>
>> +{
>> +        u64 counter, enabled, running, result, delta, prev;
>> +
>> +        counter = pmc->counter;
>> +        prev = pmc->counter;
>> +
>> +        if (pmc->perf_event) {
>> +                delta = perf_event_read_value(pmc->perf_event,
>> +                                &enabled, &running);
>> +                counter += delta;
>> +        }
>> +
>> +        result = counter & pmc_bitmask(pmc);
>> +
>> +        return result;
>> +}
>
> No. The one intel uses is better.
> (This one looks like a relic from experimenting.)

Yes, I had inserted a debug message that used the "result" variable. Will fix.

>
>> +
>> +static void stop_counter(struct kvm_pmc *pmc)
>
> (pmu.c)
>
>> +{
>> +        if (pmc->perf_event) {
>> +                pmc->counter = read_pmc(pmc);
>> +                perf_event_release_kernel(pmc->perf_event);
>> +                pmc->perf_event = NULL;
>> +        }
>> +}
>> +
>> +static unsigned find_hw_type_event(struct kvm_pmu *pmu, u8 event_select,
>> +                                   u8 unit_mask)
>> +{
>
> (Intel calls this find_arch_event.)

I don't quite agree with the existing name, find_arch_event, in pmu_intel.c.
This function isn't really about architectural events; it maps a guest
counter's event select to a Linux perf event type. For now I only changed the
name in pmu_amd.c, though.

>
>> +        int i;
>> +
>> +        for (i = 0; i < ARRAY_SIZE(amd_event_mapping); i++)
>> +                if (amd_event_mapping[i].eventsel == event_select
>> +                    && amd_event_mapping[i].unit_mask == unit_mask)
>
> (And it could be less extensible, but shorter:
>                         return amd_event_mapping[i].event_type;
>         }
>         return PERF_COUNT_HW_MAX;
> }
> )

Will fix. (To confirm I read the suggestion right, I put a sketch of the
shortened version below, after the reprogram_counter hunk.)

>
>> +                        break;
>> +
>> +        if (i == ARRAY_SIZE(amd_event_mapping))
>> +                return PERF_COUNT_HW_MAX;
>> +
>> +        return amd_event_mapping[i].event_type;
>> +}
>> +
>> +static void kvm_perf_overflow_intr(struct perf_event *perf_event,
>> +                struct perf_sample_data *data, struct pt_regs *regs)
>> +{
>
> (pmu.c)
>
>> +        struct kvm_pmc *pmc = perf_event->overflow_handler_context;
>> +        struct kvm_pmu *pmu = &pmc->vcpu->arch.pmu;
>> +
>> +        if (!test_and_set_bit(pmc->idx,
>> +                        (unsigned long *)&pmu->reprogram_pmi)) {
>
> (The useless set_bit for intel is not that bad to keep them separate;
>  please mind my radicality when it comes to abstracting.)
>
>> +                kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
>> +
>> +                if (!kvm_is_in_guest())
>> +                        irq_work_queue(&pmc->vcpu->arch.pmu.irq_work);
>> +                else
>> +                        kvm_make_request(KVM_REQ_PMI, pmc->vcpu);
>> +        }
>> +}
>> +
>> +static void kvm_perf_overflow(struct perf_event *perf_event,
>> +                struct perf_sample_data *data,
>> +                struct pt_regs *regs)
>> +{
>
> (pmu.c, ditto.)
>
>> +        struct kvm_pmc *pmc = perf_event->overflow_handler_context;
>> +        struct kvm_pmu *pmu = &pmc->vcpu->arch.pmu;
>> +
>> +        if (!test_and_set_bit(pmc->idx,
>> +                        (unsigned long *)&pmu->reprogram_pmi)) {
>> +                kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
>> +        }
>> +}
>> +
>> +static void reprogram_counter(struct kvm_pmc *pmc, u32 type,
>> +                unsigned config, bool exclude_user, bool exclude_kernel,
>> +                bool intr)
>
> (Could be merged in my world, I'll comment on previous patch.)

Will try.
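Going back to the find_hw_type_event() comment above: the shortened lookup I
have in mind for v2 would be something like this (just a sketch combining the
current loop with your suggested tail, untested):

static unsigned find_hw_type_event(struct kvm_pmu *pmu, u8 event_select,
                                   u8 unit_mask)
{
        int i;

        for (i = 0; i < ARRAY_SIZE(amd_event_mapping); i++)
                if (amd_event_mapping[i].eventsel == event_select
                    && amd_event_mapping[i].unit_mask == unit_mask)
                        return amd_event_mapping[i].event_type;

        return PERF_COUNT_HW_MAX;
}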
>> +{
>> +        struct perf_event *event;
>> +        struct perf_event_attr attr = {
>> +                .type = type,
>> +                .size = sizeof(attr),
>> +                .pinned = true,
>> +                .exclude_idle = true,
>> +                .exclude_host = 1,
>> +                .exclude_user = exclude_user,
>> +                .exclude_kernel = exclude_kernel,
>> +                .config = config,
>> +        };
>> +
>> +        attr.sample_period = (-pmc->counter) & pmc_bitmask(pmc);
>> +
>> +        event = perf_event_create_kernel_counter(&attr, -1, current,
>> +                        intr?kvm_perf_overflow_intr :
>> +                        kvm_perf_overflow, pmc);
>> +        if (IS_ERR(event)) {
>> +                printk_once("kvm: pmu event creation failed %ld\n",
>> +                                PTR_ERR(event));
>> +                return;
>> +        }
>> +
>> +        pmc->perf_event = event;
>> +        clear_bit(pmc->idx, (unsigned long *)&pmc->vcpu->arch.pmu.reprogram_pmi);
>> +}
>> +
>> +
>> +static void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
>> +{
>
> (Ditto.)
>
>> +        unsigned config, type = PERF_TYPE_RAW;
>> +        u8 event_select, unit_mask;
>> +
>> +        if (eventsel & ARCH_PERFMON_EVENTSEL_PIN_CONTROL)
>> +                printk_once("kvm pmu: pin control bit is ignored\n");
>> +
>> +        pmc->eventsel = eventsel;
>> +
>> +        stop_counter(pmc);
>> +
>> +        if (!(eventsel & ARCH_PERFMON_EVENTSEL_ENABLE))
>> +                return;
>> +
>> +        event_select = eventsel & ARCH_PERFMON_EVENTSEL_EVENT;
>> +        unit_mask = (eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;
>> +
>> +        if (!(eventsel & (ARCH_PERFMON_EVENTSEL_EDGE |
>> +                        ARCH_PERFMON_EVENTSEL_INV |
>> +                        ARCH_PERFMON_EVENTSEL_CMASK |
>> +                        HSW_IN_TX |
>> +                        HSW_IN_TX_CHECKPOINTED))) {
>> +                config = find_hw_type_event(&pmc->vcpu->arch.pmu, event_select,
>> +                                unit_mask);
>> +                if (config != PERF_COUNT_HW_MAX)
>> +                        type = PERF_TYPE_HARDWARE;
>> +        }
>> +
>> +        if (type == PERF_TYPE_RAW)
>> +                config = eventsel & X86_RAW_EVENT_MASK;
>> +
>> +        reprogram_counter(pmc, type, config,
>> +                        !(eventsel & ARCH_PERFMON_EVENTSEL_USR),
>> +                        !(eventsel & ARCH_PERFMON_EVENTSEL_OS),
>> +                        eventsel & ARCH_PERFMON_EVENTSEL_INT);
>> +}
>> +
>> +static void reprogram_idx(struct kvm_pmu *pmu, int idx)
>>  {
>> +        struct kvm_pmc *pmc = global_idx_to_pmc(pmu, idx);
>> +
>> +        if (!pmc || !pmc_is_gp(pmc))
>> +                return;
>> +        reprogram_gp_counter(pmc, pmc->eventsel);
>>  }
>>
>>  void amd_handle_pmu_event(struct kvm_vcpu *vcpu)
>>  {
>
> static.
>
> (pmu.c)
>
>> +        struct kvm_pmu *pmu = &vcpu->arch.pmu;
>> +        u64 bitmask;
>> +        int bit;
>> +
>> +        bitmask = pmu->reprogram_pmi;
>> +
>> +        for_each_set_bit(bit, (unsigned long *)&bitmask, X86_PMC_IDX_MAX) {
>> +                struct kvm_pmc *pmc = global_idx_to_pmc(pmu, bit);
>> +
>> +                if (unlikely(!pmc || !pmc->perf_event)) {
>> +                        clear_bit(bit, (unsigned long *)&pmu->reprogram_pmi);
>> +                        continue;
>> +                }
>> +
>> +                reprogram_idx(pmu, bit);
>> +        }
>>  }
>>
>>  void amd_pmu_reset(struct kvm_vcpu *vcpu)
>
> static.
>
>>  {
>> +        struct kvm_pmu *pmu = &vcpu->arch.pmu;
>> +        int i;
>> +
>> +        irq_work_sync(&pmu->irq_work);
>> +
>> +        for (i = 0; i < AMD64_NUM_COUNTERS; i++) {
>> +                struct kvm_pmc *pmc = &pmu->gp_counters[i];
>> +
>> +                stop_counter(pmc);
>> +                pmc->counter = pmc->eventsel = 0;
>> +        }
>> +}
>> +
>> +void amd_pmu_destroy(struct kvm_vcpu *vcpu)
>
> static.
>
>> +{
>> +        amd_pmu_reset(vcpu);
>> +}
>> +
>> +int amd_pmu_check_pmc(struct kvm_vcpu *vcpu, unsigned pmc)
>
> static.
>
>> +{
>> +        struct kvm_pmu *pmu = &vcpu->arch.pmu;
>> +
>> +        pmc &= ~(3u << 30);
>
> (-1u >> 2.)
>
>> +        return (pmc >= pmu->nr_arch_gp_counters);
>> +}
>> +
>> +int amd_pmu_read_pmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data)
>
> static.
>
>> +{
>> +        struct kvm_pmu *pmu = &vcpu->arch.pmu;
>> +        struct kvm_pmc *counters;
>> +        u64 ctr;
>> +
>> +        pmc &= ~(3u << 30);
>> +        if (pmc >= pmu->nr_arch_gp_counters)
>> +                return 1;
>> +
>> +        counters = pmu->gp_counters;
>> +        ctr = read_pmc(&counters[pmc]);
>> +        *data = ctr;
>> +
>> +        return 0;
>> +}
>> +
>> +int amd_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
>
> static.
>
>> +{
>> +        struct kvm_pmu *pmu = &vcpu->arch.pmu;
>> +        struct kvm_pmc *pmc;
>> +        u32 index = msr_info->index;
>> +        u64 data = msr_info->data;
>> +
>> +        if ((pmc = get_gp_pmc(pmu, index, MSR_K7_PERFCTR0))) {
>> +                if (!msr_info->host_initiated)
>> +                        data = (s64)data;
>> +                pmc->counter += data - read_pmc(pmc);
>> +                return 0;
>> +        } else if ((pmc = get_gp_pmc(pmu, index, MSR_K7_EVNTSEL0))) {
>> +                if (data == pmc->eventsel)
>> +                        return 0;
>> +                if (!(data & pmu->reserved_bits)) {
>> +                        reprogram_gp_counter(pmc, data);
>> +                        return 0;
>> +                }
>> +        }
>> +
>> +        return 1;
>> +}
>> +
>> +int amd_pmu_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data)
>
> static.
>
>> +{
>> +        struct kvm_pmu *pmu = &vcpu->arch.pmu;
>> +        struct kvm_pmc *pmc;
>> +
>> +        if ((pmc = get_gp_pmc(pmu, index, MSR_K7_PERFCTR0))) {
>> +                *data = read_pmc(pmc);
>> +                return 0;
>> +        } else if ((pmc = get_gp_pmc(pmu, index, MSR_K7_EVNTSEL0))) {
>> +                *data = pmc->eventsel;
>> +                return 0;
>> +        }
>> +
>> +        return 1;
>> +}
>> +
>> +void amd_pmu_cpuid_update(struct kvm_vcpu *vcpu)
>
> static.
>
>> +{
>> +        struct kvm_pmu *pmu = &vcpu->arch.pmu;
>> +
>> +        pmu->nr_arch_gp_counters = 0;
>> +        pmu->nr_arch_fixed_counters = 0;
>> +        pmu->counter_bitmask[KVM_PMC_GP] = 0;
>> +        pmu->version = 0;
>> +        pmu->reserved_bits = 0xffffffff00200000ull;
>> +
>
>> +        /* configuration */
>> +        pmu->nr_arch_gp_counters = 4;
>
> AMD64_NUM_COUNTERS?

Yes, will fix. (A sketch of the reworked init/cpuid_update is at the end of
this mail.)

>
>> +        pmu->nr_arch_fixed_counters = 0;
>> +        pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << 48) - 1;
>
> amd_pmu_cpuid_update doesn't depend on cpuid at all.
> (I'd keep this function empty.)
>
>>  }
>>
>>  void amd_pmu_init(struct kvm_vcpu *vcpu)
>
> static.
>
>>  {
>> +        int i;
>> +        struct kvm_pmu *pmu = &vcpu->arch.pmu;
>> +
>> +        memset(pmu, 0, sizeof(*pmu));
>> +        for (i = 0; i < AMD64_NUM_COUNTERS ; i++) {
>> +                pmu->gp_counters[i].type = KVM_PMC_GP;
>> +                pmu->gp_counters[i].vcpu = vcpu;
>> +                pmu->gp_counters[i].idx = i;
>> +        }
>> +
>> +        init_irq_work(&pmu->irq_work, trigger_pmi);
>> +        amd_pmu_cpuid_update(vcpu);
>>  }
>>
>> -void amd_pmu_destroy(struct kvm_vcpu *vcpu)
>> +bool amd_is_pmu_msr(struct kvm_vcpu *vcpu, u32 msr)
>
> static.
>
>>  {
>> +        struct kvm_pmu *pmu = &vcpu->arch.pmu;
>> +        int ret;
>> +
>> +        ret = get_gp_pmc(pmu, msr, MSR_K7_PERFCTR0) ||
>> +                get_gp_pmc(pmu, msr, MSR_K7_EVNTSEL0);
>> +
>> +        return ret;
>>  }
>>
>>  struct kvm_pmu_ops amd_pmu_ops = {
>> --
>> 1.8.3.1
>>
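To summarize the init/cpuid_update rework I have in mind for v2, something
like the following (a sketch only, reusing the values already in this patch;
function names stay as they are unless we decide otherwise):

static void amd_pmu_init(struct kvm_vcpu *vcpu)
{
        struct kvm_pmu *pmu = &vcpu->arch.pmu;
        int i;

        memset(pmu, 0, sizeof(*pmu));

        /* AMD exposes only general-purpose counters, no fixed counters */
        pmu->nr_arch_gp_counters = AMD64_NUM_COUNTERS;
        pmu->nr_arch_fixed_counters = 0;
        pmu->counter_bitmask[KVM_PMC_GP] = ((u64)1 << 48) - 1;
        pmu->reserved_bits = 0xffffffff00200000ull;
        pmu->version = 0;

        for (i = 0; i < AMD64_NUM_COUNTERS; i++) {
                pmu->gp_counters[i].type = KVM_PMC_GP;
                pmu->gp_counters[i].vcpu = vcpu;
                pmu->gp_counters[i].idx = i;
        }

        init_irq_work(&pmu->irq_work, trigger_pmi);
}

static void amd_pmu_cpuid_update(struct kvm_vcpu *vcpu)
{
        /* nothing to do here: the AMD counter setup does not depend on CPUID */
}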