On 10/30/2011 06:53 PM, Gleb Natapov wrote:
> From: Avi Kivity <avi@xxxxxxxxxx>

This has changed significantly, so please update the authorship.  You can
say 'based on original patch by ...' to provide due credit.

> Use perf_events to emulate an architectural PMU, version 2.
> +
> +/* mapping between fixed pmc index and arch_events array */
> +int fixed_pmc_events[] = {1, 0, 2};
> +
> +static bool pmc_is_gp(struct kvm_pmc *pmc)
> +{
> +	return pmc->type == KVM_PMC_GP;
> +}
> +
> +static inline u64 pmc_bitmask(struct kvm_pmc *pmc)
> +{
> +	struct kvm_pmu *pmu = &pmc->vcpu->arch.pmu;
> +
> +	return pmc_is_gp(pmc) ? pmu->gp_counter_bitmask :
> +		pmu->fixed_counter_bitmask;
> +}

Nicer to just push the bitmask (or bitwidth) into the counter itself
(a sketch follows further below).

> +
> +static inline int pmc_to_global_idx(struct kvm_pmc *pmc)
> +{
> +	struct kvm_pmu *pmu = &pmc->vcpu->arch.pmu;
> +	struct kvm_pmc *counters;
> +	int shift;
> +
> +	if (pmc_is_gp(pmc)) {
> +		counters = pmu->gp_counters;
> +		shift = X86_PMC_IDX_GENERIC;
> +	} else {
> +		counters = pmu->fixed_counters;
> +		shift = X86_PMC_IDX_FIXED;
> +	}
> +
> +	return pmc - counters + shift;
> +}

Again, push the global index into struct kvm_pmc (same sketch below).

> +
> +static void kvm_perf_overflow(struct perf_event *perf_event,
> +			      struct perf_sample_data *data,
> +			      struct pt_regs *regs)
> +{
> +	struct kvm_pmc *pmc = perf_event->overflow_handler_context;
> +	struct kvm_pmu *pmu = &pmc->vcpu->arch.pmu;
> +	__set_bit(pmc_to_global_idx(pmc),
> +			(unsigned long *)&pmu->global_status);
> +}
> +
> +static void kvm_perf_overflow_intr(struct perf_event *perf_event,
> +		struct perf_sample_data *data, struct pt_regs *regs)
> +{
> +	struct kvm_pmc *pmc = perf_event->overflow_handler_context;
> +	struct kvm_pmu *pmu = &pmc->vcpu->arch.pmu;
> +	if (!__test_and_set_bit(pmc_to_global_idx(pmc),
> +			(unsigned long *)&pmu->reprogram_pmi)) {
> +		kvm_perf_overflow(perf_event, data, regs);
> +		kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
> +	}
> +}

Is it safe to use the __ versions here?  Do we need to follow
kvm_make_request() with kvm_vcpu_kick()?  If there is a skew between the
overflow and the host PMI, the guest might have executed a HLT.  (See the
sketch below.)

> +
> +static void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 en_pmi, int idx)
> +{
> +	unsigned en = en_pmi & 0x3;
> +	bool pmi = en_pmi & 0x8;
> +
> +	stop_counter(pmc);
> +
> +	if (!en || !pmc_enabled(pmc))
> +		return;
> +
> +	reprogram_counter(pmc, PERF_TYPE_HARDWARE,
> +			arch_events[fixed_pmc_events[idx]].event_type,
> +			!(en & 0x2), /* exclude user */
> +			!(en & 0x1), /* exclude kernel */
> +			pmi);

Are there no #defines for those constants?
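For illustration, the per-counter control bits could be named; this is
only a sketch, and the #define names below are invented here, not taken
from the patch or from existing kernel headers:

#define FIXED_CTRL_EN_OS	(1 << 0)	/* count at CPL 0 */
#define FIXED_CTRL_EN_USR	(1 << 1)	/* count at CPL > 0 */
#define FIXED_CTRL_PMI		(1 << 3)	/* interrupt on overflow */

	unsigned en = en_pmi & (FIXED_CTRL_EN_OS | FIXED_CTRL_EN_USR);
	bool pmi = en_pmi & FIXED_CTRL_PMI;
	...
	reprogram_counter(pmc, PERF_TYPE_HARDWARE,
			arch_events[fixed_pmc_events[idx]].event_type,
			!(en & FIXED_CTRL_EN_USR),	/* exclude user */
			!(en & FIXED_CTRL_EN_OS),	/* exclude kernel */
			pmi);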
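To make the two earlier suggestions concrete: the bitmask and the global
index can be computed once, when the counter is created, and stored in
struct kvm_pmc, so the helpers collapse to field reads.  A rough,
untested sketch; the new field names are only suggestions:

struct kvm_pmc {
	enum pmc_type type;
	u64 counter;
	u64 bitmask;	/* counter width, as a mask */
	int idx;	/* bit position in the global ctrl/status MSRs */
	/* ... */
};

static inline u64 pmc_bitmask(struct kvm_pmc *pmc)
{
	return pmc->bitmask;
}

static inline int pmc_to_global_idx(struct kvm_pmc *pmc)
{
	return pmc->idx;
}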
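On the kick question, I mean something like the following untested
sketch: it uses the atomic test_and_set_bit(), since the overflow
callback can race with the vcpu clearing the bit, and adds the kick.
Whether kicking is safe from the PMI/NMI-like context the callback runs
in is a separate question that would need checking:

static void kvm_perf_overflow_intr(struct perf_event *perf_event,
		struct perf_sample_data *data, struct pt_regs *regs)
{
	struct kvm_pmc *pmc = perf_event->overflow_handler_context;
	struct kvm_pmu *pmu = &pmc->vcpu->arch.pmu;

	if (!test_and_set_bit(pmc_to_global_idx(pmc),
			(unsigned long *)&pmu->reprogram_pmi)) {
		kvm_perf_overflow(perf_event, data, regs);
		kvm_make_request(KVM_REQ_PMU, pmc->vcpu);
		kvm_vcpu_kick(pmc->vcpu);	/* wake a vcpu halted in HLT */
	}
}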
> +}
> +
> +#define FIXED_EN_PMI(R, I) (((R) >> ((I) * 4)) & 0xf)

Better as an inline function (see the sketch below).

> +	default:
> +		if ((pmc = get_gp_pmc(pmu, index, MSR_IA32_PERFCTR0)) ||
> +				(pmc = get_fixed_pmc(pmu, index))) {
> +			data = (s64)(s32)data;
> +			pmc->counter += data - read_pmc(pmc);
> +			return 0;
> +		} else if ((pmc = get_gp_pmc(pmu, index, MSR_P6_EVNTSEL0))) {
> +			if (data == pmc->eventsel)
> +				return 0;
> +			if (!(data & 0xffffffff00200000ull)) {
> +				reprogram_gp_counter(pmc, data);
> +				return 0;
> +			}
> +		}
> +	}
> +	return 1;
> +}
> +
> +
> +void kvm_pmu_cpuid_update(struct kvm_vcpu *vcpu)
> +{
> +	struct kvm_pmu *pmu = &vcpu->arch.pmu;
> +	struct kvm_cpuid_entry2 *entry;
> +	unsigned bitmap_len;
> +
> +	pmu->nr_arch_gp_counters = 0;
> +	pmu->nr_arch_fixed_counters = 0;
> +	pmu->fixed_counter_bitmask = 0;
> +	pmu->version = 0;
> +
> +	entry = kvm_find_cpuid_entry(vcpu, 0xa, 0);
> +	if (!entry)
> +		return;
> +
> +	pmu->version = entry->eax & 0xff;
> +	if (!pmu->version)
> +		return;
> +
> +	pmu->nr_arch_gp_counters = min((int)(entry->eax >> 8) & 0xff,
> +			X86_PMC_MAX_GENERIC);
> +	pmu->gp_counter_bitmask = ((u64)1 << ((entry->eax >> 16) & 0xff)) - 1;
> +	bitmap_len = (entry->eax >> 24) & 0xff;
> +	pmu->available_event_types = ~entry->ebx & ((1ull << bitmap_len) - 1);
> +
> +	if (pmu->version > 1) {
> +		pmu->nr_arch_fixed_counters = min((int)(entry->edx) & 0x1f,
> +				X86_PMC_MAX_FIXED);

Misplaced parentheses (though no effect on generated code): presumably
you meant (int)(entry->edx & 0x1f).

> +		pmu->fixed_counter_bitmask =
> +			((u64)1 << ((entry->edx >> 5) & 0xff)) - 1;

The user can cause this to be very small (even zero).  Can this cause an
NMI storm?

> +		pmu->global_ctrl_mask = ~(((1 << pmu->nr_arch_gp_counters) - 1)
> +				| (((1ull << pmu->nr_arch_fixed_counters) - 1)
> +					<< X86_PMC_IDX_FIXED));
> +	} else
> +		pmu->global_ctrl = (1 << pmu->nr_arch_gp_counters) - 1;
> +}

Nicer to just return early if version < 2; less indentation, and easier
to prepare for version 3.
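Concretely, the tail of the function could read like this (an untested
rearrangement of your code, with the parenthesization above also fixed):

	if (pmu->version < 2) {
		pmu->global_ctrl = (1 << pmu->nr_arch_gp_counters) - 1;
		return;
	}

	pmu->nr_arch_fixed_counters = min((int)(entry->edx & 0x1f),
			X86_PMC_MAX_FIXED);
	pmu->fixed_counter_bitmask =
		((u64)1 << ((entry->edx >> 5) & 0xff)) - 1;
	pmu->global_ctrl_mask = ~(((1 << pmu->nr_arch_gp_counters) - 1)
			| (((1ull << pmu->nr_arch_fixed_counters) - 1)
				<< X86_PMC_IDX_FIXED));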
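And FIXED_EN_PMI could become, say:

static inline u64 fixed_en_pmi(u64 ctrl, int idx)
{
	return (ctrl >> (idx * 4)) & 0xf;	/* 4 control bits per fixed counter */
}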
--
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.