On Thu, 01 Dec 2022 16:51:46 +0000,
Ricardo Koller <ricarkol@xxxxxxxxxx> wrote:
>
> On Thu, Dec 01, 2022 at 08:47:47AM -0800, Ricardo Koller wrote:
> > On Sun, Nov 13, 2022 at 04:38:20PM +0000, Marc Zyngier wrote:
> > > The PMU architecture makes a subtle difference between a 64bit
> > > counter and a counter that has a 64bit overflow. This is for example
> > > the case of the cycle counter, which can generate an overflow on
> > > a 32bit boundary if PMCR_EL0.LC==0 despite the accumulation being
> > > done on 64 bits.
> > >
> > > Use this distinction in the few cases where it matters in the code,
> > > as we will reuse this with PMUv3p5 long counters.
> > >
> > > Signed-off-by: Marc Zyngier <maz@xxxxxxxxxx>
> > > ---
> > >  arch/arm64/kvm/pmu-emul.c | 43 ++++++++++++++++++++++++++++-----------
> > >  1 file changed, 31 insertions(+), 12 deletions(-)
> > >
> > > diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
> > > index 69b67ab3c4bf..d050143326b5 100644
> > > --- a/arch/arm64/kvm/pmu-emul.c
> > > +++ b/arch/arm64/kvm/pmu-emul.c
> > > @@ -50,6 +50,11 @@ static u32 kvm_pmu_event_mask(struct kvm *kvm)
> > >   * @select_idx: The counter index
> > >   */
> > >  static bool kvm_pmu_idx_is_64bit(struct kvm_vcpu *vcpu, u64 select_idx)
> > > +{
> > > +	return (select_idx == ARMV8_PMU_CYCLE_IDX);
> > > +}
> > > +
> > > +static bool kvm_pmu_idx_has_64bit_overflow(struct kvm_vcpu *vcpu, u64 select_idx)
> > >  {
> > >  	return (select_idx == ARMV8_PMU_CYCLE_IDX &&
> > >  		__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_LC);
> > > @@ -57,7 +62,8 @@ static bool kvm_pmu_idx_is_64bit(struct kvm_vcpu *vcpu, u64 select_idx)
> > >
> > >  static bool kvm_pmu_counter_can_chain(struct kvm_vcpu *vcpu, u64 idx)
> > >  {
> > > -	return (!(idx & 1) && (idx + 1) < ARMV8_PMU_CYCLE_IDX);
> > > +	return (!(idx & 1) && (idx + 1) < ARMV8_PMU_CYCLE_IDX &&
> > > +		!kvm_pmu_idx_has_64bit_overflow(vcpu, idx));
> > >  }
> > >
> > >  static struct kvm_vcpu *kvm_pmc_to_vcpu(struct kvm_pmc *pmc)
> > > @@ -97,7 +103,7 @@ u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx)
> > >  		counter += perf_event_read_value(pmc->perf_event, &enabled,
> > >  						 &running);
> > >
> > > -	if (select_idx != ARMV8_PMU_CYCLE_IDX)
> > > +	if (!kvm_pmu_idx_is_64bit(vcpu, select_idx))
> > >  		counter = lower_32_bits(counter);
> > >
> > >  	return counter;
> > > @@ -423,6 +429,23 @@ static void kvm_pmu_counter_increment(struct kvm_vcpu *vcpu,
> > >  	}
> > >  }
> > >
> > > +/* Compute the sample period for a given counter value */
> > > +static u64 compute_period(struct kvm_vcpu *vcpu, u64 select_idx, u64 counter)
> > > +{
> > > +	u64 val;
> > > +
> > > +	if (kvm_pmu_idx_is_64bit(vcpu, select_idx)) {
> > > +		if (!kvm_pmu_idx_has_64bit_overflow(vcpu, select_idx))
> > > +			val = -(counter & GENMASK(31, 0));
> >
> > If I understand things correctly, this might be missing another mask:
> >
> > +		if (!kvm_pmu_idx_has_64bit_overflow(vcpu, select_idx)) {
> > +			val = -(counter & GENMASK(31, 0));
> > +			val &= GENMASK(31, 0);
> > +		} else {
> >
> > For example, if the counter is 64-bits wide, it overflows at 32-bits,
> > and it is _one_ sample away from overflowing at 32-bits:
> >
> > 	0x01010101_ffffffff
> >
> > Then "val = (-counter) & GENMASK(63, 0)" would return 0xffffffff_00000001.
>
> Sorry, this should be:
>
> Then "val = -(counter & GENMASK(31, 0))" would return 0xffffffff_00000001.
>
> > But the right period is 0x00000000_00000001 (it's one sample away from
> > overflowing).

Yup, this is a bit bogus.
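To make the failure mode concrete, here is a minimal standalone sketch
(hypothetical userspace C, not part of the patch; GENMASK32 is a
stand-in for the kernel's GENMASK(31, 0)) that reproduces the
arithmetic on Ricardo's example:

#include <stdint.h>
#include <stdio.h>

/* Userspace stand-in for the kernel's GENMASK(31, 0) */
#define GENMASK32	0xffffffffULL

int main(void)
{
	/* 64bit counter, one increment away from a 32bit overflow */
	uint64_t counter = 0x01010101ffffffffULL;

	/* Formula from the patch: negate only the low 32 bits */
	uint64_t bogus = -(counter & GENMASK32);

	/* What the period should be: distance to the 32bit boundary */
	uint64_t expected = (-counter) & GENMASK32;

	/* Prints 0xffffffff00000001 vs the expected 0x0000000000000001 */
	printf("bogus    = 0x%016llx\n", (unsigned long long)bogus);
	printf("expected = 0x%016llx\n", (unsigned long long)expected);
	return 0;
}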
But this can be simplified by falling back to the normal 32bit
handling (on top of the pmu-unchained branch):

diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
index d8ea39943086..24908400e190 100644
--- a/arch/arm64/kvm/pmu-emul.c
+++ b/arch/arm64/kvm/pmu-emul.c
@@ -461,14 +461,10 @@ static u64 compute_period(struct kvm_pmc *pmc, u64 counter)
 {
 	u64 val;
 
-	if (kvm_pmc_is_64bit(pmc)) {
-		if (!kvm_pmc_has_64bit_overflow(pmc))
-			val = -(counter & GENMASK(31, 0));
-		else
-			val = (-counter) & GENMASK(63, 0);
-	} else {
+	if (kvm_pmc_is_64bit(pmc) && kvm_pmc_has_64bit_overflow(pmc))
+		val = (-counter) & GENMASK(63, 0);
+	else
 		val = (-counter) & GENMASK(31, 0);
-	}
 
 	return val;
 }

which satisfies the requirement without any extra masking, and makes
it plain that only a 64bit counter with a 64bit overflow gets its
period computed on the full 64 bits, while everything else gets the
32bit truncation.

I'll stash yet another patch on top and push it onto -next.

Thanks!

	M.

-- 
Without deviation from the norm, progress is not possible.
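For completeness, the simplified logic can be sanity-checked against
both cases with another hypothetical userspace sketch (the bools stand
in for kvm_pmc_is_64bit()/kvm_pmc_has_64bit_overflow(), which need a
real pmc; nothing here is kernel code):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Mirror of the simplified compute_period() above */
static uint64_t compute_period(bool is_64bit, bool overflow_64bit,
			       uint64_t counter)
{
	uint64_t val;

	if (is_64bit && overflow_64bit)
		val = (-counter) & 0xffffffffffffffffULL; /* GENMASK(63, 0) */
	else
		val = (-counter) & 0xffffffffULL;         /* GENMASK(31, 0) */

	return val;
}

int main(void)
{
	uint64_t counter = 0x01010101ffffffffULL;

	/* 64bit counter, 32bit overflow (PMCR_EL0.LC==0): period is 1 */
	printf("0x%016llx\n",
	       (unsigned long long)compute_period(true, false, counter));

	/* 64bit counter with 64bit overflow: full 64bit distance,
	 * i.e. 0xfefefefe00000001 */
	printf("0x%016llx\n",
	       (unsigned long long)compute_period(true, true, counter));
	return 0;
}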