On Mon, Dec 05, 2022 at 12:05:32PM +0000, Marc Zyngier wrote:
> On Thu, 01 Dec 2022 16:51:46 +0000,
> Ricardo Koller <ricarkol@xxxxxxxxxx> wrote:
> >
> > On Thu, Dec 01, 2022 at 08:47:47AM -0800, Ricardo Koller wrote:
> > > On Sun, Nov 13, 2022 at 04:38:20PM +0000, Marc Zyngier wrote:
> > > > The PMU architecture makes a subtle difference between a 64bit
> > > > counter and a counter that has a 64bit overflow. This is for example
> > > > the case of the cycle counter, which can generate an overflow on
> > > > a 32bit boundary if PMCR_EL0.LC==0 despite the accumulation being
> > > > done on 64 bits.
> > > >
> > > > Use this distinction in the few cases where it matters in the code,
> > > > as we will reuse this with PMUv3p5 long counters.
> > > >
> > > > Signed-off-by: Marc Zyngier <maz@xxxxxxxxxx>
> > > > ---
> > > >  arch/arm64/kvm/pmu-emul.c | 43 ++++++++++++++++++++++++++++-----------
> > > >  1 file changed, 31 insertions(+), 12 deletions(-)
> > > >
> > > > diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
> > > > index 69b67ab3c4bf..d050143326b5 100644
> > > > --- a/arch/arm64/kvm/pmu-emul.c
> > > > +++ b/arch/arm64/kvm/pmu-emul.c
> > > > @@ -50,6 +50,11 @@ static u32 kvm_pmu_event_mask(struct kvm *kvm)
> > > >   * @select_idx: The counter index
> > > >   */
> > > >  static bool kvm_pmu_idx_is_64bit(struct kvm_vcpu *vcpu, u64 select_idx)
> > > > +{
> > > > +	return (select_idx == ARMV8_PMU_CYCLE_IDX);
> > > > +}
> > > > +
> > > > +static bool kvm_pmu_idx_has_64bit_overflow(struct kvm_vcpu *vcpu, u64 select_idx)
> > > >  {
> > > >  	return (select_idx == ARMV8_PMU_CYCLE_IDX &&
> > > >  		__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_LC);
> > > > @@ -57,7 +62,8 @@ static bool kvm_pmu_idx_is_64bit(struct kvm_vcpu *vcpu, u64 select_idx)
> > > >
> > > >  static bool kvm_pmu_counter_can_chain(struct kvm_vcpu *vcpu, u64 idx)
> > > >  {
> > > > -	return (!(idx & 1) && (idx + 1) < ARMV8_PMU_CYCLE_IDX);
> > > > +	return (!(idx & 1) && (idx + 1) < ARMV8_PMU_CYCLE_IDX &&
> > > > +		!kvm_pmu_idx_has_64bit_overflow(vcpu, idx));
> > > >  }
> > > >
> > > >  static struct kvm_vcpu *kvm_pmc_to_vcpu(struct kvm_pmc *pmc)
> > > > @@ -97,7 +103,7 @@ u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx)
> > > >  		counter += perf_event_read_value(pmc->perf_event, &enabled,
> > > >  						 &running);
> > > >
> > > > -	if (select_idx != ARMV8_PMU_CYCLE_IDX)
> > > > +	if (!kvm_pmu_idx_is_64bit(vcpu, select_idx))
> > > >  		counter = lower_32_bits(counter);
> > > >
> > > >  	return counter;
> > > > @@ -423,6 +429,23 @@ static void kvm_pmu_counter_increment(struct kvm_vcpu *vcpu,
> > > >  	}
> > > >  }
> > > >
> > > > +/* Compute the sample period for a given counter value */
> > > > +static u64 compute_period(struct kvm_vcpu *vcpu, u64 select_idx, u64 counter)
> > > > +{
> > > > +	u64 val;
> > > > +
> > > > +	if (kvm_pmu_idx_is_64bit(vcpu, select_idx)) {
> > > > +		if (!kvm_pmu_idx_has_64bit_overflow(vcpu, select_idx))
> > > > +			val = -(counter & GENMASK(31, 0));
> > >
> > > If I understand things correctly, this might be missing another mask:
> > >
> > > +		if (!kvm_pmu_idx_has_64bit_overflow(vcpu, select_idx)) {
> > > +			val = -(counter & GENMASK(31, 0));
> > > +			val &= GENMASK(31, 0);
> > > +		} else {
> > >
> > > For example, if the counter is 64-bits wide, it overflows at 32-bits,
> > > and it is _one_ sample away from overflowing at 32-bits:
> > >
> > > 	0x01010101_ffffffff
> > >
> > > Then "val = (-counter) & GENMASK(63, 0)" would return 0xffffffff_00000001.
> >
> > Sorry, this should be:
> >
> > 	Then "val = -(counter & GENMASK(31, 0))" would return 0xffffffff_00000001.
> >
> > > But the right period is 0x00000000_00000001 (it's one sample away from
> > > overflowing).
>
> Yup, this is a bit bogus. But this can be simplified by falling back
> to the normal 32bit handling (on top of the pmu-unchained branch):
>
> diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c
> index d8ea39943086..24908400e190 100644
> --- a/arch/arm64/kvm/pmu-emul.c
> +++ b/arch/arm64/kvm/pmu-emul.c
> @@ -461,14 +461,10 @@ static u64 compute_period(struct kvm_pmc *pmc, u64 counter)
>  {
>  	u64 val;
>
> -	if (kvm_pmc_is_64bit(pmc)) {
> -		if (!kvm_pmc_has_64bit_overflow(pmc))
> -			val = -(counter & GENMASK(31, 0));
> -		else
> -			val = (-counter) & GENMASK(63, 0);
> -	} else {
> +	if (kvm_pmc_is_64bit(pmc) && kvm_pmc_has_64bit_overflow(pmc))

Great, thanks! Yes, that definitely makes things simpler ^.

> +		val = (-counter) & GENMASK(63, 0);
> +	else
>  		val = (-counter) & GENMASK(31, 0);
> -	}
>
>  	return val;
>  }
>
> which satisfies the requirement without any extra masking, and makes
> it plain that only a 64bit counter with 64bit overflow gets its period
> computed on the full 64bit, and that anyone else gets the 32bit
> truncation.
>
> I'll stash yet another patch on top and push it onto -next.
>
> Thanks!
>
> 	M.
>
> --
> Without deviation from the norm, progress is not possible.