Hi Marc, On 3/10/20 7:00 PM, Marc Zyngier wrote: > On 2020-03-10 17:40, Auger Eric wrote: >> Hi Marc, >> >> On 3/10/20 12:03 PM, Marc Zyngier wrote: >>> Hi Eric, >>> >>> On 2020-03-09 18:05, Auger Eric wrote: >>>> Hi Marc, >>>> >>>> On 3/9/20 1:48 PM, Marc Zyngier wrote: >>>>> It can be desirable to expose a PMU to a guest, and yet not want the >>>>> guest to be able to count some of the implemented events (because this >>>>> would give information on shared resources, for example. >>>>> >>>>> For this, let's extend the PMUv3 device API, and offer a way to >>>>> setup a >>>>> bitmap of the allowed events (the default being no bitmap, and thus no >>>>> filtering). >>>>> >>>>> Userspace can thus allow/deny ranges of event. The default policy >>>>> depends on the "polarity" of the first filter setup (default deny >>>>> if the >>>>> filter allows events, and default allow if the filter denies events). >>>>> This allows to setup exactly what is allowed for a given guest. >>>>> >>>>> Note that although the ioctl is per-vcpu, the map of allowed events is >>>>> global to the VM (it can be setup from any vcpu until the vcpu PMU is >>>>> initialized). >>>>> >>>>> Signed-off-by: Marc Zyngier <maz@xxxxxxxxxx> >>>>> --- >>>>> arch/arm64/include/asm/kvm_host.h | 6 +++ >>>>> arch/arm64/include/uapi/asm/kvm.h | 16 ++++++ >>>>> virt/kvm/arm/arm.c | 2 + >>>>> virt/kvm/arm/pmu.c | 84 >>>>> +++++++++++++++++++++++++------ >>>>> 4 files changed, 92 insertions(+), 16 deletions(-) >>>>> >>>>> diff --git a/arch/arm64/include/asm/kvm_host.h >>>>> b/arch/arm64/include/asm/kvm_host.h >>>>> index 57fd46acd058..8e63c618688d 100644 >>>>> --- a/arch/arm64/include/asm/kvm_host.h >>>>> +++ b/arch/arm64/include/asm/kvm_host.h >>>>> @@ -91,6 +91,12 @@ struct kvm_arch { >>>>> * supported. >>>>> */ >>>>> bool return_nisv_io_abort_to_user; >>>>> + >>>>> + /* >>>>> + * VM-wide PMU filter, implemented as a bitmap and big enough >>>>> + * for up to 65536 events >>>>> + */ >>>>> + unsigned long *pmu_filter; >>>>> }; >>>>> >>>>> #define KVM_NR_MEM_OBJS 40 >>>>> diff --git a/arch/arm64/include/uapi/asm/kvm.h >>>>> b/arch/arm64/include/uapi/asm/kvm.h >>>>> index ba85bb23f060..7b1511d6ce44 100644 >>>>> --- a/arch/arm64/include/uapi/asm/kvm.h >>>>> +++ b/arch/arm64/include/uapi/asm/kvm.h >>>>> @@ -159,6 +159,21 @@ struct kvm_sync_regs { >>>>> struct kvm_arch_memory_slot { >>>>> }; >>>>> >>>>> +/* >>>>> + * PMU filter structure. Describe a range of events with a particular >>>>> + * action. To be used with KVM_ARM_VCPU_PMU_V3_FILTER. >>>>> + */ >>>>> +struct kvm_pmu_event_filter { >>>>> + __u16 base_event; >>>>> + __u16 nevents; >>>>> + >>>>> +#define KVM_PMU_EVENT_ALLOW 0 >>>>> +#define KVM_PMU_EVENT_DENY 1 >>>>> + >>>>> + __u8 action; >>>>> + __u8 pad[3]; >>>>> +}; >>>>> + >>>>> /* for KVM_GET/SET_VCPU_EVENTS */ >>>>> struct kvm_vcpu_events { >>>>> struct { >>>>> @@ -329,6 +344,7 @@ struct kvm_vcpu_events { >>>>> #define KVM_ARM_VCPU_PMU_V3_CTRL 0 >>>>> #define KVM_ARM_VCPU_PMU_V3_IRQ 0 >>>>> #define KVM_ARM_VCPU_PMU_V3_INIT 1 >>>>> +#define KVM_ARM_VCPU_PMU_V3_FILTER 2 >>>>> #define KVM_ARM_VCPU_TIMER_CTRL 1 >>>>> #define KVM_ARM_VCPU_TIMER_IRQ_VTIMER 0 >>>>> #define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1 >>>>> diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c >>>>> index eda7b624eab8..8d849ac88a44 100644 >>>>> --- a/virt/kvm/arm/arm.c >>>>> +++ b/virt/kvm/arm/arm.c >>>>> @@ -164,6 +164,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm) >>>>> free_percpu(kvm->arch.last_vcpu_ran); >>>>> kvm->arch.last_vcpu_ran = NULL; >>>>> >>>>> + bitmap_free(kvm->arch.pmu_filter); >>>>> + >>>>> for (i = 0; i < KVM_MAX_VCPUS; ++i) { >>>>> if (kvm->vcpus[i]) { >>>>> kvm_vcpu_destroy(kvm->vcpus[i]); >>>>> diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c >>>>> index f0d0312c0a55..9f0fd0224d5b 100644 >>>>> --- a/virt/kvm/arm/pmu.c >>>>> +++ b/virt/kvm/arm/pmu.c >>>>> @@ -579,10 +579,19 @@ static void kvm_pmu_create_perf_event(struct >>>>> kvm_vcpu *vcpu, u64 select_idx) >>>>> >>>>> kvm_pmu_stop_counter(vcpu, pmc); >>>>> eventsel = data & ARMV8_PMU_EVTYPE_EVENT; >>>>> + if (pmc->idx == ARMV8_PMU_CYCLE_IDX) >>>>> + eventsel = ARMV8_PMUV3_PERFCTR_CPU_CYCLES; >>>> nit: >>>> if (pmc->idx == ARMV8_PMU_CYCLE_IDX) >>>> eventsel = ARMV8_PMUV3_PERFCTR_CPU_CYCLES; >>>> else >>>> eventsel = data & ARMV8_PMU_EVTYPE_EVENT; >>> >>> You don't like it? ;-) >> ? eventset set only once instead of 2 times > > The compiler does the right thing, but sore, I'll change it. > >>> >>>>> >>>>> /* Software increment event does't need to be backed by a perf >>>>> event */ >>>> nit: while wer are at it fix the does't typo >>>>> - if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR && >>>>> - pmc->idx != ARMV8_PMU_CYCLE_IDX) >>>>> + if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR) >>>>> + return; >>>>> + >>>>> + /* >>>>> + * If we have a filter in place and that the event isn't >>>>> allowed, do >>>>> + * not install a perf event either. >>>>> + */ >>>>> + if (vcpu->kvm->arch.pmu_filter && >>>>> + !test_bit(eventsel, vcpu->kvm->arch.pmu_filter)) >>>>> return; >>>>> >>>>> memset(&attr, 0, sizeof(struct perf_event_attr)); >>>>> @@ -594,8 +603,7 @@ static void kvm_pmu_create_perf_event(struct >>>>> kvm_vcpu *vcpu, u64 select_idx) >>>>> attr.exclude_kernel = data & ARMV8_PMU_EXCLUDE_EL1 ? 1 : 0; >>>>> attr.exclude_hv = 1; /* Don't count EL2 events */ >>>>> attr.exclude_host = 1; /* Don't count host events */ >>>>> - attr.config = (pmc->idx == ARMV8_PMU_CYCLE_IDX) ? >>>>> - ARMV8_PMUV3_PERFCTR_CPU_CYCLES : eventsel; >>>>> + attr.config = eventsel; >>>> So in that case the guest counter will not increment but the guest does >>>> not know the counter is not implemented. Can't this lead to bad user >>>> experience. Shouldn't this disablement be reflected in PMCEID0/1 regs? >>> >>> The whole point is that we want to keep things hidden from the guest. >>> Also, PMCEID{0,1} only describe a small set of events (the architected >>> common events), and not the whole range of microarchitectural events >>> that the CPU implements. >> >> I am still not totally convinced. Things are not totally hidden to the >> guest as the counter does not increment, right? So a guest may try to >> use as it is advertised in PMCEID0/1 but not get the expected results >> leading to potential support request. I agree not all the events are >> described there but your API also allows to filter out some of the ones >> that are advertised. > > I think we're at odds when it comes to the goal of this series. If you > read the CPU TRM, you will find that event X is implemented. You look > at PMCEIDx, and you find it is not. You still get a support request! ;-) Yep that's a weird situation indeed, I haven't thought about the TRM. > > Dropping events from these registers is totally trivial, but I'm not > sure this will reduce the surprise effect. It doesn't hurt anyway, so > I'll implement that. Up to you. Or at least you can document it in the commit msg. Thanks Eric > >>> >>>>> >>>>> counter = kvm_pmu_get_pair_counter_value(vcpu, pmc); >>>>> >>>>> @@ -735,15 +743,6 @@ int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu) >>>>> >>>>> static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu) >>>>> { >>>>> - if (!kvm_arm_support_pmu_v3()) >>>>> - return -ENODEV; >>>>> - >>>>> - if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features)) >>>>> - return -ENXIO; >>>>> - >>>>> - if (vcpu->arch.pmu.created) >>>>> - return -EBUSY; >>>>> - >>>>> if (irqchip_in_kernel(vcpu->kvm)) { >>>>> int ret; >>>>> >>>>> @@ -794,8 +793,19 @@ static bool pmu_irq_is_valid(struct kvm *kvm, >>>>> int irq) >>>>> return true; >>>>> } >>>>> >>>>> +#define NR_EVENTS (ARMV8_PMU_EVTYPE_EVENT + 1) >>>>> + >>>>> int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct >>>>> kvm_device_attr *attr) >>>>> { >>>>> + if (!kvm_arm_support_pmu_v3()) >>>>> + return -ENODEV; >>>>> + >>>>> + if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features)) >>>>> + return -ENODEV; >>>> I see you changed -ENXIO into -ENODEV. wanted? >>> >>> Probably not... but see below. >>> >>>>> + >>>>> + if (vcpu->arch.pmu.created) >>>>> + return -EBUSY; >>>>> + >>>>> switch (attr->attr) { >>>>> case KVM_ARM_VCPU_PMU_V3_IRQ: { >>>>> int __user *uaddr = (int __user *)(long)attr->addr; >>>>> @@ -804,9 +814,6 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu >>>>> *vcpu, struct kvm_device_attr *attr) >>>>> if (!irqchip_in_kernel(vcpu->kvm)) >>>>> return -EINVAL; >>>>> >>>>> - if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features)) >>>>> - return -ENODEV; >>>>> - >>> >>> Here's why. I wonder if we already have a problem with the consistency >>> of the >>> error codes returned to userspace. >> OK. Then you may document it in the commit message? > > I still need to work out whether we actually have an issue on that. > > [...] > >>>> not related to this patch but shouldn't we advertise this only with >>>> in-kernel irqchip? >>> >>> We do support the PMU without the in-kernel chip, unfortunately... Yes, >>> supporting this feature was a big mistake. >> But I see in kvm_arm_pmu_v3_set_attr: >> case KVM_ARM_VCPU_PMU_V3_IRQ: >> ../.. >> if (!irqchip_in_kernel(vcpu->kvm)) >> return -EINVAL; > > Ah, I see what you mean. Yes, we probably shouldn't report that the PMU > IRQ attribute is supported when we don't have an in-kernel irqchip. > > Thanks, > > M.