Re: [PATCH v2 1/2] KVM: arm64: Add PMU event filtering infrastructure

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi Marc,

On 3/10/20 7:00 PM, Marc Zyngier wrote:
> On 2020-03-10 17:40, Auger Eric wrote:
>> Hi Marc,
>>
>> On 3/10/20 12:03 PM, Marc Zyngier wrote:
>>> Hi Eric,
>>>
>>> On 2020-03-09 18:05, Auger Eric wrote:
>>>> Hi Marc,
>>>>
>>>> On 3/9/20 1:48 PM, Marc Zyngier wrote:
>>>>> It can be desirable to expose a PMU to a guest, and yet not want the
>>>>> guest to be able to count some of the implemented events (because this
>>>>> would give information on shared resources, for example.
>>>>>
>>>>> For this, let's extend the PMUv3 device API, and offer a way to
>>>>> setup a
>>>>> bitmap of the allowed events (the default being no bitmap, and thus no
>>>>> filtering).
>>>>>
>>>>> Userspace can thus allow/deny ranges of event. The default policy
>>>>> depends on the "polarity" of the first filter setup (default deny
>>>>> if the
>>>>> filter allows events, and default allow if the filter denies events).
>>>>> This allows to setup exactly what is allowed for a given guest.
>>>>>
>>>>> Note that although the ioctl is per-vcpu, the map of allowed events is
>>>>> global to the VM (it can be setup from any vcpu until the vcpu PMU is
>>>>> initialized).
>>>>>
>>>>> Signed-off-by: Marc Zyngier <maz@xxxxxxxxxx>
>>>>> ---
>>>>>  arch/arm64/include/asm/kvm_host.h |  6 +++
>>>>>  arch/arm64/include/uapi/asm/kvm.h | 16 ++++++
>>>>>  virt/kvm/arm/arm.c                |  2 +
>>>>>  virt/kvm/arm/pmu.c                | 84
>>>>> +++++++++++++++++++++++++------
>>>>>  4 files changed, 92 insertions(+), 16 deletions(-)
>>>>>
>>>>> diff --git a/arch/arm64/include/asm/kvm_host.h
>>>>> b/arch/arm64/include/asm/kvm_host.h
>>>>> index 57fd46acd058..8e63c618688d 100644
>>>>> --- a/arch/arm64/include/asm/kvm_host.h
>>>>> +++ b/arch/arm64/include/asm/kvm_host.h
>>>>> @@ -91,6 +91,12 @@ struct kvm_arch {
>>>>>       * supported.
>>>>>       */
>>>>>      bool return_nisv_io_abort_to_user;
>>>>> +
>>>>> +    /*
>>>>> +     * VM-wide PMU filter, implemented as a bitmap and big enough
>>>>> +     * for up to 65536 events
>>>>> +     */
>>>>> +    unsigned long *pmu_filter;
>>>>>  };
>>>>>
>>>>>  #define KVM_NR_MEM_OBJS     40
>>>>> diff --git a/arch/arm64/include/uapi/asm/kvm.h
>>>>> b/arch/arm64/include/uapi/asm/kvm.h
>>>>> index ba85bb23f060..7b1511d6ce44 100644
>>>>> --- a/arch/arm64/include/uapi/asm/kvm.h
>>>>> +++ b/arch/arm64/include/uapi/asm/kvm.h
>>>>> @@ -159,6 +159,21 @@ struct kvm_sync_regs {
>>>>>  struct kvm_arch_memory_slot {
>>>>>  };
>>>>>
>>>>> +/*
>>>>> + * PMU filter structure. Describe a range of events with a particular
>>>>> + * action. To be used with KVM_ARM_VCPU_PMU_V3_FILTER.
>>>>> + */
>>>>> +struct kvm_pmu_event_filter {
>>>>> +    __u16    base_event;
>>>>> +    __u16    nevents;
>>>>> +
>>>>> +#define KVM_PMU_EVENT_ALLOW    0
>>>>> +#define KVM_PMU_EVENT_DENY    1
>>>>> +
>>>>> +    __u8    action;
>>>>> +    __u8    pad[3];
>>>>> +};
>>>>> +
>>>>>  /* for KVM_GET/SET_VCPU_EVENTS */
>>>>>  struct kvm_vcpu_events {
>>>>>      struct {
>>>>> @@ -329,6 +344,7 @@ struct kvm_vcpu_events {
>>>>>  #define KVM_ARM_VCPU_PMU_V3_CTRL    0
>>>>>  #define   KVM_ARM_VCPU_PMU_V3_IRQ    0
>>>>>  #define   KVM_ARM_VCPU_PMU_V3_INIT    1
>>>>> +#define   KVM_ARM_VCPU_PMU_V3_FILTER    2
>>>>>  #define KVM_ARM_VCPU_TIMER_CTRL        1
>>>>>  #define   KVM_ARM_VCPU_TIMER_IRQ_VTIMER        0
>>>>>  #define   KVM_ARM_VCPU_TIMER_IRQ_PTIMER        1
>>>>> diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
>>>>> index eda7b624eab8..8d849ac88a44 100644
>>>>> --- a/virt/kvm/arm/arm.c
>>>>> +++ b/virt/kvm/arm/arm.c
>>>>> @@ -164,6 +164,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
>>>>>      free_percpu(kvm->arch.last_vcpu_ran);
>>>>>      kvm->arch.last_vcpu_ran = NULL;
>>>>>
>>>>> +    bitmap_free(kvm->arch.pmu_filter);
>>>>> +
>>>>>      for (i = 0; i < KVM_MAX_VCPUS; ++i) {
>>>>>          if (kvm->vcpus[i]) {
>>>>>              kvm_vcpu_destroy(kvm->vcpus[i]);
>>>>> diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c
>>>>> index f0d0312c0a55..9f0fd0224d5b 100644
>>>>> --- a/virt/kvm/arm/pmu.c
>>>>> +++ b/virt/kvm/arm/pmu.c
>>>>> @@ -579,10 +579,19 @@ static void kvm_pmu_create_perf_event(struct
>>>>> kvm_vcpu *vcpu, u64 select_idx)
>>>>>
>>>>>      kvm_pmu_stop_counter(vcpu, pmc);
>>>>>      eventsel = data & ARMV8_PMU_EVTYPE_EVENT;
>>>>> +    if (pmc->idx == ARMV8_PMU_CYCLE_IDX)
>>>>> +        eventsel = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
>>>> nit:
>>>>     if (pmc->idx == ARMV8_PMU_CYCLE_IDX)
>>>>         eventsel = ARMV8_PMUV3_PERFCTR_CPU_CYCLES;
>>>>     else
>>>>         eventsel = data & ARMV8_PMU_EVTYPE_EVENT;
>>>
>>> You don't like it? ;-)
>> ? eventset set only once instead of 2 times
> 
> The compiler does the right thing, but sore, I'll change it.
> 
>>>
>>>>>
>>>>>      /* Software increment event does't need to be backed by a perf
>>>>> event */
>>>> nit: while wer are at it fix the does't typo
>>>>> -    if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR &&
>>>>> -        pmc->idx != ARMV8_PMU_CYCLE_IDX)
>>>>> +    if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR)
>>>>> +        return;
>>>>> +
>>>>> +    /*
>>>>> +     * If we have a filter in place and that the event isn't
>>>>> allowed, do
>>>>> +     * not install a perf event either.
>>>>> +     */
>>>>> +    if (vcpu->kvm->arch.pmu_filter &&
>>>>> +        !test_bit(eventsel, vcpu->kvm->arch.pmu_filter))
>>>>>          return;
>>>>>
>>>>>      memset(&attr, 0, sizeof(struct perf_event_attr));
>>>>> @@ -594,8 +603,7 @@ static void kvm_pmu_create_perf_event(struct
>>>>> kvm_vcpu *vcpu, u64 select_idx)
>>>>>      attr.exclude_kernel = data & ARMV8_PMU_EXCLUDE_EL1 ? 1 : 0;
>>>>>      attr.exclude_hv = 1; /* Don't count EL2 events */
>>>>>      attr.exclude_host = 1; /* Don't count host events */
>>>>> -    attr.config = (pmc->idx == ARMV8_PMU_CYCLE_IDX) ?
>>>>> -        ARMV8_PMUV3_PERFCTR_CPU_CYCLES : eventsel;
>>>>> +    attr.config = eventsel;
>>>> So in that case the guest counter will not increment but the guest does
>>>> not know the counter is not implemented. Can't this lead to bad user
>>>> experience. Shouldn't this disablement be reflected in PMCEID0/1 regs?
>>>
>>> The whole point is that we want to keep things hidden from the guest.
>>> Also, PMCEID{0,1} only describe a small set of events (the architected
>>> common events), and not the whole range of microarchitectural events
>>> that the CPU implements.
>>
>> I am still not totally convinced. Things are not totally hidden to the
>> guest as the counter does not increment, right? So a guest may try to
>> use as it is advertised in PMCEID0/1 but not get the expected results
>> leading to potential support request. I agree not all the events are
>> described there but your API also allows to filter out some of the ones
>> that are advertised.
> 
> I think we're at odds when it comes to the goal of this series. If you
> read the CPU TRM, you will find that event X is implemented. You look
> at PMCEIDx, and you find it is not. You still get a support request! ;-)
Yep that's a weird situation indeed, I haven't thought about the TRM.
> 
> Dropping events from these registers is totally trivial, but I'm not
> sure this will reduce the surprise effect. It doesn't hurt anyway, so
> I'll implement that.
Up to you. Or at least you can document it in the commit msg.

Thanks

Eric

> 
>>>
>>>>>
>>>>>      counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);
>>>>>
>>>>> @@ -735,15 +743,6 @@ int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu)
>>>>>
>>>>>  static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu)
>>>>>  {
>>>>> -    if (!kvm_arm_support_pmu_v3())
>>>>> -        return -ENODEV;
>>>>> -
>>>>> -    if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features))
>>>>> -        return -ENXIO;
>>>>> -
>>>>> -    if (vcpu->arch.pmu.created)
>>>>> -        return -EBUSY;
>>>>> -
>>>>>      if (irqchip_in_kernel(vcpu->kvm)) {
>>>>>          int ret;
>>>>>
>>>>> @@ -794,8 +793,19 @@ static bool pmu_irq_is_valid(struct kvm *kvm,
>>>>> int irq)
>>>>>      return true;
>>>>>  }
>>>>>
>>>>> +#define NR_EVENTS    (ARMV8_PMU_EVTYPE_EVENT + 1)
>>>>> +
>>>>>  int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct
>>>>> kvm_device_attr *attr)
>>>>>  {
>>>>> +    if (!kvm_arm_support_pmu_v3())
>>>>> +        return -ENODEV;
>>>>> +
>>>>> +    if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features))
>>>>> +        return -ENODEV;
>>>> I see you changed -ENXIO into -ENODEV. wanted?
>>>
>>> Probably not... but see below.
>>>
>>>>> +
>>>>> +    if (vcpu->arch.pmu.created)
>>>>> +        return -EBUSY;
>>>>> +
>>>>>      switch (attr->attr) {
>>>>>      case KVM_ARM_VCPU_PMU_V3_IRQ: {
>>>>>          int __user *uaddr = (int __user *)(long)attr->addr;
>>>>> @@ -804,9 +814,6 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu
>>>>> *vcpu, struct kvm_device_attr *attr)
>>>>>          if (!irqchip_in_kernel(vcpu->kvm))
>>>>>              return -EINVAL;
>>>>>
>>>>> -        if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features))
>>>>> -            return -ENODEV;
>>>>> -
>>>
>>> Here's why. I wonder if we already have a problem with the consistency
>>> of the
>>> error codes returned to userspace.
>> OK. Then you may document it in the commit message?
> 
> I still need to work out whether we actually have an issue on that.
> 
> [...]
> 
>>>> not related to this patch but shouldn't we advertise this only with
>>>> in-kernel irqchip?
>>>
>>> We do support the PMU without the in-kernel chip, unfortunately... Yes,
>>> supporting this feature was a big mistake.
>> But I see in kvm_arm_pmu_v3_set_attr:
>> case KVM_ARM_VCPU_PMU_V3_IRQ:
>> ../..
>>                 if (!irqchip_in_kernel(vcpu->kvm))
>>                         return -EINVAL;
> 
> Ah, I see what you mean. Yes, we probably shouldn't report that the PMU
> IRQ attribute is supported when we don't have an in-kernel irqchip.
> 
> Thanks,
> 
>         M.





[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux