- Add a VM ioctl that can control which events the guest can monitor. Signed-off-by: ehankland <ehankland@xxxxxxxxxx> --- Changes since v1: -Moved to a vm ioctl rather than a vcpu one -Changed from a whitelist to a configurable filter which can either be white or black -Only restrict GP counters since fixed counters require extra handling and they can be disabled by setting the guest cpuid (though only by setting the number - they can't be disabled individually) --- Documentation/virtual/kvm/api.txt | 25 +++++++++++++ arch/x86/include/asm/kvm_host.h | 2 + arch/x86/include/uapi/asm/kvm.h | 10 +++++ arch/x86/kvm/pmu.c | 61 +++++++++++++++++++++++++++++++ arch/x86/kvm/pmu.h | 1 + arch/x86/kvm/x86.c | 6 +++ include/uapi/linux/kvm.h | 3 ++ 7 files changed, 108 insertions(+) diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 91fd86fcc49f..a9ee8da36595 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -4065,6 +4065,31 @@ KVM_ARM_VCPU_FINALIZE call. See KVM_ARM_VCPU_INIT for details of vcpu features that require finalization using this ioctl. +4.120 KVM_SET_PMU_EVENT_FILTER + +Capability: KVM_CAP_PMU_EVENT_FILTER +Architectures: x86 +Type: vm ioctl +Parameters: struct kvm_pmu_event_filter (in) +Returns: 0 on success, -1 on error + +struct kvm_pmu_event_filter { + __u32 type; + __u32 nevents; + __u64 events[0]; +}; + +This ioctl restricts the set of PMU events that the guest can program to either +a whitelist or a blacklist of events. The eventsel+umask of each event the +guest attempts to program is compared against the events field to determine +whether the guest should have access. This only affects general purpose +counters; fixed purpose counters can be disabled by changing the perfmon +CPUID leaf. + +Valid values for 'type': +#define KVM_PMU_EVENT_WHITELIST 0 +#define KVM_PMU_EVENT_BLACKLIST 1 + 5. The kvm_run structure ------------------------ diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index f46a12a5cf2e..34d017bd1d1b 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -933,6 +933,8 @@ struct kvm_arch { bool guest_can_read_msr_platform_info; bool exception_payload_enabled; + + struct kvm_pmu_event_filter *pmu_event_filter; }; struct kvm_vm_stat { diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index f9b021e16ebc..4d2e905b7d79 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -422,4 +422,14 @@ struct kvm_nested_state { __u8 data[0]; }; +/* for KVM_CAP_PMU_EVENT_FILTER */ +struct kvm_pmu_event_filter { + __u32 type; + __u32 nevents; + __u64 events[0]; +}; + +#define KVM_PMU_EVENT_WHITELIST 0 +#define KVM_PMU_EVENT_BLACKLIST 1 + #endif /* _ASM_X86_KVM_H */ diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index dd745b58ffd8..d674b79ff8da 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -22,6 +22,9 @@ #include "lapic.h" #include "pmu.h" +/* This keeps the total size of the filter under 4k. */ +#define KVM_PMU_EVENT_FILTER_MAX_EVENTS 63 + /* NOTE: * - Each perf counter is defined as "struct kvm_pmc"; * - There are two types of perf counters: general purpose (gp) and fixed. @@ -144,6 +147,10 @@ void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel) { unsigned config, type = PERF_TYPE_RAW; u8 event_select, unit_mask; + struct kvm_arch *arch = &pmc->vcpu->kvm->arch; + struct kvm_pmu_event_filter *filter; + int i; + bool allow_event = true; if (eventsel & ARCH_PERFMON_EVENTSEL_PIN_CONTROL) printk_once("kvm pmu: pin control bit is ignored\n"); @@ -155,6 +162,24 @@ void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel) if (!(eventsel & ARCH_PERFMON_EVENTSEL_ENABLE) || !pmc_is_enabled(pmc)) return; + rcu_read_lock(); + filter = rcu_dereference(arch->pmu_event_filter); + if (filter) { + for (i = 0; i < filter->nevents; i++) + if (filter->events[i] == + (eventsel & AMD64_RAW_EVENT_MASK_NB)) + break; + if (filter->type == KVM_PMU_EVENT_WHITELIST && + i == filter->nevents) + allow_event = false; + if (filter->type == KVM_PMU_EVENT_BLACKLIST && + i < filter->nevents) + allow_event = false; + } + rcu_read_unlock(); + if (!allow_event) + return; + event_select = eventsel & ARCH_PERFMON_EVENTSEL_EVENT; unit_mask = (eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8; @@ -351,3 +376,39 @@ void kvm_pmu_destroy(struct kvm_vcpu *vcpu) { kvm_pmu_reset(vcpu); } + +int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp) +{ + struct kvm_pmu_event_filter tmp, *filter; + size_t size; + int r; + + if (copy_from_user(&tmp, argp, sizeof(tmp))) + return -EFAULT; + + if (tmp.nevents > KVM_PMU_EVENT_FILTER_MAX_EVENTS) + return -E2BIG; + + size = sizeof(tmp) + sizeof(tmp.events[0]) * tmp.nevents; + filter = vmalloc(size); + if (!filter) + return -ENOMEM; + + r = -EFAULT; + if (copy_from_user(filter, argp, size)) + goto cleanup; + + /* Ensure nevents can't be changed between the user copies. */ + *filter = tmp; + + mutex_lock(&kvm->lock); + rcu_swap_protected(kvm->arch.pmu_event_filter, filter, + mutex_is_locked(&kvm->lock)); + mutex_unlock(&kvm->lock); + + synchronize_rcu(); + r = 0; +cleanup: + kvfree(filter); + return r; +} diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h index 22dff661145a..58265f761c3b 100644 --- a/arch/x86/kvm/pmu.h +++ b/arch/x86/kvm/pmu.h @@ -118,6 +118,7 @@ void kvm_pmu_refresh(struct kvm_vcpu *vcpu); void kvm_pmu_reset(struct kvm_vcpu *vcpu); void kvm_pmu_init(struct kvm_vcpu *vcpu); void kvm_pmu_destroy(struct kvm_vcpu *vcpu); +int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp); bool is_vmware_backdoor_pmc(u32 pmc_idx); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 2e302e977dac..9ddfc7193bc6 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3135,6 +3135,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_GET_MSR_FEATURES: case KVM_CAP_MSR_PLATFORM_INFO: case KVM_CAP_EXCEPTION_PAYLOAD: + case KVM_CAP_PMU_EVENT_FILTER: r = 1; break; case KVM_CAP_SYNC_REGS: @@ -4978,6 +4979,11 @@ long kvm_arch_vm_ioctl(struct file *filp, r = kvm_vm_ioctl_hv_eventfd(kvm, &hvevfd); break; } + case KVM_SET_PMU_EVENT_FILTER: { + r = kvm_vm_ioctl_set_pmu_event_filter(kvm, argp); + break; + + } default: r = -ENOTTY; } diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index c2152f3dd02d..b18ff80e356a 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -995,6 +995,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_ARM_SVE 170 #define KVM_CAP_ARM_PTRAUTH_ADDRESS 171 #define KVM_CAP_ARM_PTRAUTH_GENERIC 172 +#define KVM_CAP_PMU_EVENT_FILTER 173 #ifdef KVM_CAP_IRQ_ROUTING @@ -1329,6 +1330,8 @@ struct kvm_s390_ucas_mapping { #define KVM_PPC_GET_RMMU_INFO _IOW(KVMIO, 0xb0, struct kvm_ppc_rmmu_info) /* Available with KVM_CAP_PPC_GET_CPU_CHAR */ #define KVM_PPC_GET_CPU_CHAR _IOR(KVMIO, 0xb1, struct kvm_ppc_cpu_char) +/* Availabile with KVM_CAP_PMU_EVENT_FILTER */ +#define KVM_SET_PMU_EVENT_FILTER _IOW(KVMIO, 0xb2, struct kvm_pmu_event_filter) /* ioctl for vm fd */ #define KVM_CREATE_DEVICE _IOWR(KVMIO, 0xe0, struct kvm_create_device)