> On 24 Feb 2025, at 6:45 PM, Vaibhav Jain <vaibhav@xxxxxxxxxxxxx> wrote: > > Introduce a new PMU named 'kvm-hv' inside a new module named 'kvm-hv-pmu' > to report Book3s kvm-hv specific performance counters. This will expose > KVM-HV specific performance attributes to user-space via kernel's PMU > infrastructure and would enable users to monitor active kvm-hv based guests. > > The patch creates necessary scaffolding for the new PMU callbacks and > introduces the new kernel module named 'kvm-hv-pmu' which is built with > CONFIG_KVM_BOOK3S_HV_PMU. The patch doesn't introduce any perf-events yet, > which will be introduced in later patches. > > Signed-off-by: Vaibhav Jain <vaibhav@xxxxxxxxxxxxx> > > --- > Changelog > > v3->v4: > * Introduced a new kernel module named 'kvm-hv-pmu' to host the new PMU > instead of building it as part of the KVM-HV module. [ Maddy ] > * Moved the code from arch/powerpc/kvm to arch/powerpc/perf [ Atheera ] > * Added a new config named KVM_BOOK3S_HV_PMU to arch/powerpc/kvm/Kconfig > > v2->v3: > * Fixed a build warning reported by kernel build robot. 
> Link: > https://lore.kernel.org/oe-kbuild-all/202501171030.3x0gqW8G-lkp@xxxxxxxxx > > v1->v2: > * Fixed an issue of kvm-hv not loading on baremetal kvm [Gautam] > --- > arch/powerpc/kvm/Kconfig | 13 ++++ > arch/powerpc/perf/Makefile | 2 + > arch/powerpc/perf/kvm-hv-pmu.c | 138 +++++++++++++++++++++++++++++++++ > 3 files changed, 153 insertions(+) > create mode 100644 arch/powerpc/perf/kvm-hv-pmu.c > > diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig > index dbfdc126bf14..5f0ce19e7e27 100644 > --- a/arch/powerpc/kvm/Kconfig > +++ b/arch/powerpc/kvm/Kconfig > @@ -83,6 +83,7 @@ config KVM_BOOK3S_64_HV > depends on KVM_BOOK3S_64 && PPC_POWERNV > select KVM_BOOK3S_HV_POSSIBLE > select KVM_GENERIC_MMU_NOTIFIER > + select KVM_BOOK3S_HV_PMU > select CMA > help > Support running unmodified book3s_64 guest kernels in > @@ -171,6 +172,18 @@ config KVM_BOOK3S_HV_NESTED_PMU_WORKAROUND > those buggy L1s which saves the L2 state, at the cost of performance > in all nested-capable guest entry/exit. > > +config KVM_BOOK3S_HV_PMU > + tristate "Hypervisor Perf events for KVM Book3s-HV" > + depends on KVM_BOOK3S_64_HV && HV_PERF_CTRS > + help > + Enable Book3s-HV Hypervisor Perf events PMU named 'kvm-hv'. These > + Perf events give an overview of hypervisor performance overall > + instead of a specific guests. 
Currently the PMU reports > + L0-Hypervisor stats on a kvm-hv enabled PSeries LPAR like: > + * Total/Used Guest-Heap > + * Total/Used Guest Page-table Memory > + * Total amount of Guest Page-table Memory reclaimed > + > config KVM_BOOKE_HV > bool > > diff --git a/arch/powerpc/perf/Makefile b/arch/powerpc/perf/Makefile > index ac2cf58d62db..7f53fcb7495a 100644 > --- a/arch/powerpc/perf/Makefile > +++ b/arch/powerpc/perf/Makefile > @@ -18,6 +18,8 @@ obj-$(CONFIG_HV_PERF_CTRS) += hv-24x7.o hv-gpci.o hv-common.o > > obj-$(CONFIG_VPA_PMU) += vpa-pmu.o > > +obj-$(CONFIG_KVM_BOOK3S_HV_PMU) += kvm-hv-pmu.o > + > obj-$(CONFIG_PPC_8xx) += 8xx-pmu.o > > obj-$(CONFIG_PPC64) += $(obj64-y) > diff --git a/arch/powerpc/perf/kvm-hv-pmu.c b/arch/powerpc/perf/kvm-hv-pmu.c > new file mode 100644 > index 000000000000..c154f54e09e2 > --- /dev/null > +++ b/arch/powerpc/perf/kvm-hv-pmu.c > @@ -0,0 +1,138 @@ > +// SPDX-License-Identifier: GPL-2.0 > +/* > + * Description: PMUs specific to running nested KVM-HV guests > + * on Book3S processors (specifically POWER9 and later). 
> + */ > + > +#define pr_fmt(fmt) "kvmppc-pmu: " fmt > + > +#include "asm-generic/local64.h" > +#include <linux/kernel.h> > +#include <linux/errno.h> > +#include <linux/ratelimit.h> > +#include <linux/kvm_host.h> > +#include <linux/gfp_types.h> > +#include <linux/pgtable.h> > +#include <linux/perf_event.h> > +#include <linux/spinlock_types.h> > +#include <linux/spinlock.h> > + > +#include <asm/types.h> > +#include <asm/kvm_ppc.h> > +#include <asm/kvm_book3s.h> > +#include <asm/mmu.h> > +#include <asm/pgalloc.h> > +#include <asm/pte-walk.h> > +#include <asm/reg.h> > +#include <asm/plpar_wrappers.h> > +#include <asm/firmware.h> > + > +enum kvmppc_pmu_eventid { > + KVMPPC_EVENT_MAX, > +}; > + > +static struct attribute *kvmppc_pmu_events_attr[] = { > + NULL, > +}; > + > +static const struct attribute_group kvmppc_pmu_events_group = { > + .name = "events", > + .attrs = kvmppc_pmu_events_attr, > +}; > + > +PMU_FORMAT_ATTR(event, "config:0"); > +static struct attribute *kvmppc_pmu_format_attr[] = { > + &format_attr_event.attr, > + NULL, > +}; > + > +static struct attribute_group kvmppc_pmu_format_group = { > + .name = "format", > + .attrs = kvmppc_pmu_format_attr, > +}; > + > +static const struct attribute_group *kvmppc_pmu_attr_groups[] = { > + &kvmppc_pmu_events_group, > + &kvmppc_pmu_format_group, > + NULL, > +}; > + > +static int kvmppc_pmu_event_init(struct perf_event *event) > +{ > + unsigned int config = event->attr.config; > + > + pr_debug("%s: Event(%p) id=%llu cpu=%x on_cpu=%x config=%u", > + __func__, event, event->id, event->cpu, > + event->oncpu, config); > + > + if (event->attr.type != event->pmu->type) > + return -ENOENT; > + > + if (config >= KVMPPC_EVENT_MAX) > + return -EINVAL; > + > + local64_set(&event->hw.prev_count, 0); > + local64_set(&event->count, 0); > + > + return 0; > +} > + > +static void kvmppc_pmu_del(struct perf_event *event, int flags) > +{ > +} > + > +static int kvmppc_pmu_add(struct perf_event *event, int flags) > +{ > + return 0; > +} 
> + > +static void kvmppc_pmu_read(struct perf_event *event) > +{ > +} > + > +/* L1 wide counters PMU */ > +static struct pmu kvmppc_pmu = { > + .module = THIS_MODULE, > + .task_ctx_nr = perf_sw_context, > + .name = "kvm-hv", > + .event_init = kvmppc_pmu_event_init, > + .add = kvmppc_pmu_add, > + .del = kvmppc_pmu_del, > + .read = kvmppc_pmu_read, > + .attr_groups = kvmppc_pmu_attr_groups, > + .type = -1, > +}; > + > +static int __init kvmppc_register_pmu(void) > +{ > + int rc = -EOPNOTSUPP; > + > + /* only support events for nestedv2 right now */ > + if (kvmhv_is_nestedv2()) { Don’t we need a PVR check here? The module description says this is supported on POWER9 and later. > + /* Setup done now register the PMU */ > + pr_info("Registering kvm-hv pmu"); > + > + /* Register only if we arent already registered */ Not sure why we need this… Have you seen any issue without it? I don’t see a similar check in arch/powerpc/perf/vpa-pmu.c. > + rc = (kvmppc_pmu.type == -1) ? > + perf_pmu_register(&kvmppc_pmu, kvmppc_pmu.name, > + -1) : 0; > + } > + > + return rc; > +} > + > +static void __exit kvmppc_unregister_pmu(void) > +{ > + if (kvmhv_is_nestedv2()) { > + if (kvmppc_pmu.type != -1) > + perf_pmu_unregister(&kvmppc_pmu); > + > + pr_info("kvmhv_pmu unregistered.\n"); > + } > +} > + > +module_init(kvmppc_register_pmu); > +module_exit(kvmppc_unregister_pmu); > +MODULE_DESCRIPTION("KVM PPC Book3s-hv PMU"); > +MODULE_AUTHOR("Vaibhav Jain <vaibhav@xxxxxxxxxxxxx>"); > +MODULE_LICENSE("GPL"); > -- > 2.48.1 > > >