> On 10 Mar 2025, at 12:42 PM, Vaibhav Jain <vaibhav@xxxxxxxxxxxxx> wrote: > > Athira Rajeev <atrajeev@xxxxxxxxxxxxx> writes: > >>> On 24 Feb 2025, at 6:45 PM, Vaibhav Jain <vaibhav@xxxxxxxxxxxxx> wrote: >>> >>> Introduce a new PMU named 'kvm-hv' inside a new module named 'kvm-hv-pmu' >>> to report Book3s kvm-hv specific performance counters. This will expose >>> KVM-HV specific performance attributes to user-space via kernel's PMU >>> infrastructure and would enable users to monitor active kvm-hv based guests. >>> >>> The patch creates necessary scaffolding for the new PMU callbacks and >>> introduces the new kernel module named 'kvm-hv-pmu' which is built with >>> CONFIG_KVM_BOOK3S_HV_PMU. The patch doesn't introduce any perf-events yet, >>> which will be introduced in later patches >>> >>> Signed-off-by: Vaibhav Jain <vaibhav@xxxxxxxxxxxxx> >>> >>> --- >>> Changelog >>> >>> v3->v4: >>> * Introduced a new kernel module named 'kvm-hv-pmu' to host the new PMU >>> instead of building it as part of the KVM-HV module. [ Maddy ] >>> * Moved the code from arch/powerpc/kvm to arch/powerpc/perf [ Athira ] >>> * Added a new config named KVM_BOOK3S_HV_PMU to arch/powerpc/kvm/Kconfig >>> >>> v2->v3: >>> * Fixed a build warning reported by kernel build robot. 
>>> Link: >>> https://lore.kernel.org/oe-kbuild-all/202501171030.3x0gqW8G-lkp@xxxxxxxxx >>> >>> v1->v2: >>> * Fixed an issue of kvm-hv not loading on baremetal kvm [Gautam] >>> --- >>> arch/powerpc/kvm/Kconfig | 13 ++++ >>> arch/powerpc/perf/Makefile | 2 + >>> arch/powerpc/perf/kvm-hv-pmu.c | 138 +++++++++++++++++++++++++++++++++ >>> 3 files changed, 153 insertions(+) >>> create mode 100644 arch/powerpc/perf/kvm-hv-pmu.c >>> >>> diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig >>> index dbfdc126bf14..5f0ce19e7e27 100644 >>> --- a/arch/powerpc/kvm/Kconfig >>> +++ b/arch/powerpc/kvm/Kconfig >>> @@ -83,6 +83,7 @@ config KVM_BOOK3S_64_HV >>> depends on KVM_BOOK3S_64 && PPC_POWERNV >>> select KVM_BOOK3S_HV_POSSIBLE >>> select KVM_GENERIC_MMU_NOTIFIER >>> + select KVM_BOOK3S_HV_PMU >>> select CMA >>> help >>> Support running unmodified book3s_64 guest kernels in >>> @@ -171,6 +172,18 @@ config KVM_BOOK3S_HV_NESTED_PMU_WORKAROUND >>> those buggy L1s which saves the L2 state, at the cost of performance >>> in all nested-capable guest entry/exit. >>> >>> +config KVM_BOOK3S_HV_PMU >>> + tristate "Hypervisor Perf events for KVM Book3s-HV" >>> + depends on KVM_BOOK3S_64_HV && HV_PERF_CTRS >>> + help >>> + Enable Book3s-HV Hypervisor Perf events PMU named 'kvm-hv'. These >>> + Perf events give an overview of hypervisor performance overall >>> + instead of specific guests. 
Currently the PMU reports >>> + L0-Hypervisor stats on a kvm-hv enabled PSeries LPAR like: >>> + * Total/Used Guest-Heap >>> + * Total/Used Guest Page-table Memory >>> + * Total amount of Guest Page-table Memory reclaimed >>> + >>> config KVM_BOOKE_HV >>> bool >>> >>> diff --git a/arch/powerpc/perf/Makefile b/arch/powerpc/perf/Makefile >>> index ac2cf58d62db..7f53fcb7495a 100644 >>> --- a/arch/powerpc/perf/Makefile >>> +++ b/arch/powerpc/perf/Makefile >>> @@ -18,6 +18,8 @@ obj-$(CONFIG_HV_PERF_CTRS) += hv-24x7.o hv-gpci.o hv-common.o >>> >>> obj-$(CONFIG_VPA_PMU) += vpa-pmu.o >>> >>> +obj-$(CONFIG_KVM_BOOK3S_HV_PMU) += kvm-hv-pmu.o >>> + >>> obj-$(CONFIG_PPC_8xx) += 8xx-pmu.o >>> >>> obj-$(CONFIG_PPC64) += $(obj64-y) >>> diff --git a/arch/powerpc/perf/kvm-hv-pmu.c b/arch/powerpc/perf/kvm-hv-pmu.c >>> new file mode 100644 >>> index 000000000000..c154f54e09e2 >>> --- /dev/null >>> +++ b/arch/powerpc/perf/kvm-hv-pmu.c >>> @@ -0,0 +1,138 @@ >>> +// SPDX-License-Identifier: GPL-2.0 >>> +/* >>> + * Description: PMUs specific to running nested KVM-HV guests >>> + * on Book3S processors (specifically POWER9 and later). 
>>> + */ >>> + >>> +#define pr_fmt(fmt) "kvmppc-pmu: " fmt >>> + >>> +#include "asm-generic/local64.h" >>> +#include <linux/kernel.h> >>> +#include <linux/errno.h> >>> +#include <linux/ratelimit.h> >>> +#include <linux/kvm_host.h> >>> +#include <linux/gfp_types.h> >>> +#include <linux/pgtable.h> >>> +#include <linux/perf_event.h> >>> +#include <linux/spinlock_types.h> >>> +#include <linux/spinlock.h> >>> + >>> +#include <asm/types.h> >>> +#include <asm/kvm_ppc.h> >>> +#include <asm/kvm_book3s.h> >>> +#include <asm/mmu.h> >>> +#include <asm/pgalloc.h> >>> +#include <asm/pte-walk.h> >>> +#include <asm/reg.h> >>> +#include <asm/plpar_wrappers.h> >>> +#include <asm/firmware.h> >>> + >>> +enum kvmppc_pmu_eventid { >>> + KVMPPC_EVENT_MAX, >>> +}; >>> + >>> +static struct attribute *kvmppc_pmu_events_attr[] = { >>> + NULL, >>> +}; >>> + >>> +static const struct attribute_group kvmppc_pmu_events_group = { >>> + .name = "events", >>> + .attrs = kvmppc_pmu_events_attr, >>> +}; >>> + >>> +PMU_FORMAT_ATTR(event, "config:0"); >>> +static struct attribute *kvmppc_pmu_format_attr[] = { >>> + &format_attr_event.attr, >>> + NULL, >>> +}; >>> + >>> +static struct attribute_group kvmppc_pmu_format_group = { >>> + .name = "format", >>> + .attrs = kvmppc_pmu_format_attr, >>> +}; >>> + >>> +static const struct attribute_group *kvmppc_pmu_attr_groups[] = { >>> + &kvmppc_pmu_events_group, >>> + &kvmppc_pmu_format_group, >>> + NULL, >>> +}; >>> + >>> +static int kvmppc_pmu_event_init(struct perf_event *event) >>> +{ >>> + unsigned int config = event->attr.config; >>> + >>> + pr_debug("%s: Event(%p) id=%llu cpu=%x on_cpu=%x config=%u", >>> + __func__, event, event->id, event->cpu, >>> + event->oncpu, config); >>> + >>> + if (event->attr.type != event->pmu->type) >>> + return -ENOENT; >>> + >>> + if (config >= KVMPPC_EVENT_MAX) >>> + return -EINVAL; >>> + >>> + local64_set(&event->hw.prev_count, 0); >>> + local64_set(&event->count, 0); >>> + >>> + return 0; >>> +} >>> + >>> +static void 
kvmppc_pmu_del(struct perf_event *event, int flags) >>> +{ >>> +} >>> + >>> +static int kvmppc_pmu_add(struct perf_event *event, int flags) >>> +{ >>> + return 0; >>> +} >>> + >>> +static void kvmppc_pmu_read(struct perf_event *event) >>> +{ >>> +} >>> + >>> +/* L1 wide counters PMU */ >>> +static struct pmu kvmppc_pmu = { >>> + .module = THIS_MODULE, >>> + .task_ctx_nr = perf_sw_context, >>> + .name = "kvm-hv", >>> + .event_init = kvmppc_pmu_event_init, >>> + .add = kvmppc_pmu_add, >>> + .del = kvmppc_pmu_del, >>> + .read = kvmppc_pmu_read, >>> + .attr_groups = kvmppc_pmu_attr_groups, >>> + .type = -1, >>> +}; >>> + >>> +static int __init kvmppc_register_pmu(void) >>> +{ >>> + int rc = -EOPNOTSUPP; >>> + >>> + /* only support events for nestedv2 right now */ >>> + if (kvmhv_is_nestedv2()) { >> >> We don’t need PVR check here ? Description of module says this is >> supported for power9 and later. > The hcalls this module depends on, are only available to LPAR/KVM-Guest running with api-v2 support hence this is needed. Ok, I understand we need kvmhv_is_nestedv2(). My question is whether we also need a PVR check here. > >>> + /* Setup done now register the PMU */ >>> + pr_info("Registering kvm-hv pmu"); >>> + >>> + /* Register only if we aren't already registered */ >> Not sure why we need this… Have you seen any issue without this ? I don’t see any similar check in arch/powerpc/perf/vpa-pmu.c , >> > This check is taken from the previous version of this patch which > prevented struct pmu initialization multiple times. However with > now a separate module this check is probably not needed. Sure, please check and remove it if it is not needed. Thanks, Athira > >>> + rc = (kvmppc_pmu.type == -1) ? 
>>> + perf_pmu_register(&kvmppc_pmu, kvmppc_pmu.name, >>> + -1) : 0; >>> + } >>> + >>> + return rc; >>> +} >>> + >>> +static void __exit kvmppc_unregister_pmu(void) >>> +{ >>> + if (kvmhv_is_nestedv2()) { >>> + if (kvmppc_pmu.type != -1) >>> + perf_pmu_unregister(&kvmppc_pmu); >>> + >>> + pr_info("kvmhv_pmu unregistered.\n"); >>> + } >>> +} >>> + >>> +module_init(kvmppc_register_pmu); >>> +module_exit(kvmppc_unregister_pmu); >>> +MODULE_DESCRIPTION("KVM PPC Book3s-hv PMU"); >>> +MODULE_AUTHOR("Vaibhav Jain <vaibhav@xxxxxxxxxxxxx>"); >>> +MODULE_LICENSE("GPL"); >>> -- >>> 2.48.1 >>> >>> >>> >> > > -- > Cheers > ~ Vaibhav