Updated the recipients list. Thanks Harsh for reviewing the patch. I have addressed your review comments in the v2 of the patch located at [1]. [1] https://lore.kernel.org/all/20250123115538.86821-1-vaibhav@xxxxxxxxxxxxx Harsh Prateek Bora <harshpb@xxxxxxxxxxxxx> writes: > Hi Vaibhav, > > On 1/15/25 12:37, Vaibhav Jain wrote: >> Add support for reporting Hostwide state counters for nested KVM pseries >> guests running with 'cap-nested-hv' with Qemu-TCG acting as > > I hope you meant cap-nested-papr for APIv2 (cap-nested-hv is APIv1). > >> L0-hypervisor. sPAPR supports reporting various stats counters for >> Guest-Management-Area(GMA) thats owned by L0-Hypervisor and are documented >> at [1]. These stats counters are exposed via a new bit-flag named >> 'getHostWideState' for the H_GUEST_GET_STATE hcall. Once this flag is set >> the hcall should populate the Guest-State-Elements in the requested GSB >> with the stat counter values. Currently following five counters are >> supported: >> >> * host_heap : The currently used bytes in the >> Hypervisor's Guest Management Space >> associated with the Host Partition. >> * host_heap_max : The maximum bytes available in the >> Hypervisor's Guest Management Space >> associated with the Host Partition. >> * host_pagetable : The currently used bytes in the >> Hypervisor's Guest Page Table Management >> Space associated with the Host Partition. >> * host_pagetable_max : The maximum bytes available in the >> Hypervisor's Guest Page Table Management >> Space associated with the Host Partition. >> * host_pagetable_reclaim: The amount of space in bytes that has >> been reclaimed due to overcommit in the >> Hypervisor's Guest Page Table Management >> Space associated with the Host Partition. >> >> At the moment '0' is being reported for all these counters as these >> counters doesnt align with how L0-Qemu manages Guest memory. >> >> The patch implements support for these counters by adding new members to >> the 'struct SpaprMachineStateNested'. These new members are then plugged >> into the existing 'guest_state_element_types[]' with the help of a new >> macro 'GSBE_MACHINE_NESTED_DW' together with a new helper >> 'get_machine_ptr()'. guest_state_request_check() is updated to ensure >> correctness of the requested GSB and finally h_guest_getset_state() is >> updated to handle the newly introduced flag >> 'GUEST_STATE_REQUEST_HOST_WIDE'. >> >> This patch is tested with the proposed linux-kernel implementation to >> expose these stat-counter as perf-events at [2]. >> >> [2] >> https://lore.kernel.org/all/20241222140247.174998-1-vaibhav@xxxxxxxxxxxxx >> >> [1] >> https://lore.kernel.org/all/20241222140247.174998-2-vaibhav@xxxxxxxxxxxxx >> >> Signed-off-by: Vaibhav Jain <vaibhav@xxxxxxxxxxxxx> >> --- >> hw/ppc/spapr_nested.c | 82 ++++++++++++++++++++++++++--------- >> include/hw/ppc/spapr_nested.h | 36 ++++++++++++--- >> 2 files changed, 93 insertions(+), 25 deletions(-) >> >> diff --git a/hw/ppc/spapr_nested.c b/hw/ppc/spapr_nested.c >> index 7def8eb73b..d912b99e92 100644 >> --- a/hw/ppc/spapr_nested.c >> +++ b/hw/ppc/spapr_nested.c >> @@ -64,10 +64,9 @@ static >> SpaprMachineStateNestedGuest *spapr_get_nested_guest(SpaprMachineState *spapr, >> target_ulong guestid) >> { >> - SpaprMachineStateNestedGuest *guest; >> - >> - guest = g_hash_table_lookup(spapr->nested.guests, GINT_TO_POINTER(guestid)); >> - return guest; >> + return spapr->nested.guests ? >> + g_hash_table_lookup(spapr->nested.guests, >> + GINT_TO_POINTER(guestid)) : NULL; >> } >> >> bool spapr_get_pate_nested_papr(SpaprMachineState *spapr, PowerPCCPU *cpu, >> @@ -613,6 +612,13 @@ static void *get_guest_ptr(SpaprMachineStateNestedGuest *guest, >> return guest; /* for GSBE_NESTED */ >> } >> >> +static void *get_machine_ptr(SpaprMachineStateNestedGuest *guest, >> + target_ulong vcpuid) >> +{ >> + SpaprMachineState *spapr = SPAPR_MACHINE(qdev_get_machine()); >> + return &spapr->nested; >> +} >> + >> /* >> * set=1 means the L1 is trying to set some state >> * set=0 means the L1 is trying to get some state >> @@ -1012,7 +1018,12 @@ struct guest_state_element_type guest_state_element_types[] = { >> GSBE_NESTED_VCPU(GSB_VCPU_OUT_BUFFER, 0x10, runbufout, copy_state_runbuf), >> GSBE_NESTED_VCPU(GSB_VCPU_OUT_BUF_MIN_SZ, 0x8, runbufout, out_buf_min_size), >> GSBE_NESTED_VCPU(GSB_VCPU_HDEC_EXPIRY_TB, 0x8, hdecr_expiry_tb, >> - copy_state_hdecr) >> + copy_state_hdecr), >> + GSBE_MACHINE_NESTED_DW(GSB_GUEST_HEAP, current_guest_heap), >> + GSBE_MACHINE_NESTED_DW(GSB_GUEST_HEAP_MAX, max_guest_heap), >> + GSBE_MACHINE_NESTED_DW(GSB_GUEST_PGTABLE_SIZE, current_pgtable_size), >> + GSBE_MACHINE_NESTED_DW(GSB_GUEST_PGTABLE_SIZE_MAX, max_pgtable_size), >> + GSBE_MACHINE_NESTED_DW(GSB_GUEST_PGTABLE_RECLAIM, pgtable_reclaim_size), >> }; >> >> void spapr_nested_gsb_init(void) >> @@ -1030,6 +1041,10 @@ void spapr_nested_gsb_init(void) >> else if (type->id >= GSB_VCPU_IN_BUFFER) >> /* 0x0c00 - 0xf000 Thread + RW */ >> type->flags = 0; >> + else if (type->id >= GSB_GUEST_HEAP) >> + /*0x0800 - 0x0804 Hostwide Counters*/ >> + type->flags = GUEST_STATE_ELEMENT_TYPE_FLAG_HOST_WIDE | >> + GUEST_STATE_ELEMENT_TYPE_FLAG_READ_ONLY; >> else if (type->id >= GSB_VCPU_LPVR) >> /* 0x0003 - 0x0bff Guest + RW */ > > Now above comment can be updated for range until 0x07ff. > >> type->flags = GUEST_STATE_ELEMENT_TYPE_FLAG_GUEST_WIDE; >> @@ -1138,22 +1153,30 @@ static bool guest_state_request_check(struct guest_state_request *gsr) >> return false; >> } >> >> - if (type->flags & GUEST_STATE_ELEMENT_TYPE_FLAG_GUEST_WIDE) { >> + if (type->flags & GUEST_STATE_ELEMENT_TYPE_FLAG_HOST_WIDE) { >> + /* Hostwide elements cant be clubbed with other types */ >> + if (!(gsr->flags & GUEST_STATE_REQUEST_HOST_WIDE)) { >> + qemu_log_mask(LOG_GUEST_ERROR, "trying to get/set a host wide " >> + "Element ID:%04x.\n", id); >> + return false; >> + } >> + } else if (type->flags & GUEST_STATE_ELEMENT_TYPE_FLAG_GUEST_WIDE) { >> /* guest wide element type */ >> if (!(gsr->flags & GUEST_STATE_REQUEST_GUEST_WIDE)) { >> - qemu_log_mask(LOG_GUEST_ERROR, "trying to set a guest wide " >> + qemu_log_mask(LOG_GUEST_ERROR, "trying to get/set a guest wide " >> "Element ID:%04x.\n", id); >> return false; >> } >> } else { >> /* thread wide element type */ >> - if (gsr->flags & GUEST_STATE_REQUEST_GUEST_WIDE) { >> - qemu_log_mask(LOG_GUEST_ERROR, "trying to set a thread wide " >> - "Element ID:%04x.\n", id); >> + if (gsr->flags & (GUEST_STATE_REQUEST_GUEST_WIDE | >> + GUEST_STATE_ELEMENT_TYPE_FLAG_HOST_WIDE)) { >> + qemu_log_mask(LOG_GUEST_ERROR, "trying to get/set a thread wide" >> + " Element ID:%04x.\n", id); >> return false; >> } >> } >> -next_element: >> + next_element: >> element = guest_state_element_next(element, &len, &num_elements); >> >> } >> @@ -1509,26 +1532,45 @@ static target_ulong h_guest_getset_state(PowerPCCPU *cpu, >> target_ulong buf = args[3]; >> target_ulong buflen = args[4]; >> struct guest_state_request gsr; >> - SpaprMachineStateNestedGuest *guest; >> + SpaprMachineStateNestedGuest *guest = NULL; >> >> - guest = spapr_get_nested_guest(spapr, lpid); >> - if (!guest) { >> - return H_P2; >> - } >> gsr.buf = buf; >> assert(buflen <= GSB_MAX_BUF_SIZE); >> gsr.len = buflen; >> gsr.flags = 0; >> - if (flags & H_GUEST_GETSET_STATE_FLAG_GUEST_WIDE) { >> + >> + /* Works for both get/set state */ >> + if (cpu_to_be64(flags) & GUEST_STATE_REQUEST_GUEST_WIDE) { >> gsr.flags |= GUEST_STATE_REQUEST_GUEST_WIDE; >> } >> - if (flags & ~H_GUEST_GETSET_STATE_FLAG_GUEST_WIDE) { >> - return H_PARAMETER; /* flag not supported yet */ >> - } >> >> if (set) { >> + if (flags & ~H_GUEST_SET_STATE_FLAGS_MASK) { >> + return H_PARAMETER; >> + } >> gsr.flags |= GUEST_STATE_REQUEST_SET; >> + } else { >> + /* >> + * No reserved fields to be set in flags nor both >> + * GUEST/HOST wide bits >> + */ >> + if ((flags == H_GUEST_GET_STATE_FLAGS_MASK) || >> + (flags & ~H_GUEST_GET_STATE_FLAGS_MASK)) { >> + return H_PARAMETER; >> + } >> + >> + if (cpu_to_be64(flags) & GUEST_STATE_REQUEST_HOST_WIDE) { > > This conversion can be avoided with the macro in BE format. > >> + gsr.flags |= GUEST_STATE_REQUEST_HOST_WIDE; >> + } >> } >> + >> + if (!(gsr.flags & GUEST_STATE_REQUEST_HOST_WIDE)) { >> + guest = spapr_get_nested_guest(spapr, lpid); >> + if (!guest) { >> + return H_P2; >> + } >> + } >> + >> return map_and_getset_state(cpu, guest, vcpuid, &gsr); >> } >> >> diff --git a/include/hw/ppc/spapr_nested.h b/include/hw/ppc/spapr_nested.h >> index e420220484..c0ffd0a696 100644 >> --- a/include/hw/ppc/spapr_nested.h >> +++ b/include/hw/ppc/spapr_nested.h >> @@ -11,11 +11,18 @@ >> #define GSB_TB_OFFSET 0x0004 /* Timebase Offset */ >> #define GSB_PART_SCOPED_PAGETBL 0x0005 /* Partition Scoped Page Table */ >> #define GSB_PROCESS_TBL 0x0006 /* Process Table */ >> - /* RESERVED 0x0007 - 0x0BFF */ >> + /* RESERVED 0x0007 - 0x07FF */ >> +#define GSB_PROCESS_TBL 0x0006 /* Process Table */ > > This duplication of macro seems to be copy paste error. > >> #define GSB_VCPU_IN_BUFFER 0x0C00 /* Run VCPU Input Buffer */ >> #define GSB_VCPU_OUT_BUFFER 0x0C01 /* Run VCPU Out Buffer */ >> #define GSB_VCPU_VPA 0x0C02 /* HRA to Guest VCPU VPA */ >> /* RESERVED 0x0C03 - 0x0FFF */ >> +#define GSB_GUEST_HEAP 0x0800 /* Guest Management Heap Size */ >> +#define GSB_GUEST_HEAP_MAX 0x0801 /* Guest Management Heap Max Size */ >> +#define GSB_GUEST_PGTABLE_SIZE 0x0802 /* Guest Pagetable Size */ >> +#define GSB_GUEST_PGTABLE_SIZE_MAX 0x0803 /* Guest Pagetable Max Size */ >> +#define GSB_GUEST_PGTABLE_RECLAIM 0x0804 /* Pagetable Reclaim in bytes */ >> + /* RESERVED 0x0805 - 0x0FFF */ > > Above additions should be moved before GSB_VCPU_IN_BUFFER for sequence. > Also, RESERVED range from 0x0805 ends at 0x0BFF. > >> #define GSB_VCPU_GPR0 0x1000 >> #define GSB_VCPU_GPR1 0x1001 >> #define GSB_VCPU_GPR2 0x1002 >> @@ -196,6 +203,13 @@ typedef struct SpaprMachineStateNested { >> #define NESTED_API_PAPR 2 >> bool capabilities_set; >> uint32_t pvr_base; >> + /* Hostwide counters */ >> + uint64_t current_guest_heap; >> + uint64_t max_guest_heap; >> + uint64_t current_pgtable_size; >> + uint64_t max_pgtable_size; >> + uint64_t pgtable_reclaim_size; >> + >> GHashTable *guests; >> } SpaprMachineStateNested; >> >> @@ -229,9 +243,11 @@ typedef struct SpaprMachineStateNestedGuest { >> #define HVMASK_HDEXCR 0x00000000FFFFFFFF >> #define HVMASK_TB_OFFSET 0x000000FFFFFFFFFF >> #define GSB_MAX_BUF_SIZE (1024 * 1024) >> -#define H_GUEST_GETSET_STATE_FLAG_GUEST_WIDE 0x8000000000000000 >> -#define GUEST_STATE_REQUEST_GUEST_WIDE 0x1 >> -#define GUEST_STATE_REQUEST_SET 0x2 >> +#define H_GUEST_GET_STATE_FLAGS_MASK 0xC000000000000000ULL >> +#define H_GUEST_SET_STATE_FLAGS_MASK 0x8000000000000000ULL >> +#define GUEST_STATE_REQUEST_GUEST_WIDE 0x0000000000000080ULL > > Let us keep the PPC bit masks in BE format as per convention and use > endian-ness conversion helpers in code if needed. > > regards, > Harsh > >> +#define GUEST_STATE_REQUEST_HOST_WIDE 0x0000000000000040ULL >> +#define GUEST_STATE_REQUEST_SET 0x0000000000000008ULL >> >> /* >> * As per ISA v3.1B, following bits are reserved: >> @@ -251,6 +267,15 @@ typedef struct SpaprMachineStateNestedGuest { >> .copy = (c) \ >> } >> >> +#define GSBE_MACHINE_NESTED_DW(i, f) { \ >> + .id = (i), \ >> + .size = 8, \ >> + .location = get_machine_ptr, \ >> + .offset = offsetof(struct SpaprMachineStateNested, f), \ >> + .copy = copy_state_8to8, \ >> + .mask = HVMASK_DEFAULT \ >> +} >> + >> #define GSBE_NESTED(i, sz, f, c) { \ >> .id = (i), \ >> .size = (sz), \ >> @@ -509,7 +534,8 @@ struct guest_state_element_type { >> uint16_t id; >> int size; >> #define GUEST_STATE_ELEMENT_TYPE_FLAG_GUEST_WIDE 0x1 >> -#define GUEST_STATE_ELEMENT_TYPE_FLAG_READ_ONLY 0x2 >> +#define GUEST_STATE_ELEMENT_TYPE_FLAG_HOST_WIDE 0x2 >> +#define GUEST_STATE_ELEMENT_TYPE_FLAG_READ_ONLY 0x4 >> uint16_t flags; >> void *(*location)(SpaprMachineStateNestedGuest *, target_ulong); >> size_t offset; -- Cheers ~ Vaibhav