On Sat, Dec 30, 2023 at 3:20 AM Atish Patra <atishp@xxxxxxxxxxxx> wrote: > > The PMU snapshot function minimizes the number of traps taken when the > guest configures or accesses the hpmcounters. If the snapshot feature > is enabled, the hypervisor updates the shared memory with counter > data and the state of overflown counters. The guest can just read the > shared memory instead of relying on trap & emulate by the hypervisor. > > This patch doesn't implement counter overflow yet. > > Signed-off-by: Atish Patra <atishp@xxxxxxxxxxxx> LGTM. Reviewed-by: Anup Patel <anup@xxxxxxxxxxxxxx> Regards, Anup > --- > arch/riscv/include/asm/kvm_vcpu_pmu.h | 9 ++ > arch/riscv/kvm/aia.c | 5 ++ > arch/riscv/kvm/vcpu_onereg.c | 7 +- > arch/riscv/kvm/vcpu_pmu.c | 120 +++++++++++++++++++++++++- > arch/riscv/kvm/vcpu_sbi_pmu.c | 3 + > 5 files changed, 140 insertions(+), 4 deletions(-) > > diff --git a/arch/riscv/include/asm/kvm_vcpu_pmu.h b/arch/riscv/include/asm/kvm_vcpu_pmu.h > index 395518a1664e..d56b901a61fc 100644 > --- a/arch/riscv/include/asm/kvm_vcpu_pmu.h > +++ b/arch/riscv/include/asm/kvm_vcpu_pmu.h > @@ -50,6 +50,12 @@ struct kvm_pmu { > bool init_done; > /* Bit map of all the virtual counter used */ > DECLARE_BITMAP(pmc_in_use, RISCV_KVM_MAX_COUNTERS); > + /* Bit map of all the virtual counter overflown */ > + DECLARE_BITMAP(pmc_overflown, RISCV_KVM_MAX_COUNTERS); > + /* The address of the counter snapshot area (guest physical address) */ > + gpa_t snapshot_addr; > + /* The actual data of the snapshot */ > + struct riscv_pmu_snapshot_data *sdata; > }; > > #define vcpu_to_pmu(vcpu) (&(vcpu)->arch.pmu_context) > @@ -85,6 +91,9 @@ int kvm_riscv_vcpu_pmu_ctr_cfg_match(struct kvm_vcpu *vcpu, unsigned long ctr_ba > int kvm_riscv_vcpu_pmu_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx, > struct kvm_vcpu_sbi_return *retdata); > void kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu); > +int kvm_riscv_vcpu_pmu_setup_snapshot(struct kvm_vcpu *vcpu, unsigned long saddr_low, > + unsigned 
long saddr_high, unsigned long flags, > + struct kvm_vcpu_sbi_return *retdata); > void kvm_riscv_vcpu_pmu_deinit(struct kvm_vcpu *vcpu); > void kvm_riscv_vcpu_pmu_reset(struct kvm_vcpu *vcpu); > > diff --git a/arch/riscv/kvm/aia.c b/arch/riscv/kvm/aia.c > index a944294f6f23..71d161d7430d 100644 > --- a/arch/riscv/kvm/aia.c > +++ b/arch/riscv/kvm/aia.c > @@ -545,6 +545,9 @@ void kvm_riscv_aia_enable(void) > enable_percpu_irq(hgei_parent_irq, > irq_get_trigger_type(hgei_parent_irq)); > csr_set(CSR_HIE, BIT(IRQ_S_GEXT)); > + /* Enable IRQ filtering for overflow interrupt only if sscofpmf is present */ > + if (__riscv_isa_extension_available(NULL, RISCV_ISA_EXT_SSCOFPMF)) > + csr_write(CSR_HVIEN, BIT(IRQ_PMU_OVF)); > } > > void kvm_riscv_aia_disable(void) > @@ -560,6 +563,8 @@ void kvm_riscv_aia_disable(void) > > /* Disable per-CPU SGEI interrupt */ > csr_clear(CSR_HIE, BIT(IRQ_S_GEXT)); > + if (__riscv_isa_extension_available(NULL, RISCV_ISA_EXT_SSCOFPMF)) > + csr_clear(CSR_HVIEN, BIT(IRQ_PMU_OVF)); > disable_percpu_irq(hgei_parent_irq); > > aia_set_hvictl(false); > diff --git a/arch/riscv/kvm/vcpu_onereg.c b/arch/riscv/kvm/vcpu_onereg.c > index fc34557f5356..581568847910 100644 > --- a/arch/riscv/kvm/vcpu_onereg.c > +++ b/arch/riscv/kvm/vcpu_onereg.c > @@ -117,8 +117,13 @@ void kvm_riscv_vcpu_setup_isa(struct kvm_vcpu *vcpu) > for (i = 0; i < ARRAY_SIZE(kvm_isa_ext_arr); i++) { > host_isa = kvm_isa_ext_arr[i]; > if (__riscv_isa_extension_available(NULL, host_isa) && > - kvm_riscv_vcpu_isa_enable_allowed(i)) > + kvm_riscv_vcpu_isa_enable_allowed(i)) { > + /* Sscofpmf depends on interrupt filtering defined in ssaia */ > + if (host_isa == RISCV_ISA_EXT_SSCOFPMF && > + !__riscv_isa_extension_available(NULL, RISCV_ISA_EXT_SSAIA)) > + continue; > set_bit(host_isa, vcpu->arch.isa); > + } > } > } > > diff --git a/arch/riscv/kvm/vcpu_pmu.c b/arch/riscv/kvm/vcpu_pmu.c > index 08f561998611..e980235b8436 100644 > --- a/arch/riscv/kvm/vcpu_pmu.c > +++ b/arch/riscv/kvm/vcpu_pmu.c 
> @@ -311,6 +311,81 @@ int kvm_riscv_vcpu_pmu_read_hpm(struct kvm_vcpu *vcpu, unsigned int csr_num, > return ret; > } > > +static void kvm_pmu_clear_snapshot_area(struct kvm_vcpu *vcpu) > +{ > + struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu); > + int snapshot_area_size = sizeof(struct riscv_pmu_snapshot_data); > + > + if (kvpmu->sdata) { > + memset(kvpmu->sdata, 0, snapshot_area_size); > + if (kvpmu->snapshot_addr != INVALID_GPA) > + kvm_vcpu_write_guest(vcpu, kvpmu->snapshot_addr, > + kvpmu->sdata, snapshot_area_size); > + kfree(kvpmu->sdata); > + kvpmu->sdata = NULL; > + } > + kvpmu->snapshot_addr = INVALID_GPA; > +} > + > +int kvm_riscv_vcpu_pmu_setup_snapshot(struct kvm_vcpu *vcpu, unsigned long saddr_low, > + unsigned long saddr_high, unsigned long flags, > + struct kvm_vcpu_sbi_return *retdata) > +{ > + struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu); > + int snapshot_area_size = sizeof(struct riscv_pmu_snapshot_data); > + int sbiret = 0; > + gpa_t saddr; > + unsigned long hva; > + bool writable; > + > + if (!kvpmu) { > + sbiret = SBI_ERR_INVALID_PARAM; > + goto out; > + } > + > + if (saddr_low == -1 && saddr_high == -1) { > + kvm_pmu_clear_snapshot_area(vcpu); > + return 0; > + } > + > + saddr = saddr_low; > + > + if (saddr_high != 0) { > + if (IS_ENABLED(CONFIG_32BIT)) > + saddr |= ((gpa_t)saddr << 32); > + else > + sbiret = SBI_ERR_INVALID_ADDRESS; > + goto out; > + } > + > + if (kvm_is_error_gpa(vcpu->kvm, saddr)) { > + sbiret = SBI_ERR_INVALID_PARAM; > + goto out; > + } > + > + hva = kvm_vcpu_gfn_to_hva_prot(vcpu, saddr >> PAGE_SHIFT, &writable); > + if (kvm_is_error_hva(hva) || !writable) { > + sbiret = SBI_ERR_INVALID_ADDRESS; > + goto out; > + } > + > + kvpmu->snapshot_addr = saddr; > + kvpmu->sdata = kzalloc(snapshot_area_size, GFP_ATOMIC); > + if (!kvpmu->sdata) > + return -ENOMEM; > + > + if (kvm_vcpu_write_guest(vcpu, saddr, kvpmu->sdata, snapshot_area_size)) { > + kfree(kvpmu->sdata); > + kvpmu->snapshot_addr = INVALID_GPA; > + sbiret = SBI_ERR_FAILURE; > + 
} > + > +out: > + retdata->err_val = sbiret; > + > + return 0; > +} > + > int kvm_riscv_vcpu_pmu_num_ctrs(struct kvm_vcpu *vcpu, > struct kvm_vcpu_sbi_return *retdata) > { > @@ -344,20 +419,32 @@ int kvm_riscv_vcpu_pmu_ctr_start(struct kvm_vcpu *vcpu, unsigned long ctr_base, > int i, pmc_index, sbiret = 0; > struct kvm_pmc *pmc; > int fevent_code; > + bool snap_flag_set = flags & SBI_PMU_START_FLAG_INIT_FROM_SNAPSHOT; > > - if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) { > + if ((kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0)) { > sbiret = SBI_ERR_INVALID_PARAM; > goto out; > } > > + if (snap_flag_set && kvpmu->snapshot_addr == INVALID_GPA) { > + sbiret = SBI_ERR_NO_SHMEM; > + goto out; > + } > + > /* Start the counters that have been configured and requested by the guest */ > for_each_set_bit(i, &ctr_mask, RISCV_MAX_COUNTERS) { > pmc_index = i + ctr_base; > if (!test_bit(pmc_index, kvpmu->pmc_in_use)) > continue; > pmc = &kvpmu->pmc[pmc_index]; > - if (flags & SBI_PMU_START_FLAG_SET_INIT_VALUE) > + if (flags & SBI_PMU_START_FLAG_SET_INIT_VALUE) { > pmc->counter_val = ival; > + } else if (snap_flag_set) { > + kvm_vcpu_read_guest(vcpu, kvpmu->snapshot_addr, kvpmu->sdata, > + sizeof(struct riscv_pmu_snapshot_data)); > + pmc->counter_val = kvpmu->sdata->ctr_values[pmc_index]; > + } > + > if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) { > fevent_code = get_event_code(pmc->event_idx); > if (fevent_code >= SBI_PMU_FW_MAX) { > @@ -401,12 +488,18 @@ int kvm_riscv_vcpu_pmu_ctr_stop(struct kvm_vcpu *vcpu, unsigned long ctr_base, > u64 enabled, running; > struct kvm_pmc *pmc; > int fevent_code; > + bool snap_flag_set = flags & SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT; > > - if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) { > + if ((kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0)) { > sbiret = SBI_ERR_INVALID_PARAM; > goto out; > } > > + if (snap_flag_set && kvpmu->snapshot_addr == INVALID_GPA) { > + sbiret = 
SBI_ERR_NO_SHMEM; > + goto out; > + } > + > /* Stop the counters that have been configured and requested by the guest */ > for_each_set_bit(i, &ctr_mask, RISCV_MAX_COUNTERS) { > pmc_index = i + ctr_base; > @@ -439,9 +532,28 @@ int kvm_riscv_vcpu_pmu_ctr_stop(struct kvm_vcpu *vcpu, unsigned long ctr_base, > } else { > sbiret = SBI_ERR_INVALID_PARAM; > } > + > + if (snap_flag_set && !sbiret) { > + if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) > + pmc->counter_val = kvpmu->fw_event[fevent_code].value; > + else if (pmc->perf_event) > + pmc->counter_val += perf_event_read_value(pmc->perf_event, > + &enabled, &running); > + /* TODO: Add counter overflow support when sscofpmf support is added */ > + kvpmu->sdata->ctr_values[i] = pmc->counter_val; > + kvm_vcpu_write_guest(vcpu, kvpmu->snapshot_addr, kvpmu->sdata, > + sizeof(struct riscv_pmu_snapshot_data)); > + } > + > if (flags & SBI_PMU_STOP_FLAG_RESET) { > pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID; > clear_bit(pmc_index, kvpmu->pmc_in_use); > + if (snap_flag_set) { > + /* Clear the snapshot area for the upcoming deletion event */ > + kvpmu->sdata->ctr_values[i] = 0; > + kvm_vcpu_write_guest(vcpu, kvpmu->snapshot_addr, kvpmu->sdata, > + sizeof(struct riscv_pmu_snapshot_data)); > + } > } > } > > @@ -567,6 +679,7 @@ void kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu) > kvpmu->num_hw_ctrs = num_hw_ctrs + 1; > kvpmu->num_fw_ctrs = SBI_PMU_FW_MAX; > memset(&kvpmu->fw_event, 0, SBI_PMU_FW_MAX * sizeof(struct kvm_fw_event)); > + kvpmu->snapshot_addr = INVALID_GPA; > > if (kvpmu->num_hw_ctrs > RISCV_KVM_MAX_HW_CTRS) { > pr_warn_once("Limiting the hardware counters to 32 as specified by the ISA"); > @@ -626,6 +739,7 @@ void kvm_riscv_vcpu_pmu_deinit(struct kvm_vcpu *vcpu) > } > bitmap_zero(kvpmu->pmc_in_use, RISCV_MAX_COUNTERS); > memset(&kvpmu->fw_event, 0, SBI_PMU_FW_MAX * sizeof(struct kvm_fw_event)); > + kvm_pmu_clear_snapshot_area(vcpu); > } > > void kvm_riscv_vcpu_pmu_reset(struct kvm_vcpu *vcpu) > diff --git 
a/arch/riscv/kvm/vcpu_sbi_pmu.c b/arch/riscv/kvm/vcpu_sbi_pmu.c > index b70179e9e875..9f61136e4bb1 100644 > --- a/arch/riscv/kvm/vcpu_sbi_pmu.c > +++ b/arch/riscv/kvm/vcpu_sbi_pmu.c > @@ -64,6 +64,9 @@ static int kvm_sbi_ext_pmu_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, > case SBI_EXT_PMU_COUNTER_FW_READ: > ret = kvm_riscv_vcpu_pmu_ctr_read(vcpu, cp->a0, retdata); > break; > + case SBI_EXT_PMU_SNAPSHOT_SET_SHMEM: > + ret = kvm_riscv_vcpu_pmu_setup_snapshot(vcpu, cp->a0, cp->a1, cp->a2, retdata); > + break; > default: > retdata->err_val = SBI_ERR_NOT_SUPPORTED; > } > -- > 2.34.1 >