On Fri, Jan 13, 2023 at 4:08 AM Andrew Jones <ajones@xxxxxxxxxxxxxxxx> wrote: > > On Thu, Dec 15, 2022 at 09:00:46AM -0800, Atish Patra wrote: > > SBI PMU extension defines a set of firmware events which can provide > > useful information to guests about number of SBI calls. As hypervisor > > implements the SBI PMU extension, these firmware events corresponds > > to ecall invocations between VS->HS mode. All other firmware events > > will always report zero if monitored as KVM doesn't implement them. > > > > Signed-off-by: Atish Patra <atishp@xxxxxxxxxxxx> > > --- > > arch/riscv/include/asm/kvm_vcpu_pmu.h | 16 ++++ > > arch/riscv/include/asm/sbi.h | 2 +- > > arch/riscv/kvm/tlb.c | 6 +- > > arch/riscv/kvm/vcpu_pmu.c | 105 ++++++++++++++++++++++---- > > arch/riscv/kvm/vcpu_sbi_replace.c | 7 ++ > > 5 files changed, 119 insertions(+), 17 deletions(-) > > > > diff --git a/arch/riscv/include/asm/kvm_vcpu_pmu.h b/arch/riscv/include/asm/kvm_vcpu_pmu.h > > index 7a9a8e6..cccc6182 100644 > > --- a/arch/riscv/include/asm/kvm_vcpu_pmu.h > > +++ b/arch/riscv/include/asm/kvm_vcpu_pmu.h > > @@ -17,6 +17,14 @@ > > #define RISCV_KVM_MAX_FW_CTRS 32 > > #define RISCV_MAX_COUNTERS 64 > > > > +struct kvm_fw_event { > > + /* Current value of the event */ > > + unsigned long value; > > + > > + /* Event monitoring status */ > > + bool started; > > +}; > > + > > /* Per virtual pmu counter data */ > > struct kvm_pmc { > > u8 idx; > > @@ -25,11 +33,14 @@ struct kvm_pmc { > > union sbi_pmu_ctr_info cinfo; > > /* Event monitoring status */ > > bool started; > > + /* Monitoring event ID */ > > + unsigned long event_idx; > > }; > > > > /* PMU data structure per vcpu */ > > struct kvm_pmu { > > struct kvm_pmc pmc[RISCV_MAX_COUNTERS]; > > + struct kvm_fw_event fw_event[RISCV_KVM_MAX_FW_CTRS]; > > /* Number of the virtual firmware counters available */ > > int num_fw_ctrs; > > /* Number of the virtual hardware counters available */ > > @@ -52,6 +63,7 @@ struct kvm_pmu { > > { .base = CSR_CYCLE, .count = 31, .func = kvm_riscv_vcpu_pmu_read_hpm }, > > #endif > > > > +int kvm_riscv_vcpu_pmu_incr_fw(struct kvm_vcpu *vcpu, unsigned long fid); > > int kvm_riscv_vcpu_pmu_read_hpm(struct kvm_vcpu *vcpu, unsigned int csr_num, > > unsigned long *val, unsigned long new_val, > > unsigned long wr_mask); > > @@ -81,6 +93,10 @@ struct kvm_pmu { > > #define KVM_RISCV_VCPU_HPMCOUNTER_CSR_FUNCS \ > > { .base = 0, .count = 0, .func = NULL }, > > > > +static inline int kvm_riscv_vcpu_pmu_incr_fw(struct kvm_vcpu *vcpu, unsigned long fid) > > +{ > > + return 0; > > +} > > > > static inline int kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu) > > { > > diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h > > index 2a0ef738..a192a95a 100644 > > --- a/arch/riscv/include/asm/sbi.h > > +++ b/arch/riscv/include/asm/sbi.h > > @@ -171,7 +171,7 @@ enum sbi_pmu_fw_generic_events_t { > > SBI_PMU_FW_IPI_SENT = 6, > > SBI_PMU_FW_IPI_RECVD = 7, > > SBI_PMU_FW_FENCE_I_SENT = 8, > > - SBI_PMU_FW_FENCE_I_RECVD = 9, > > + SBI_PMU_FW_FENCE_I_RCVD = 9, > > This should probably be in its own patch. done. > > > SBI_PMU_FW_SFENCE_VMA_SENT = 10, > > SBI_PMU_FW_SFENCE_VMA_RCVD = 11, > > SBI_PMU_FW_SFENCE_VMA_ASID_SENT = 12, > > diff --git a/arch/riscv/kvm/tlb.c b/arch/riscv/kvm/tlb.c > > index 309d79b..de81920 100644 > > --- a/arch/riscv/kvm/tlb.c > > +++ b/arch/riscv/kvm/tlb.c > > @@ -181,6 +181,7 @@ void kvm_riscv_local_tlb_sanitize(struct kvm_vcpu *vcpu) > > > > void kvm_riscv_fence_i_process(struct kvm_vcpu *vcpu) > > { > > + kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_FENCE_I_RCVD); > > local_flush_icache_all(); > > } > > > > @@ -264,15 +265,18 @@ void kvm_riscv_hfence_process(struct kvm_vcpu *vcpu) > > d.addr, d.size, d.order); > > break; > > case KVM_RISCV_HFENCE_VVMA_ASID_GVA: > > + kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_HFENCE_VVMA_ASID_RCVD); > > kvm_riscv_local_hfence_vvma_asid_gva( > > READ_ONCE(v->vmid), d.asid, > > d.addr, d.size, d.order); > > break; > > case KVM_RISCV_HFENCE_VVMA_ASID_ALL: > > + kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_HFENCE_VVMA_ASID_RCVD); > > kvm_riscv_local_hfence_vvma_asid_all( > > READ_ONCE(v->vmid), d.asid); > > break; > > case KVM_RISCV_HFENCE_VVMA_GVA: > > + kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_HFENCE_VVMA_RCVD); > > kvm_riscv_local_hfence_vvma_gva( > > READ_ONCE(v->vmid), > > d.addr, d.size, d.order); > > @@ -323,7 +327,7 @@ void kvm_riscv_fence_i(struct kvm *kvm, > > unsigned long hbase, unsigned long hmask) > > { > > make_xfence_request(kvm, hbase, hmask, KVM_REQ_FENCE_I, > > - KVM_REQ_FENCE_I, NULL); > > + KVM_REQ_FENCE_I, NULL); > > stray change, and whitespace was correct before > Fixed. > > } > > > > void kvm_riscv_hfence_gvma_vmid_gpa(struct kvm *kvm, > > diff --git a/arch/riscv/kvm/vcpu_pmu.c b/arch/riscv/kvm/vcpu_pmu.c > > index 21c1f0f..a64a7ae 100644 > > --- a/arch/riscv/kvm/vcpu_pmu.c > > +++ b/arch/riscv/kvm/vcpu_pmu.c > > @@ -170,18 +170,36 @@ static int pmu_get_pmc_index(struct kvm_pmu *pmu, unsigned long eidx, > > return pmu_get_programmable_pmc_index(pmu, eidx, cbase, cmask); > > } > > > > +int kvm_riscv_vcpu_pmu_incr_fw(struct kvm_vcpu *vcpu, unsigned long fid) > > +{ > > + struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu); > > + struct kvm_fw_event *fevent; > > + > > + if (!kvpmu || fid >= SBI_PMU_FW_MAX) > > + return -EINVAL; > > + > > + fevent = &kvpmu->fw_event[fid]; > > + if (fevent->started) > > + fevent->value++; > > + > > + return 0; > > +} > > + > > static int pmu_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx, > > unsigned long *out_val) > > { > > struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu); > > struct kvm_pmc *pmc; > > u64 enabled, running; > > + int fevent_code; > > > > pmc = &kvpmu->pmc[cidx]; > > - if (!pmc->perf_event) > > - return -EINVAL; > > > > - pmc->counter_val += perf_event_read_value(pmc->perf_event, &enabled, &running); > > + if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) { > > + fevent_code = get_event_code(pmc->event_idx); > > + pmc->counter_val = kvpmu->fw_event[fevent_code].value; > > + } else if (pmc->perf_event) > > + pmc->counter_val += perf_event_read_value(pmc->perf_event, &enabled, &running); > > *out_val = pmc->counter_val; > > > > return 0; > > @@ -238,6 +256,7 @@ int kvm_riscv_vcpu_pmu_ctr_start(struct kvm_vcpu *vcpu, unsigned long ctr_base, > > struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu); > > int i, num_ctrs, pmc_index, sbiret = 0; > > struct kvm_pmc *pmc; > > + int fevent_code; > > > > num_ctrs = kvpmu->num_fw_ctrs + kvpmu->num_hw_ctrs; > > if (ctr_base + __fls(ctr_mask) >= num_ctrs) { > > @@ -253,7 +272,22 @@ int kvm_riscv_vcpu_pmu_ctr_start(struct kvm_vcpu *vcpu, unsigned long ctr_base, > > pmc = &kvpmu->pmc[pmc_index]; > > if (flag & SBI_PMU_START_FLAG_SET_INIT_VALUE) > > pmc->counter_val = ival; > > - if (pmc->perf_event) { > > + if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) { > > + fevent_code = get_event_code(pmc->event_idx); > > + if (fevent_code >= SBI_PMU_FW_MAX) { > > + sbiret = SBI_ERR_INVALID_PARAM; > > + goto out; > > + } > > + > > + /* Check if the counter was already started for some reason */ > > + if (kvpmu->fw_event[fevent_code].started) { > > + sbiret = SBI_ERR_ALREADY_STARTED; > > + continue; > > + } > > + > > + kvpmu->fw_event[fevent_code].started = true; > > + kvpmu->fw_event[fevent_code].value = pmc->counter_val; > > + } else if (pmc->perf_event) { > > if (unlikely(pmc->started)) { > > sbiret = SBI_ERR_ALREADY_STARTED; > > continue; > > @@ -281,6 +315,7 @@ int kvm_riscv_vcpu_pmu_ctr_stop(struct kvm_vcpu *vcpu, unsigned long ctr_base, > > int i, num_ctrs, pmc_index, sbiret = 0; > > u64 enabled, running; > > struct kvm_pmc *pmc; > > + int fevent_code; > > > > num_ctrs = kvpmu->num_fw_ctrs + kvpmu->num_hw_ctrs; > > if ((ctr_base + __fls(ctr_mask)) >= num_ctrs) { > > @@ -294,7 +329,18 @@ int kvm_riscv_vcpu_pmu_ctr_stop(struct kvm_vcpu *vcpu, unsigned long ctr_base, > > if (!test_bit(pmc_index, kvpmu->pmc_in_use)) > > continue; > > pmc = &kvpmu->pmc[pmc_index]; > > - if (pmc->perf_event) { > > + if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) { > > + fevent_code = get_event_code(pmc->event_idx); > > + if (fevent_code >= SBI_PMU_FW_MAX) { > > + sbiret = SBI_ERR_INVALID_PARAM; > > + goto out; > > + } > > + > > + if (!kvpmu->fw_event[fevent_code].started) > > + sbiret = SBI_ERR_ALREADY_STOPPED; > > + > > + kvpmu->fw_event[fevent_code].started = false; > > + } else if (pmc->perf_event) { > > if (pmc->started) { > > /* Stop counting the counter */ > > perf_event_disable(pmc->perf_event); > > @@ -307,12 +353,15 @@ int kvm_riscv_vcpu_pmu_ctr_stop(struct kvm_vcpu *vcpu, unsigned long ctr_base, > > pmc->counter_val += perf_event_read_value(pmc->perf_event, > > &enabled, &running); > > pmu_release_perf_event(pmc); > > - clear_bit(pmc_index, kvpmu->pmc_in_use); > > } > > } else { > > kvm_debug("Can not stop counter due to invalid confiugartion\n"); > > sbiret = SBI_ERR_INVALID_PARAM; > > } > > + if (flag & SBI_PMU_STOP_FLAG_RESET) { > > + pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID; > > + clear_bit(pmc_index, kvpmu->pmc_in_use); > > nit: I'd probably just leave clear_bit where it was and add > > if (flag & SBI_PMU_STOP_FLAG_RESET) > pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID; > > to the firmware arm. > We have to do clear_bit for the firmware as well. That's why I moved it below to avoid the same code twice. > > + } > > } > > > > out: > > @@ -329,12 +378,12 @@ int kvm_riscv_vcpu_pmu_ctr_cfg_match(struct kvm_vcpu *vcpu, unsigned long ctr_ba > > struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu); > > struct perf_event *event; > > struct perf_event_attr attr; > > - int num_ctrs, ctr_idx; > > + int num_ctrs, ctr_idx, sbiret = 0; > > u32 etype = pmu_get_perf_event_type(eidx); > > u64 config; > > - struct kvm_pmc *pmc; > > - int sbiret = 0; > > - > > + struct kvm_pmc *pmc = NULL; > > + bool is_fevent; > > + unsigned long event_code; > > > > num_ctrs = kvpmu->num_fw_ctrs + kvpmu->num_hw_ctrs; > > if (etype == PERF_TYPE_MAX || (ctr_base + __fls(ctr_mask) >= num_ctrs)) { > > @@ -342,7 +391,9 @@ int kvm_riscv_vcpu_pmu_ctr_cfg_match(struct kvm_vcpu *vcpu, unsigned long ctr_ba > > goto out; > > } > > > > - if (pmu_is_fw_event(eidx)) { > > + event_code = get_event_code(eidx); > > + is_fevent = pmu_is_fw_event(eidx); > > + if (is_fevent && event_code >= SBI_PMU_FW_MAX) { > > sbiret = SBI_ERR_NOT_SUPPORTED; > > goto out; > > } > > @@ -357,7 +408,10 @@ int kvm_riscv_vcpu_pmu_ctr_cfg_match(struct kvm_vcpu *vcpu, unsigned long ctr_ba > > goto out; > > } > > ctr_idx = ctr_base; > > - goto match_done; > > + if (is_fevent) > > + goto perf_event_done; > > + else > > + goto match_done; > > } > > > > ctr_idx = pmu_get_pmc_index(kvpmu, eidx, ctr_base, ctr_mask); > > @@ -366,6 +420,13 @@ int kvm_riscv_vcpu_pmu_ctr_cfg_match(struct kvm_vcpu *vcpu, unsigned long ctr_ba > > goto out; > > } > > > > + /* > > + * No need to create perf events for firmware events as the firmware counter > > + * is supposed to return the measurement of VS->HS mode invocations. > > + */ > > + if (is_fevent) > > + goto perf_event_done; > > + > > match_done: > > pmc = &kvpmu->pmc[ctr_idx]; > > pmu_release_perf_event(pmc); > > @@ -404,10 +465,19 @@ int kvm_riscv_vcpu_pmu_ctr_cfg_match(struct kvm_vcpu *vcpu, unsigned long ctr_ba > > return -EOPNOTSUPP; > > } > > > > - set_bit(ctr_idx, kvpmu->pmc_in_use); > > pmc->perf_event = event; > > - if (flag & SBI_PMU_CFG_FLAG_AUTO_START) > > - perf_event_enable(pmc->perf_event); > > Maybe we can move the perf setup stuff into a helper function and > then guard it with an if-statement rather than have the gotos? > Sure. Done. > > +perf_event_done: > > + if (flag & SBI_PMU_CFG_FLAG_AUTO_START) { > > + if (is_fevent) > > + kvpmu->fw_event[event_code].started = true; > > + else > > + perf_event_enable(pmc->perf_event); > > + } > > + /* This should be only true for firmware events */ > > + if (!pmc) > > + pmc = &kvpmu->pmc[ctr_idx]; > > + pmc->event_idx = eidx; > > + set_bit(ctr_idx, kvpmu->pmc_in_use); > > > > ext_data->out_val = ctr_idx; > > out: > > @@ -451,6 +521,7 @@ int kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu) > > bitmap_zero(kvpmu->pmc_in_use, RISCV_MAX_COUNTERS); > > kvpmu->num_hw_ctrs = num_hw_ctrs; > > kvpmu->num_fw_ctrs = num_fw_ctrs; > > + memset(&kvpmu->fw_event, 0, SBI_PMU_FW_MAX * sizeof(struct kvm_fw_event)); > > I'm wondering if we need this array. We already have an underused pmc for > the fw events which has counter_val and started. Can't we just used those? > My initial implementation was doing the same. However, pmc array is counter index based while fw_event is firmware event id based. We need to maintain a mapping between these two or iterate the pmc array one by one to find correct entry in kvm_riscv_vcpu_pmu_incr_fw. As the kvm_riscv_vcpu_pmu_incr_fw may be called from a performance sensitive path, a separate fw_event array helps quick lookup. > > /* > > * There is no corelation betwen the logical hardware counter and virtual counters. > > * However, we need to encode a hpmcounter CSR in the counter info field so that > > @@ -464,6 +535,7 @@ int kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu) > > pmc = &kvpmu->pmc[i]; > > pmc->idx = i; > > pmc->counter_val = 0; > > + pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID; > > if (i < kvpmu->num_hw_ctrs) { > > kvpmu->pmc[i].cinfo.type = SBI_PMU_CTR_TYPE_HW; > > if (i < 3) > > @@ -501,7 +573,10 @@ void kvm_riscv_vcpu_pmu_deinit(struct kvm_vcpu *vcpu) > > for_each_set_bit(i, kvpmu->pmc_in_use, RISCV_MAX_COUNTERS) { > > pmc = &kvpmu->pmc[i]; > > pmu_release_perf_event(pmc); > > + pmc->counter_val = 0; > > + pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID; > > } > > + memset(&kvpmu->fw_event, 0, SBI_PMU_FW_MAX * sizeof(struct kvm_fw_event)); > > } > > > > void kvm_riscv_vcpu_pmu_reset(struct kvm_vcpu *vcpu) > > diff --git a/arch/riscv/kvm/vcpu_sbi_replace.c b/arch/riscv/kvm/vcpu_sbi_replace.c > > index d029136..3f39711 100644 > > --- a/arch/riscv/kvm/vcpu_sbi_replace.c > > +++ b/arch/riscv/kvm/vcpu_sbi_replace.c > > @@ -11,6 +11,7 @@ > > #include <linux/kvm_host.h> > > #include <asm/sbi.h> > > #include <asm/kvm_vcpu_timer.h> > > +#include <asm/kvm_vcpu_pmu.h> > > #include <asm/kvm_vcpu_sbi.h> > > > > static int kvm_sbi_ext_time_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, > > @@ -26,6 +27,7 @@ static int kvm_sbi_ext_time_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, > > return 0; > > } > > > > + kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_SET_TIMER); > > #if __riscv_xlen == 32 > > next_cycle = ((u64)cp->a1 << 32) | (u64)cp->a0; > > #else > > @@ -58,6 +60,7 @@ static int kvm_sbi_ext_ipi_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, > > return 0; > > } > > > > + kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_IPI_SENT); > > kvm_for_each_vcpu(i, tmp, vcpu->kvm) { > > if (hbase != -1UL) { > > if (tmp->vcpu_id < hbase) > > @@ -68,6 +71,7 @@ static int kvm_sbi_ext_ipi_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, > > ret = kvm_riscv_vcpu_set_interrupt(tmp, IRQ_VS_SOFT); > > if (ret < 0) > > break; > > + kvm_riscv_vcpu_pmu_incr_fw(tmp, SBI_PMU_FW_IPI_RECVD); > > } > > > > return ret; > > @@ -91,6 +95,7 @@ static int kvm_sbi_ext_rfence_handler(struct kvm_vcpu *vcpu, struct kvm_run *run > > switch (funcid) { > > case SBI_EXT_RFENCE_REMOTE_FENCE_I: > > kvm_riscv_fence_i(vcpu->kvm, hbase, hmask); > > + kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_FENCE_I_SENT); > > break; > > case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA: > > if (cp->a2 == 0 && cp->a3 == 0) > > @@ -98,6 +103,7 @@ static int kvm_sbi_ext_rfence_handler(struct kvm_vcpu *vcpu, struct kvm_run *run > > else > > kvm_riscv_hfence_vvma_gva(vcpu->kvm, hbase, hmask, > > cp->a2, cp->a3, PAGE_SHIFT); > > + kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_HFENCE_VVMA_SENT); > > break; > > case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID: > > if (cp->a2 == 0 && cp->a3 == 0) > > @@ -108,6 +114,7 @@ static int kvm_sbi_ext_rfence_handler(struct kvm_vcpu *vcpu, struct kvm_run *run > > hbase, hmask, > > cp->a2, cp->a3, > > PAGE_SHIFT, cp->a4); > > + kvm_riscv_vcpu_pmu_incr_fw(vcpu, SBI_PMU_FW_HFENCE_VVMA_ASID_SENT); > > break; > > case SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA: > > case SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA_VMID: > > -- > > 2.25.1 > > > > It think it'd be nice to break the application of > kvm_riscv_vcpu_pmu_incr_fw() out of this patch. I.e. introduce > kvm_riscv_vcpu_pmu_incr_fw() in this patch and then a second patch > applies it to all the ecalls. > Done. > Thanks, > drew -- Regards, Atish