On Fri, Jun 16, 2023 at 10:28 AM Atish Patra <atishp@xxxxxxxxxxxxxx> wrote: > > On Fri, May 12, 2023 at 1:58 AM Alexandre Ghiti <alexghiti@xxxxxxxxxxxx> wrote: > > > > Provide all the necessary bits in the generic riscv pmu driver to be > > able to mmap perf events in userspace: the heavy lifting lies in the > > driver backend, namely the legacy and sbi implementations. > > > > Note that arch_perf_update_userpage is almost a copy of arm64 code. > > > > Signed-off-by: Alexandre Ghiti <alexghiti@xxxxxxxxxxxx> > > --- > > arch/riscv/kernel/Makefile | 2 +- > > arch/riscv/kernel/perf_event.c | 58 ++++++++++++++++++++++++++++++++++ > > drivers/perf/riscv_pmu.c | 41 ++++++++++++++++++++++++ > > include/linux/perf/riscv_pmu.h | 4 +++ > > 4 files changed, 104 insertions(+), 1 deletion(-) > > create mode 100644 arch/riscv/kernel/perf_event.c > > > > diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile > > index 4cf303a779ab..0d215fd9860d 100644 > > --- a/arch/riscv/kernel/Makefile > > +++ b/arch/riscv/kernel/Makefile > > @@ -70,7 +70,7 @@ obj-$(CONFIG_DYNAMIC_FTRACE) += mcount-dyn.o > > > > obj-$(CONFIG_TRACE_IRQFLAGS) += trace_irq.o > > > > -obj-$(CONFIG_PERF_EVENTS) += perf_callchain.o > > +obj-$(CONFIG_PERF_EVENTS) += perf_callchain.o perf_event.o > > obj-$(CONFIG_HAVE_PERF_REGS) += perf_regs.o > > obj-$(CONFIG_RISCV_SBI) += sbi.o > > ifeq ($(CONFIG_RISCV_SBI), y) > > diff --git a/arch/riscv/kernel/perf_event.c b/arch/riscv/kernel/perf_event.c > > new file mode 100644 > > index 000000000000..94174a0fc251 > > --- /dev/null > > +++ b/arch/riscv/kernel/perf_event.c > > @@ -0,0 +1,58 @@ > > +// SPDX-License-Identifier: GPL-2.0-only > > +#include <linux/sched_clock.h> > > + > > +void arch_perf_update_userpage(struct perf_event *event, > > + struct perf_event_mmap_page *userpg, u64 now) > > +{ > > + struct clock_read_data *rd; > > + unsigned int seq; > > + u64 ns; > > + > > + userpg->cap_user_time = 0; > > + userpg->cap_user_time_zero = 0; > > + 
userpg->cap_user_time_short = 0; > > + userpg->cap_user_rdpmc = > > + !!(event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT); > > + > > + userpg->pmc_width = 64; > > + > > The counter width is 64 for cycle & instret. Other hpmcounters can have > different widths. > This information should be retrieved from counter info. Yes, this is done in patch 7 when I adapt the perf SBI backend to allow the user access. > > > + do { > > + rd = sched_clock_read_begin(&seq); > > + > > + userpg->time_mult = rd->mult; > > + userpg->time_shift = rd->shift; > > + userpg->time_zero = rd->epoch_ns; > > + userpg->time_cycles = rd->epoch_cyc; > > + userpg->time_mask = rd->sched_clock_mask; > > + > > + /* > > + * Subtract the cycle base, such that software that > > + * doesn't know about cap_user_time_short still 'works' > > + * assuming no wraps. > > + */ > > + ns = mul_u64_u32_shr(rd->epoch_cyc, rd->mult, rd->shift); > > + userpg->time_zero -= ns; > > + > > + } while (sched_clock_read_retry(seq)); > > + > > + userpg->time_offset = userpg->time_zero - now; > > + > > + /* > > + * time_shift is not expected to be greater than 31 due to > > + * the original published conversion algorithm shifting a > > + * 32-bit value (now specifies a 64-bit value) - refer > > + * perf_event_mmap_page documentation in perf_event.h. > > + */ > > + if (userpg->time_shift == 32) { > > + userpg->time_shift = 31; > > + userpg->time_mult >>= 1; > > + } > > + > > + /* > > + * Internal timekeeping for enabled/running/stopped times > > + * is always computed with the sched_clock. 
> > + */ > > + userpg->cap_user_time = 1; > > + userpg->cap_user_time_zero = 1; > > + userpg->cap_user_time_short = 1; > > +} > > diff --git a/drivers/perf/riscv_pmu.c b/drivers/perf/riscv_pmu.c > > index ebca5eab9c9b..af69da268246 100644 > > --- a/drivers/perf/riscv_pmu.c > > +++ b/drivers/perf/riscv_pmu.c > > @@ -171,6 +171,8 @@ int riscv_pmu_event_set_period(struct perf_event *event) > > > > local64_set(&hwc->prev_count, (u64)-left); > > > > + perf_event_update_userpage(event); > > + > > return overflow; > > } > > > > @@ -267,6 +269,9 @@ static int riscv_pmu_event_init(struct perf_event *event) > > hwc->idx = -1; > > hwc->event_base = mapped_event; > > > > + if (rvpmu->event_init) > > + rvpmu->event_init(event); > > + > > if (!is_sampling_event(event)) { > > /* > > * For non-sampling runs, limit the sample_period to half > > @@ -283,6 +288,39 @@ static int riscv_pmu_event_init(struct perf_event *event) > > return 0; > > } > > > > +static int riscv_pmu_event_idx(struct perf_event *event) > > +{ > > + struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu); > > + > > + if (!(event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT)) > > + return 0; > > + > > + if (rvpmu->csr_index) > > + return rvpmu->csr_index(event) + 1; > > + > > + return 0; > > +} > > + > > +static void riscv_pmu_event_mapped(struct perf_event *event, struct mm_struct *mm) > > +{ > > + struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu); > > + > > + if (rvpmu->event_mapped) { > > + rvpmu->event_mapped(event, mm); > > + perf_event_update_userpage(event); > > + } > > +} > > + > > +static void riscv_pmu_event_unmapped(struct perf_event *event, struct mm_struct *mm) > > +{ > > + struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu); > > + > > + if (rvpmu->event_unmapped) { > > + rvpmu->event_unmapped(event, mm); > > + perf_event_update_userpage(event); > > + } > > +} > > + > > struct riscv_pmu *riscv_pmu_alloc(void) > > { > > struct riscv_pmu *pmu; > > @@ -307,6 +345,9 @@ struct riscv_pmu *riscv_pmu_alloc(void) > > 
} > > pmu->pmu = (struct pmu) { > > .event_init = riscv_pmu_event_init, > > + .event_mapped = riscv_pmu_event_mapped, > > + .event_unmapped = riscv_pmu_event_unmapped, > > + .event_idx = riscv_pmu_event_idx, > > .add = riscv_pmu_add, > > .del = riscv_pmu_del, > > .start = riscv_pmu_start, > > diff --git a/include/linux/perf/riscv_pmu.h b/include/linux/perf/riscv_pmu.h > > index 9f70d94942e0..1452c8af3b67 100644 > > --- a/include/linux/perf/riscv_pmu.h > > +++ b/include/linux/perf/riscv_pmu.h > > @@ -55,6 +55,10 @@ struct riscv_pmu { > > void (*ctr_start)(struct perf_event *event, u64 init_val); > > void (*ctr_stop)(struct perf_event *event, unsigned long flag); > > int (*event_map)(struct perf_event *event, u64 *config); > > + void (*event_init)(struct perf_event *event); > > + void (*event_mapped)(struct perf_event *event, struct mm_struct *mm); > > + void (*event_unmapped)(struct perf_event *event, struct mm_struct *mm); > > + uint8_t (*csr_index)(struct perf_event *event); > > > > struct cpu_hw_events __percpu *hw_events; > > struct hlist_node node; > > -- > > 2.37.2 > > > > > -- > Regards, > Atish