pid_max
=======
diff --git a/arch/riscv/include/asm/perf_event.h b/arch/riscv/include/asm/perf_event.h
index d42c901f9a97..9fdfdd9dc92d 100644
--- a/arch/riscv/include/asm/perf_event.h
+++ b/arch/riscv/include/asm/perf_event.h
@@ -9,5 +9,8 @@
#define _ASM_RISCV_PERF_EVENT_H
#include <linux/perf_event.h>
+
+#define PERF_EVENT_FLAG_LEGACY 1
+
#define perf_arch_bpf_user_pt_regs(regs) (struct user_regs_struct *)regs
#endif /* _ASM_RISCV_PERF_EVENT_H */
diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
index aa22f87faeae..9ae951b07847 100644
--- a/arch/riscv/kernel/Makefile
+++ b/arch/riscv/kernel/Makefile
@@ -70,7 +70,7 @@ obj-$(CONFIG_DYNAMIC_FTRACE) += mcount-dyn.o
obj-$(CONFIG_TRACE_IRQFLAGS) += trace_irq.o
-obj-$(CONFIG_PERF_EVENTS) += perf_callchain.o
+obj-$(CONFIG_PERF_EVENTS) += perf_callchain.o perf_event.o
obj-$(CONFIG_HAVE_PERF_REGS) += perf_regs.o
obj-$(CONFIG_RISCV_SBI) += sbi.o
ifeq ($(CONFIG_RISCV_SBI), y)
diff --git a/arch/riscv/kernel/perf_event.c b/arch/riscv/kernel/perf_event.c
new file mode 100644
index 000000000000..4a75ab628bfb
--- /dev/null
+++ b/arch/riscv/kernel/perf_event.c
@@ -0,0 +1,65 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/perf/riscv_pmu.h>
+#include <linux/sched_clock.h>
+
+void arch_perf_update_userpage(struct perf_event *event,
+ struct perf_event_mmap_page *userpg, u64 now)
+{
+ struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
+ struct clock_read_data *rd;
+ unsigned int seq;
+ u64 ns;
+
+ userpg->cap_user_time = 0;
+ userpg->cap_user_time_zero = 0;
+ userpg->cap_user_time_short = 0;
+ userpg->cap_user_rdpmc =
+ !!(event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT);
+
+ /*
+ * The counters are 64-bit but the priv spec doesn't mandate all the
+ * bits to be implemented: that's why, counter width can vary based on
+ * the cpu vendor.
+ */
+ userpg->pmc_width = rvpmu->ctr_get_width(event->hw.idx) + 1;
+
+ do {
+ rd = sched_clock_read_begin(&seq);
+
+ userpg->time_mult = rd->mult;
+ userpg->time_shift = rd->shift;
+ userpg->time_zero = rd->epoch_ns;
+ userpg->time_cycles = rd->epoch_cyc;
+ userpg->time_mask = rd->sched_clock_mask;
+
+ /*
+ * Subtract the cycle base, such that software that
+ * doesn't know about cap_user_time_short still 'works'
+ * assuming no wraps.
+ */
+ ns = mul_u64_u32_shr(rd->epoch_cyc, rd->mult, rd->shift);
+ userpg->time_zero -= ns;
+
+ } while (sched_clock_read_retry(seq));
+
+ userpg->time_offset = userpg->time_zero - now;
+
+ /*
+ * time_shift is not expected to be greater than 31 due to
+ * the original published conversion algorithm shifting a
+ * 32-bit value (now specifies a 64-bit value) - refer
+ * perf_event_mmap_page documentation in perf_event.h.
+ */
+ if (userpg->time_shift == 32) {
+ userpg->time_shift = 31;
+ userpg->time_mult >>= 1;
+ }
+
+ /*
+ * Internal timekeeping for enabled/running/stopped times
+ * is always computed with the sched_clock.
+ */
+ userpg->cap_user_time = 1;
+ userpg->cap_user_time_zero = 1;
+ userpg->cap_user_time_short = 1;
+}
diff --git a/drivers/perf/riscv_pmu.c b/drivers/perf/riscv_pmu.c
index ebca5eab9c9b..12675ee1123c 100644
--- a/drivers/perf/riscv_pmu.c
+++ b/drivers/perf/riscv_pmu.c
@@ -171,6 +171,8 @@ int riscv_pmu_event_set_period(struct perf_event *event)
local64_set(&hwc->prev_count, (u64)-left);
+ perf_event_update_userpage(event);
+
return overflow;
}
@@ -283,6 +285,43 @@ static int riscv_pmu_event_init(struct perf_event *event)
return 0;
}
+static int riscv_pmu_event_idx(struct perf_event *event)
+{
+ struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
+
+ if (!(event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT))
+ return 0;
+
+ /*
+ * cycle and instret can either be retrieved from their fixed counters
+ * or from programmable counters, the latter being the preferred way
+ * since cycle and instret counters do not support sampling.
+ */
+
+ return rvpmu->csr_index(event) + 1;
+}
+
+static void riscv_pmu_event_mapped(struct perf_event *event, struct mm_struct *mm)
+{
+ /*
+ * The user mmapped the event to directly access it: this is where
+ * we determine based on sysctl_perf_user_access if we grant userspace
+ * the direct access to this event. That means that within the same
+ * task, some events may be directly accessible and some other may not,
+ * if the user changes the value of sysctl_perf_user_accesss in the
+ * meantime.
+ */
+ struct riscv_pmu *rvpmu = to_riscv_pmu(event->pmu);
+
+ event->hw.flags |= rvpmu->event_flags(event);
+ perf_event_update_userpage(event);
+}
+
+static void riscv_pmu_event_unmapped(struct perf_event *event, struct mm_struct *mm)
+{
+ event->hw.flags &= ~PERF_EVENT_FLAG_USER_READ_CNT;
+}
+
struct riscv_pmu *riscv_pmu_alloc(void)
{
struct riscv_pmu *pmu;
@@ -307,6 +346,9 @@ struct riscv_pmu *riscv_pmu_alloc(void)
}
pmu->pmu = (struct pmu) {
.event_init = riscv_pmu_event_init,
+ .event_mapped = riscv_pmu_event_mapped,
+ .event_unmapped = riscv_pmu_event_unmapped,
+ .event_idx = riscv_pmu_event_idx,
.add = riscv_pmu_add,
.del = riscv_pmu_del,
.start = riscv_pmu_start,
diff --git a/drivers/perf/riscv_pmu_legacy.c b/drivers/perf/riscv_pmu_legacy.c
index 0d8c9d8849ee..35c4c9097a0f 100644
--- a/drivers/perf/riscv_pmu_legacy.c
+++ b/drivers/perf/riscv_pmu_legacy.c
@@ -74,6 +74,21 @@ static void pmu_legacy_ctr_start(struct perf_event *event, u64 ival)
local64_set(&hwc->prev_count, initial_val);
}
+static uint8_t pmu_legacy_csr_index(struct perf_event *event)
+{
+ return event->hw.idx;
+}
+
+static int pmu_legacy_event_flags(struct perf_event *event)
+{
+ /* In legacy mode, the first 3 CSRs are available. */
+ if (event->attr.config != PERF_COUNT_HW_CPU_CYCLES &&
+ event->attr.config != PERF_COUNT_HW_INSTRUCTIONS)
+ return 0;
+
+ return PERF_EVENT_FLAG_USER_READ_CNT;
+}
+
/*
* This is just a simple implementation to allow legacy implementations
* compatible with new RISC-V PMU driver framework.
@@ -94,6 +109,8 @@ static void pmu_legacy_init(struct riscv_pmu *pmu)
pmu->ctr_get_width = NULL;
pmu->ctr_clear_idx = NULL;
pmu->ctr_read = pmu_legacy_read_ctr;
+ pmu->event_flags = pmu_legacy_event_flags;
+ pmu->csr_index = pmu_legacy_csr_index;
perf_pmu_register(&pmu->pmu, "cpu", PERF_TYPE_RAW);
}
diff --git a/drivers/perf/riscv_pmu_sbi.c b/drivers/perf/riscv_pmu_sbi.c
index 70cb50fd41c2..af7f3128b6b8 100644
--- a/drivers/perf/riscv_pmu_sbi.c
+++ b/drivers/perf/riscv_pmu_sbi.c
@@ -24,6 +24,10 @@
#include <asm/sbi.h>
#include <asm/hwcap.h>
+#define SYSCTL_NO_USER_ACCESS 0
+#define SYSCTL_USER_ACCESS 1
+#define SYSCTL_LEGACY 2
+
PMU_FORMAT_ATTR(event, "config:0-47");
PMU_FORMAT_ATTR(firmware, "config:63");
@@ -43,6 +47,9 @@ static const struct attribute_group *riscv_pmu_attr_groups[] = {
NULL,
};
+/* Allow legacy access by default */
+static int sysctl_perf_user_access __read_mostly = SYSCTL_LEGACY;
+
/*
* RISC-V doesn't have heterogeneous harts yet. This need to be part of
* per_cpu in case of harts with different pmu counters
@@ -301,6 +308,11 @@ int riscv_pmu_get_hpm_info(u32 *hw_ctr_width, u32 *num_hw_ctr)
}
EXPORT_SYMBOL_GPL(riscv_pmu_get_hpm_info);
+static uint8_t pmu_sbi_csr_index(struct perf_event *event)
+{
+ return pmu_ctr_list[event->hw.idx].csr - CSR_CYCLE;
+}
+
static unsigned long pmu_sbi_get_filter_flags(struct perf_event *event)
{
unsigned long cflags = 0;
@@ -329,18 +341,30 @@ static int pmu_sbi_ctr_get_idx(struct perf_event *event)
struct cpu_hw_events *cpuc = this_cpu_ptr(rvpmu->hw_events);
struct sbiret ret;
int idx;
- uint64_t cbase = 0;
+ uint64_t cbase = 0, cmask = rvpmu->cmask;
unsigned long cflags = 0;
cflags = pmu_sbi_get_filter_flags(event);
+
+ /* In legacy mode, we have to force the fixed counters for those events */
+ if (hwc->flags & PERF_EVENT_FLAG_LEGACY) {
+ if (event->attr.config == PERF_COUNT_HW_CPU_CYCLES) {
+ cflags |= SBI_PMU_CFG_FLAG_SKIP_MATCH;
+ cmask = 1;
+ } else if (event->attr.config == PERF_COUNT_HW_INSTRUCTIONS) {
+ cflags |= SBI_PMU_CFG_FLAG_SKIP_MATCH;
+ cmask = 1UL << (CSR_INSTRET - CSR_CYCLE);
+ }
+ }
+
/* retrieve the available counter index */
#if defined(CONFIG_32BIT)
ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_CFG_MATCH, cbase,
- rvpmu->cmask, cflags, hwc->event_base, hwc->config,
+ cmask, cflags, hwc->event_base, hwc->config,
hwc->config >> 32);
#else
ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_CFG_MATCH, cbase,
- rvpmu->cmask, cflags, hwc->event_base, hwc->config, 0);
+ cmask, cflags, hwc->event_base, hwc->config, 0);
#endif
if (ret.error) {
pr_debug("Not able to find a counter for event %lx config %llx\n",
@@ -490,6 +514,11 @@ static void pmu_sbi_ctr_start(struct perf_event *event, u64 ival)
if (ret.error && (ret.error != SBI_ERR_ALREADY_STARTED))
pr_err("Starting counter idx %d failed with error %d\n",
hwc->idx, sbi_err_map_linux_errno(ret.error));
+
+ if (!(event->hw.flags & PERF_EVENT_FLAG_LEGACY) &&
+ event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT)
+ csr_write(CSR_SCOUNTEREN,
+ csr_read(CSR_SCOUNTEREN) | (1 << pmu_sbi_csr_index(event)));
}
static void pmu_sbi_ctr_stop(struct perf_event *event, unsigned long flag)
@@ -497,6 +526,11 @@ static void pmu_sbi_ctr_stop(struct perf_event *event, unsigned long flag)
struct sbiret ret;
struct hw_perf_event *hwc = &event->hw;
+ if (!(event->hw.flags & PERF_EVENT_FLAG_LEGACY) &&
+ event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT)
+ csr_write(CSR_SCOUNTEREN,
+ csr_read(CSR_SCOUNTEREN) & ~(1 << pmu_sbi_csr_index(event)));
+
ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, hwc->idx, 1, flag, 0, 0, 0);
if (ret.error && (ret.error != SBI_ERR_ALREADY_STOPPED) &&
flag != SBI_PMU_STOP_FLAG_RESET)
@@ -704,10 +738,13 @@ static int pmu_sbi_starting_cpu(unsigned int cpu, struct hlist_node *node)
struct cpu_hw_events *cpu_hw_evt = this_cpu_ptr(pmu->hw_events);
/*
- * Enable the access for CYCLE, TIME, and INSTRET CSRs from userspace,
- * as is necessary to maintain uABI compatibility.
+ * We keep enabling userspace access to CYCLE, TIME and INSRET via the
+ * legacy option but that will be removed in the future.