From: Tonghao Zhang <tong@xxxxxxxxxxxxx> The number of online cpu may be not equal to possible cpu. bpftool prog profile, can not create pmu event on possible but on online cpu. $ dmidecode -s system-product-name PowerEdge R620 $ cat /sys/devices/system/cpu/online 0-31 $ cat /sys/devices/system/cpu/possible 0-47 BTW, we can disable CPU dynamically: $ echo 0 > /sys/devices/system/cpu/cpuX/online If CPU is offline, perf_event_open will return ENODEV. To fix this issue, check the value returned and skip offline CPU. Signed-off-by: Tonghao Zhang <tong@xxxxxxxxxxxxx> Cc: Quentin Monnet <quentin@xxxxxxxxxxxxx> Cc: Alexei Starovoitov <ast@xxxxxxxxxx> Cc: Daniel Borkmann <daniel@xxxxxxxxxxxxx> Cc: Andrii Nakryiko <andrii@xxxxxxxxxx> Cc: Martin KaFai Lau <martin.lau@xxxxxxxxx> Cc: Song Liu <song@xxxxxxxxxx> Cc: Yonghong Song <yhs@xxxxxx> Cc: John Fastabend <john.fastabend@xxxxxxxxx> Cc: KP Singh <kpsingh@xxxxxxxxxx> Cc: Stanislav Fomichev <sdf@xxxxxxxxxx> Cc: Hao Luo <haoluo@xxxxxxxxxx> Cc: Jiri Olsa <jolsa@xxxxxxxxxx> --- v1: https://patchwork.kernel.org/project/netdevbpf/patch/20230117044902.98938-1-tong@xxxxxxxxxxxxx/ https://patchwork.kernel.org/project/netdevbpf/patch/20230117044902.98938-2-tong@xxxxxxxxxxxxx/ --- tools/bpf/bpftool/prog.c | 36 ++++++++++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index cfc9fdc1e863..f48067cb0496 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -2233,10 +2233,36 @@ static void profile_close_perf_events(struct profiler_bpf *obj) profile_perf_event_cnt = 0; } +static int profile_open_perf_event(int mid, int cpu, int map_fd) +{ + int pmu_fd; + + pmu_fd = syscall(__NR_perf_event_open, &metrics[mid].attr, + -1/*pid*/, cpu, -1/*group_fd*/, 0); + if (pmu_fd < 0) { + if (errno == ENODEV) { + p_info("cpu %d may be offline, skip %s metric profiling.", + cpu, metrics[mid].name); + profile_perf_event_cnt++; + return 0; + } + return -1; + } + + if (bpf_map_update_elem(map_fd, + &profile_perf_event_cnt, + &pmu_fd, BPF_ANY) || + ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0)) + return -1; + + profile_perf_events[profile_perf_event_cnt++] = pmu_fd; + return 0; +} + static int profile_open_perf_events(struct profiler_bpf *obj) { unsigned int cpu, m; - int map_fd, pmu_fd; + int map_fd; profile_perf_events = calloc( sizeof(int), obj->rodata->num_cpu * obj->rodata->num_metric); @@ -2255,17 +2281,11 @@ static int profile_open_perf_events(struct profiler_bpf *obj) if (!metrics[m].selected) continue; for (cpu = 0; cpu < obj->rodata->num_cpu; cpu++) { - pmu_fd = syscall(__NR_perf_event_open, &metrics[m].attr, - -1/*pid*/, cpu, -1/*group_fd*/, 0); - if (pmu_fd < 0 || - bpf_map_update_elem(map_fd, &profile_perf_event_cnt, - &pmu_fd, BPF_ANY) || - ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0)) { + if (profile_open_perf_event(m, cpu, map_fd)) { p_err("failed to create event %s on cpu %d", metrics[m].name, cpu); return -1; } - profile_perf_events[profile_perf_event_cnt++] = pmu_fd; } } return 0; -- 2.27.0