> On Jan 18, 2023, at 6:41 PM, Quentin Monnet <quentin@xxxxxxxxxxxxx> wrote:
> 
> 2023-01-17 12:49 UTC+0800 ~ tong@xxxxxxxxxxxxx
>> From: Tonghao Zhang <tong@xxxxxxxxxxxxx>
>> 
>> The number of online cpu may be not equal to possible cpu.
>> bpftool prog profile, can not create pmu event on possible
>> but not online cpu.
> 
> s/not/on/ ?
> 
>> 
>> $ dmidecode -s system-product-name
>> PowerEdge R620
>> $ cat /sys/devices/system/cpu/online
>> 0-31
>> $ cat /sys/devices/system/cpu/possible
>> 0-47
>> 
>> To fix this issue, use online cpu instead of possible, to
>> create perf event and other resource.
>> 
>> Signed-off-by: Tonghao Zhang <tong@xxxxxxxxxxxxx>
>> Cc: Quentin Monnet <quentin@xxxxxxxxxxxxx>
>> Cc: Alexei Starovoitov <ast@xxxxxxxxxx>
>> Cc: Daniel Borkmann <daniel@xxxxxxxxxxxxx>
>> Cc: Andrii Nakryiko <andrii@xxxxxxxxxx>
>> Cc: Martin KaFai Lau <martin.lau@xxxxxxxxx>
>> Cc: Song Liu <song@xxxxxxxxxx>
>> Cc: Yonghong Song <yhs@xxxxxx>
>> Cc: John Fastabend <john.fastabend@xxxxxxxxx>
>> Cc: KP Singh <kpsingh@xxxxxxxxxx>
>> Cc: Stanislav Fomichev <sdf@xxxxxxxxxx>
>> Cc: Hao Luo <haoluo@xxxxxxxxxx>
>> Cc: Jiri Olsa <jolsa@xxxxxxxxxx>
>> ---
>>  tools/bpf/bpftool/prog.c | 5 +++--
>>  1 file changed, 3 insertions(+), 2 deletions(-)
>> 
>> diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
>> index cfc9fdc1e863..08b352dd799e 100644
>> --- a/tools/bpf/bpftool/prog.c
>> +++ b/tools/bpf/bpftool/prog.c
>> @@ -2056,6 +2056,7 @@ static int profile_parse_metrics(int argc, char **argv)
>>  
>>  static void profile_read_values(struct profiler_bpf *obj)
>>  {
>> +	__u32 possible_cpus = libbpf_num_possible_cpus();
>>  	__u32 m, cpu, num_cpu = obj->rodata->num_cpu;
>>  	int reading_map_fd, count_map_fd;
>>  	__u64 counts[num_cpu];
>> @@ -2080,7 +2081,7 @@ static void profile_read_values(struct profiler_bpf *obj)
>>  		profile_total_count += counts[cpu];
>>  
>>  	for (m = 0; m < ARRAY_SIZE(metrics); m++) {
>> -		struct bpf_perf_event_value values[num_cpu];
>> +		struct bpf_perf_event_value values[possible_cpus];
>>  
>>  		if (!metrics[m].selected)
>>  			continue;
>> @@ -2321,7 +2322,7 @@ static int do_profile(int argc, char **argv)
>>  	if (num_metric <= 0)
>>  		goto out;
>>  
>> -	num_cpu = libbpf_num_possible_cpus();
>> +	num_cpu = libbpf_num_online_cpus();
>>  	if (num_cpu <= 0) {
>>  		p_err("failed to identify number of CPUs");
>>  		goto out;
> 
> Thanks, but it doesn't seem to be enough to solve the issue. How did you
> test it? With your series applied locally, I'm trying the following
> (Intel x86_64, CPUs: 0..7):
> 
> # echo 0 > /sys/devices/system/cpu/cpu2/online
> # ./bpftool prog profile id 1525 duration 1 cycles instructions
> Error: failed to create event cycles on cpu 2
> 
> It seems that we're still trying to open the perf events on the offline
> CPU in profile_open_perf_events(), because even though we try to use
> fewer of the possible CPUs we're still referencing them in order by
> their index. So it works if I only disabled the last CPUs instead (#7,
> then #6, ...), but to work with _any_ CPU disabled, we would need to
> retrieve the list of online CPUs.

Hi Quentin,

Thanks for your review and comments. I did not test the case where the
offline CPU is not the last one.

Yes. Alternatively, we can use the errno returned by perf_event_open():
if it is ENODEV, can we simply skip that CPU for profiling?
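For reference, if we instead went with your suggestion of retrieving the
online CPU list, the parsing is not much code. A rough, untested sketch
follows; the helper name read_online_cpus() is made up and this is not
bpftool code (if I remember correctly, libbpf has a similar internal
helper, parse_cpu_mask_file(), but it is not exported):

/* Sketch: build a bitmap of online CPUs from sysfs. The file contains
 * a comma-separated list of ranges, e.g. "0-31" or "0,2-5,7".
 * Minimal error handling; for illustration only.
 */
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

static int read_online_cpus(bool *online, int max_cpus)
{
	int start, end, cpu, n, num_online = 0;
	char sep;
	FILE *f;

	f = fopen("/sys/devices/system/cpu/online", "r");
	if (!f)
		return -1;

	memset(online, 0, max_cpus * sizeof(bool));
	while ((n = fscanf(f, "%d%c", &start, &sep)) >= 1) {
		end = start;
		if (n == 2 && sep == '-')	/* range: read the end */
			n = fscanf(f, "%d%c", &end, &sep);
		for (cpu = start; cpu <= end && cpu < max_cpus; cpu++) {
			online[cpu] = true;
			num_online++;
		}
		if (n < 2 || sep != ',')	/* '\n' or EOF: done */
			break;
	}
	fclose(f);
	return num_online;
}

profile_open_perf_events() could then skip any cpu with !online[cpu]
before calling perf_event_open(). That said, checking for ENODEV avoids
the extra parsing.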
The patch below fixes this issue:

diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
index 620032042576..deec7196b48c 100644
--- a/tools/bpf/bpftool/common.c
+++ b/tools/bpf/bpftool/common.c
@@ -55,6 +55,24 @@ void p_err(const char *fmt, ...)
 	va_end(ap);
 }
 
+void p_warn(const char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	if (json_output) {
+		jsonw_start_object(json_wtr);
+		jsonw_name(json_wtr, "warning");
+		jsonw_vprintf_enquote(json_wtr, fmt, ap);
+		jsonw_end_object(json_wtr);
+	} else {
+		fprintf(stderr, "Warn: ");
+		vfprintf(stderr, fmt, ap);
+		fprintf(stderr, "\n");
+	}
+	va_end(ap);
+}
+
 void p_info(const char *fmt, ...)
 {
 	va_list ap;
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index a84224b6a604..e62edec9e13a 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -86,6 +86,7 @@ extern struct btf *base_btf;
 extern struct hashmap *refs_table;
 
 void __printf(1, 2) p_err(const char *fmt, ...);
+void __printf(1, 2) p_warn(const char *fmt, ...);
 void __printf(1, 2) p_info(const char *fmt, ...);
 
 bool is_prefix(const char *pfx, const char *str);
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index cfc9fdc1e863..d9363ba01ec0 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -2233,10 +2233,36 @@ static void profile_close_perf_events(struct profiler_bpf *obj)
 	profile_perf_event_cnt = 0;
 }
 
+static int profile_open_perf_event(int mid, int cpu, int map_fd)
+{
+	int pmu_fd;
+
+	pmu_fd = syscall(__NR_perf_event_open, &metrics[mid].attr,
+			 -1/*pid*/, cpu, -1/*group_fd*/, 0);
+	if (pmu_fd < 0) {
+		if (errno == ENODEV) {
+			p_warn("cpu %d may be offline, skip %s metric profiling.",
+			       cpu, metrics[mid].name);
+			profile_perf_event_cnt++;
+			return 0;
+		}
+		return -1;
+	}
+
+	if (bpf_map_update_elem(map_fd,
+				&profile_perf_event_cnt,
+				&pmu_fd, BPF_ANY) ||
+	    ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0))
+		return -1;
+
+	profile_perf_events[profile_perf_event_cnt++] = pmu_fd;
+	return 0;
+}
+
 static int profile_open_perf_events(struct profiler_bpf *obj)
 {
 	unsigned int cpu, m;
-	int map_fd, pmu_fd;
+	int map_fd;
 
 	profile_perf_events = calloc(
 		sizeof(int), obj->rodata->num_cpu * obj->rodata->num_metric);
@@ -2255,17 +2281,11 @@ static int profile_open_perf_events(struct profiler_bpf *obj)
 		if (!metrics[m].selected)
 			continue;
 		for (cpu = 0; cpu < obj->rodata->num_cpu; cpu++) {
-			pmu_fd = syscall(__NR_perf_event_open, &metrics[m].attr,
-					 -1/*pid*/, cpu, -1/*group_fd*/, 0);
-			if (pmu_fd < 0 ||
-			    bpf_map_update_elem(map_fd, &profile_perf_event_cnt,
-						&pmu_fd, BPF_ANY) ||
-			    ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0)) {
+			if (profile_open_perf_event(m, cpu, map_fd)) {
 				p_err("failed to create event %s on cpu %d",
 				      metrics[m].name, cpu);
 				return -1;
 			}
-			profile_perf_events[profile_perf_event_cnt++] = pmu_fd;
 		}
 	}
 	return 0;

----
Best Regards, Tonghao <tong@xxxxxxxxxxxxx>
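P.S. If I read the new code path correctly, with this patch your earlier
test should warn and continue instead of failing. Based on the p_warn()
format above, I would expect something like the following (inferred from
the code, not a captured transcript):

# echo 0 > /sys/devices/system/cpu/cpu2/online
# ./bpftool prog profile id 1525 duration 1 cycles instructions
Warn: cpu 2 may be offline, skip cycles metric profiling.
Warn: cpu 2 may be offline, skip instructions metric profiling.

followed by the usual per-metric counts for the remaining CPUs.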