The recursion below is observed in a rare scenario: __schedule() takes the rq lock and, at around the same time, the task's affinity is being changed. The bpf function for tracing sched_switch then calls migrate_enable(), which checks for an affinity change (cpus_ptr != cpus_mask) and lands in __set_cpus_allowed_ptr(), which tries to acquire the rq lock again and causes the recursion bug.

Fix the issue by replacing migrate_disable()/migrate_enable() with preempt_disable()/preempt_enable() in bpf_prog_run_array().

-010 |spin_bug(lock = ???, msg = ???)
-011 |debug_spin_lock_before(inline)
-011 |do_raw_spin_lock(lock = 0xFFFFFF89323BB600)
-012 |_raw_spin_lock(inline)
-012 |raw_spin_rq_lock_nested(inline)
-012 |raw_spin_rq_lock(inline)
-012 |task_rq_lock(p = 0xFFFFFF88CFF1DA00, rf = 0xFFFFFFC03707BBE8)
-013 |__set_cpus_allowed_ptr(inline)
-013 |migrate_enable()
-014 |trace_call_bpf(call = ?, ctx = 0xFFFFFFFDEF954600)
-015 |perf_trace_run_bpf_submit(inline)
-015 |perf_trace_sched_switch(__data = 0xFFFFFFE82CF0BCB8, preempt = FALSE, prev = ?, next = ?)
-016 |__traceiter_sched_switch(inline)
-016 |trace_sched_switch(inline)
-016 |__schedule(sched_mode = ?)
-017 |schedule()
-018 |arch_local_save_flags(inline)
-018 |arch_irqs_disabled(inline)
-018 |__raw_spin_lock_irq(inline)
-018 |_raw_spin_lock_irq(inline)
-018 |worker_thread(__worker = 0xFFFFFF88CE251300)
-019 |kthread(_create = 0xFFFFFF88730A5A80)
-020 |ret_from_fork(asm)

Signed-off-by: Satya Durga Srinivasu Prabhala <quic_satyap@xxxxxxxxxxx>
---
 include/linux/bpf.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 2b914a56a2c5..4ecb065140e9 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1414,7 +1414,7 @@ bpf_prog_run_array(const struct bpf_prog_array *array,
 	if (unlikely(!array))
 		return ret;
 
-	migrate_disable();
+	preempt_disable();
 	old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
 	item = &array->items[0];
 	while ((prog = READ_ONCE(item->prog))) {
@@ -1423,7 +1423,7 @@ bpf_prog_run_array(const struct bpf_prog_array *array,
 		item++;
 	}
 	bpf_reset_run_ctx(old_run_ctx);
-	migrate_enable();
+	preempt_enable();
 	return ret;
 }
 
-- 
2.36.1