The bpf_prog_active counter is used to avoid recursion on the same CPU. On RT we can't keep it inside a preempt-disabled section because the syscall may need to acquire locks or allocate memory while the counter is held. Use a local lock (local_lock()) instead of preempt_disable() to avoid recursion on the same CPU. Signed-off-by: Sebastian Andrzej Siewior <bigeasy@xxxxxxxxxxxxx> --- include/linux/bpf.h | 2 ++ kernel/bpf/hashtab.c | 4 ++-- kernel/bpf/syscall.c | 13 +++++++------ kernel/events/core.c | 3 ++- kernel/trace/bpf_trace.c | 5 ++--- 5 files changed, 15 insertions(+), 12 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index e734f163bd0b9..667f45de65be8 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -16,6 +16,7 @@ #include <linux/rbtree_latch.h> #include <linux/numa.h> #include <linux/wait.h> +#include <linux/locallock.h> struct bpf_verifier_env; struct perf_event; @@ -467,6 +468,7 @@ _out: \ #ifdef CONFIG_BPF_SYSCALL DECLARE_PER_CPU(int, bpf_prog_active); +DECLARE_LOCAL_IRQ_LOCK(bpf_prog_active_lock); extern const struct file_operations bpf_map_fops; extern const struct file_operations bpf_prog_fops; diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c index b4f903a5ef36e..15120d2d8b659 100644 --- a/kernel/bpf/hashtab.c +++ b/kernel/bpf/hashtab.c @@ -668,11 +668,11 @@ static void htab_elem_free_rcu(struct rcu_head *head) * we're calling kfree, otherwise deadlock is possible if kprobes * are placed somewhere inside of slub */ - preempt_disable(); + local_lock(bpf_prog_active_lock); __this_cpu_inc(bpf_prog_active); htab_elem_free(htab, l); __this_cpu_dec(bpf_prog_active); - preempt_enable(); + local_unlock(bpf_prog_active_lock); } static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 84470d1480aa4..73f2edbe3b28c 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -42,6 +42,7 @@ #define BPF_OBJ_FLAG_MASK (BPF_F_RDONLY | BPF_F_WRONLY) DEFINE_PER_CPU(int, bpf_prog_active); 
+DEFINE_LOCAL_IRQ_LOCK(bpf_prog_active_lock); static DEFINE_IDR(prog_idr); static DEFINE_SPINLOCK(prog_idr_lock); static DEFINE_IDR(map_idr); @@ -716,7 +717,7 @@ static int map_lookup_elem(union bpf_attr *attr) goto done; } - preempt_disable(); + local_lock(bpf_prog_active_lock); this_cpu_inc(bpf_prog_active); if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { @@ -750,7 +751,7 @@ static int map_lookup_elem(union bpf_attr *attr) rcu_read_unlock(); } this_cpu_dec(bpf_prog_active); - preempt_enable(); + local_unlock(bpf_prog_active_lock); done: if (err) @@ -845,7 +846,7 @@ static int map_update_elem(union bpf_attr *attr) /* must increment bpf_prog_active to avoid kprobe+bpf triggering from * inside bpf map update or delete otherwise deadlocks are possible */ - preempt_disable(); + local_lock(bpf_prog_active_lock); __this_cpu_inc(bpf_prog_active); if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) { @@ -878,7 +879,7 @@ static int map_update_elem(union bpf_attr *attr) rcu_read_unlock(); } __this_cpu_dec(bpf_prog_active); - preempt_enable(); + local_unlock(bpf_prog_active_lock); maybe_wait_bpf_programs(map); out: free_value: @@ -925,13 +926,13 @@ static int map_delete_elem(union bpf_attr *attr) goto out; } - preempt_disable(); + local_lock(bpf_prog_active_lock); __this_cpu_inc(bpf_prog_active); rcu_read_lock(); err = map->ops->map_delete_elem(map, key); rcu_read_unlock(); __this_cpu_dec(bpf_prog_active); - preempt_enable(); + local_unlock(bpf_prog_active_lock); maybe_wait_bpf_programs(map); out: kfree(key); diff --git a/kernel/events/core.c b/kernel/events/core.c index b3155a155a645..6facb80af7c0e 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -8546,7 +8546,7 @@ static void bpf_overflow_handler(struct perf_event *event, int ret = 0; ctx.regs = perf_arch_bpf_user_pt_regs(regs); - preempt_disable(); + local_lock(bpf_prog_active_lock); if 
(unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) goto out; rcu_read_lock(); @@ -8555,6 +8555,7 @@ static void bpf_overflow_handler(struct perf_event *event, out: __this_cpu_dec(bpf_prog_active); preempt_enable(); + local_unlock(bpf_prog_active_lock); if (!ret) return; diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index f1a86a0d881dd..bb92cf31481b4 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -78,8 +78,7 @@ unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx) if (in_nmi()) /* not supported yet */ return 1; - preempt_disable(); - + local_lock(bpf_prog_active_lock); if (unlikely(__this_cpu_inc_return(bpf_prog_active) != 1)) { /* * since some bpf program is already running on this cpu, @@ -110,7 +109,7 @@ unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx) out: __this_cpu_dec(bpf_prog_active); - preempt_enable(); + local_unlock(bpf_prog_active_lock); return ret; } -- 2.20.1