This patch is a revised version that addresses a possible deadlock in the
queue and stack map types. The deadlock can occur when a nested BPF program
tries to acquire the same lock that its parent BPF program already holds
while performing a write operation on the same map. The bug was also
reported by syzbot.

Link: https://lore.kernel.org/lkml/0000000000004c3fc90615f37756@xxxxxxxxxx/
Reported-by: syzbot+8bdfc2c53fb2b63e1871@xxxxxxxxxxxxxxxxxxxxxxxxx
Fixes: f1a2e44a3aec ("bpf: add queue and stack maps")
Signed-off-by: Siddharth Chintamaneni <sidchintamaneni@xxxxxxxxx>
---
 kernel/bpf/queue_stack_maps.c | 76 +++++++++++++++++++++++++++++++++--
 1 file changed, 73 insertions(+), 3 deletions(-)
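For reviewers (not part of the commit message): below is a minimal userspace
model of the recursion guard this patch introduces. A C11 thread-local
counter stands in for the per-CPU map_locked, and the helper names
(guard_enter, guard_exit, fake_push) are hypothetical, not from the patch.
A nested call on the same thread sees the counter already at 1 and backs off
with -EBUSY, the same back-off map_lock_inc() enforces per CPU.

	#include <stdio.h>
	#include <errno.h>

	/* Stand-in for the per-CPU qs->map_locked counter. */
	static _Thread_local int map_locked;

	/* Models map_lock_inc(): refuse re-entry on the same thread. */
	static int guard_enter(void)
	{
		if (++map_locked != 1) {
			--map_locked;
			return -EBUSY;
		}
		return 0;
	}

	/* Models map_unlock_dec(). */
	static void guard_exit(void)
	{
		--map_locked;
	}

	/* A map update; 'nested' models a BPF program that fires while
	 * the outer update still holds the guard.
	 */
	static int fake_push(int nested)
	{
		int err = guard_enter();

		if (err)
			return err; /* nested caller bails out, no deadlock */
		if (nested)
			printf("nested push -> %d\n", fake_push(0));
		guard_exit();
		return 0;
	}

	int main(void)
	{
		printf("outer push -> %d\n", fake_push(1)); /* nested sees -EBUSY */
		return 0;
	}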
diff --git a/kernel/bpf/queue_stack_maps.c b/kernel/bpf/queue_stack_maps.c
index d869f51ea93a..b5ed76c9ddd7 100644
--- a/kernel/bpf/queue_stack_maps.c
+++ b/kernel/bpf/queue_stack_maps.c
@@ -13,11 +13,13 @@
 #define QUEUE_STACK_CREATE_FLAG_MASK \
 	(BPF_F_NUMA_NODE | BPF_F_ACCESS_MASK)
 
+
 struct bpf_queue_stack {
 	struct bpf_map map;
 	raw_spinlock_t lock;
 	u32 head, tail;
 	u32 size; /* max_entries + 1 */
+	int __percpu *map_locked;
 
 	char elements[] __aligned(8);
 };
@@ -78,6 +80,15 @@ static struct bpf_map *queue_stack_map_alloc(union bpf_attr *attr)
 
 	qs->size = size;
 
+	qs->map_locked = bpf_map_alloc_percpu(&qs->map,
+					      sizeof(int),
+					      sizeof(int),
+					      GFP_USER | __GFP_NOWARN);
+	if (!qs->map_locked) {
+		bpf_map_area_free(qs);
+		return ERR_PTR(-ENOMEM);
+	}
+
 	raw_spin_lock_init(&qs->lock);
 
 	return &qs->map;
@@ -88,19 +99,57 @@ static void queue_stack_map_free(struct bpf_map *map)
 {
 	struct bpf_queue_stack *qs = bpf_queue_stack(map);
 
+	free_percpu(qs->map_locked);
 	bpf_map_area_free(qs);
 }
 
+static inline int map_lock_inc(struct bpf_queue_stack *qs)
+{
+	unsigned long flags;
+
+	preempt_disable();
+	local_irq_save(flags);
+	if (unlikely(__this_cpu_inc_return(*(qs->map_locked)) != 1)) {
+		__this_cpu_dec(*(qs->map_locked));
+		local_irq_restore(flags);
+		preempt_enable();
+		return -EBUSY;
+	}
+
+	local_irq_restore(flags);
+	preempt_enable();
+
+	return 0;
+}
+
+static inline void map_unlock_dec(struct bpf_queue_stack *qs)
+{
+	unsigned long flags;
+
+	preempt_disable();
+	local_irq_save(flags);
+	__this_cpu_dec(*(qs->map_locked));
+	local_irq_restore(flags);
+	preempt_enable();
+}
+
 static long __queue_map_get(struct bpf_map *map, void *value, bool delete)
 {
 	struct bpf_queue_stack *qs = bpf_queue_stack(map);
 	unsigned long flags;
 	int err = 0;
 	void *ptr;
+	int ret;
+
+	ret = map_lock_inc(qs);
+	if (ret)
+		return ret;
 
 	if (in_nmi()) {
-		if (!raw_spin_trylock_irqsave(&qs->lock, flags))
+		if (!raw_spin_trylock_irqsave(&qs->lock, flags)) {
+			map_unlock_dec(qs);
 			return -EBUSY;
+		}
 	} else {
 		raw_spin_lock_irqsave(&qs->lock, flags);
 	}
@@ -121,6 +170,8 @@ static long __queue_map_get(struct bpf_map *map, void *value, bool delete)
 
 out:
 	raw_spin_unlock_irqrestore(&qs->lock, flags);
+	map_unlock_dec(qs);
+
 	return err;
 }
 
@@ -132,10 +183,17 @@ static long __stack_map_get(struct bpf_map *map, void *value, bool delete)
 	int err = 0;
 	void *ptr;
 	u32 index;
+	int ret;
+
+	ret = map_lock_inc(qs);
+	if (ret)
+		return ret;
 
 	if (in_nmi()) {
-		if (!raw_spin_trylock_irqsave(&qs->lock, flags))
+		if (!raw_spin_trylock_irqsave(&qs->lock, flags)) {
+			map_unlock_dec(qs);
 			return -EBUSY;
+		}
 	} else {
 		raw_spin_lock_irqsave(&qs->lock, flags);
 	}
@@ -158,6 +216,8 @@ static long __stack_map_get(struct bpf_map *map, void *value, bool delete)
 
 out:
 	raw_spin_unlock_irqrestore(&qs->lock, flags);
+	map_unlock_dec(qs);
+
 	return err;
 }
 
@@ -193,6 +253,7 @@ static long queue_stack_map_push_elem(struct bpf_map *map, void *value,
 	unsigned long irq_flags;
 	int err = 0;
 	void *dst;
+	int ret;
 
 	/* BPF_EXIST is used to force making room for a new element in case the
 	 * map is full
@@ -203,9 +264,16 @@ static long queue_stack_map_push_elem(struct bpf_map *map, void *value,
 	if (flags & BPF_NOEXIST || flags > BPF_EXIST)
 		return -EINVAL;
 
+
+	ret = map_lock_inc(qs);
+	if (ret)
+		return ret;
+
 	if (in_nmi()) {
-		if (!raw_spin_trylock_irqsave(&qs->lock, irq_flags))
+		if (!raw_spin_trylock_irqsave(&qs->lock, irq_flags)) {
+			map_unlock_dec(qs);
 			return -EBUSY;
+		}
 	} else {
 		raw_spin_lock_irqsave(&qs->lock, irq_flags);
 	}
@@ -228,6 +296,8 @@ static long queue_stack_map_push_elem(struct bpf_map *map, void *value,
 
 out:
 	raw_spin_unlock_irqrestore(&qs->lock, irq_flags);
+	map_unlock_dec(qs);
+
 	return err;
 }
-- 
2.44.0