Re: [PATCH v3 bpf-next 1/2] bpf: Patch to Fix deadlocks in queue and stack maps

On 5/14/24 08:40, Siddharth Chintamaneni wrote:
[...]
+static inline int map_lock_inc(struct bpf_queue_stack *qs)
+{
+	unsigned long flags;
+
+	preempt_disable();
+	local_irq_save(flags);
+	if (unlikely(__this_cpu_inc_return(*(qs->map_locked)) != 1)) {
+		__this_cpu_dec(*(qs->map_locked));
+		local_irq_restore(flags);
+		preempt_enable();
+		return -EBUSY;
+	}
+
+	local_irq_restore(flags);
+	preempt_enable();

it looks like you're taking the approach from kernel/bpf/hashtab.c to use a per-cpu lock before grabbing the real lock. but in the success case here (where you incremented the percpu counter), you're enabling irqs and preemption.

what happens if you get preempted right after this? you've left the per-cpu bit set, but then you run on another cpu.
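
concretely, here's the kind of interleaving i'm worried about (illustrative only; it assumes the task is free to migrate once preemption is back on):

    task A on cpu 0:
        map_lock_inc()                  /* cpu 0's map_locked goes to 1 */
        local_irq_restore(); preempt_enable();
    <task A is preempted and migrated to cpu 1>
    task A, now on cpu 1:
        raw_spin_lock_irqsave(&qs->lock, flags);
        ... do the update ...
        map_unlock_dec()                /* decrements cpu 1's counter,
                                           not the one that was bumped */

after that, cpu 0's counter is stuck at 1 and cpu 1's has gone negative, so in this interleaving both cpus fail every later push/pop with -EBUSY.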

possible alternative: instead of splitting the overall lock into "grab percpu lock, then grab real lock", have a single function for both, similar to htab_lock_bucket(). and keep irqs and preemption off from the moment you start attempting the overall lock until you completely unlock.
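
something along these lines is what i have in mind (rough sketch only, untested, names made up; it reuses the qs->map_locked percpu counter your patch adds):

	static inline int queue_stack_map_lock(struct bpf_queue_stack *qs,
					       unsigned long *pflags)
	{
		unsigned long flags;

		preempt_disable();
		local_irq_save(flags);

		/* same-cpu reentry (including from an nmi) bails out here */
		if (unlikely(__this_cpu_inc_return(*(qs->map_locked)) != 1)) {
			__this_cpu_dec(*(qs->map_locked));
			local_irq_restore(flags);
			preempt_enable();
			return -EBUSY;
		}

		/*
		 * irqs and preemption stay off until unlock, so the task
		 * can't migrate away while this cpu's counter is raised.
		 */
		raw_spin_lock(&qs->lock);
		*pflags = flags;

		return 0;
	}

	static inline void queue_stack_map_unlock(struct bpf_queue_stack *qs,
						  unsigned long flags)
	{
		raw_spin_unlock(&qs->lock);
		__this_cpu_dec(*(qs->map_locked));
		local_irq_restore(flags);
		preempt_enable();
	}

callers would then do "ret = queue_stack_map_lock(qs, &flags); if (ret) return ret;" up front and queue_stack_map_unlock(qs, flags) on the way out, and the in_nmi() trylock special case can probably go away entirely, since same-cpu recursion (nmi included) is already caught by the counter -- that's how hashtab gets away without it.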

barret


+
+	return 0;
+}
+
+static inline void map_unlock_dec(struct bpf_queue_stack *qs)
+{
+	unsigned long flags;
+
+	preempt_disable();
+	local_irq_save(flags);
+	__this_cpu_dec(*(qs->map_locked));
+	local_irq_restore(flags);
+	preempt_enable();
+}
+
  static long __queue_map_get(struct bpf_map *map, void *value, bool delete)
  {
  	struct bpf_queue_stack *qs = bpf_queue_stack(map);
  	unsigned long flags;
  	int err = 0;
  	void *ptr;
+	int ret;
+
+	ret = map_lock_inc(qs);
+	if (ret)
+		return ret;
  	if (in_nmi()) {
-		if (!raw_spin_trylock_irqsave(&qs->lock, flags))
+		if (!raw_spin_trylock_irqsave(&qs->lock, flags)) {
+			map_unlock_dec(qs);
  			return -EBUSY;
+		}
  	} else {
  		raw_spin_lock_irqsave(&qs->lock, flags);
  	}
@@ -121,6 +170,8 @@ static long __queue_map_get(struct bpf_map *map, void *value, bool delete)
out:
  	raw_spin_unlock_irqrestore(&qs->lock, flags);
+	map_unlock_dec(qs);
+
  	return err;
  }
@@ -132,10 +183,17 @@ static long __stack_map_get(struct bpf_map *map, void *value, bool delete)
  	int err = 0;
  	void *ptr;
  	u32 index;
+	int ret;
+
+	ret = map_lock_inc(qs);
+	if (ret)
+		return ret;
  	if (in_nmi()) {
-		if (!raw_spin_trylock_irqsave(&qs->lock, flags))
+		if (!raw_spin_trylock_irqsave(&qs->lock, flags)) {
+			map_unlock_dec(qs);
  			return -EBUSY;
+		}
  	} else {
  		raw_spin_lock_irqsave(&qs->lock, flags);
  	}
@@ -158,6 +216,8 @@ static long __stack_map_get(struct bpf_map *map, void *value, bool delete)
out:
  	raw_spin_unlock_irqrestore(&qs->lock, flags);
+	map_unlock_dec(qs);
+
  	return err;
  }
@@ -193,6 +253,7 @@ static long queue_stack_map_push_elem(struct bpf_map *map, void *value,
  	unsigned long irq_flags;
  	int err = 0;
  	void *dst;
+	int ret;
  	/* BPF_EXIST is used to force making room for a new element in case the
  	 * map is full
@@ -203,9 +264,16 @@ static long queue_stack_map_push_elem(struct bpf_map *map, void *value,
  	if (flags & BPF_NOEXIST || flags > BPF_EXIST)
  		return -EINVAL;
+
+	ret = map_lock_inc(qs);
+	if (ret)
+		return ret;
+
  	if (in_nmi()) {
-		if (!raw_spin_trylock_irqsave(&qs->lock, irq_flags))
+		if (!raw_spin_trylock_irqsave(&qs->lock, irq_flags)) {
+			map_unlock_dec(qs);
  			return -EBUSY;
+		}
  	} else {
  		raw_spin_lock_irqsave(&qs->lock, irq_flags);
  	}
@@ -228,6 +296,8 @@ static long queue_stack_map_push_elem(struct bpf_map *map, void *value,
out:
  	raw_spin_unlock_irqrestore(&qs->lock, irq_flags);
+	map_unlock_dec(qs);
+
  	return err;
  }




