[PATCH bpf-next 7/7] bpf: Wait for sleepable BPF program in maybe_wait_bpf_programs()

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Hou Tao <houtao1@xxxxxxxxxx>

Since commit 638e4b825d52 ("bpf: Allows per-cpu maps and map-in-map in
sleepable programs"), sleepable BPF programs can use map-in-map, but
maybe_wait_bpf_programs() doesn't take that into account.

So check the value of sleepable_refcnt in maybe_wait_bpf_programs(): if
the value is not zero, use synchronize_rcu_mult() to wait for both
sleepable and non-sleepable BPF programs. However, a bpf syscall issued
from a syscall program is special, because that bpf syscall is invoked
with rcu_read_lock_trace() held, and using synchronize_rcu_mult() to
wait for the exit of sleepable BPF programs would deadlock, so skip
waiting for sleepable BPF programs when the bpf syscall comes from a
syscall program.

Signed-off-by: Hou Tao <houtao1@xxxxxxxxxx>
---
 kernel/bpf/syscall.c | 28 +++++++++++++++++++---------
 1 file changed, 19 insertions(+), 9 deletions(-)

diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index d2641e51a1a7..6b9d7990d95f 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -35,6 +35,7 @@
 #include <linux/rcupdate_trace.h>
 #include <linux/memcontrol.h>
 #include <linux/trace_events.h>
+#include <linux/rcupdate_wait.h>
 
 #include <net/netfilter/nf_bpf_link.h>
 #include <net/netkit.h>
@@ -140,15 +141,24 @@ static u32 bpf_map_value_size(const struct bpf_map *map)
 		return  map->value_size;
 }
 
-static void maybe_wait_bpf_programs(struct bpf_map *map)
+static void maybe_wait_bpf_programs(struct bpf_map *map, bool rcu_trace_lock_held)
 {
-	/* Wait for any running BPF programs to complete so that
-	 * userspace, when we return to it, knows that all programs
-	 * that could be running use the new map value.
+	/* Wait for any running non-sleepable and sleepable BPF programs to
+	 * complete, so that userspace, when we return to it, knows that all
+	 * programs that could be running use the new map value. However
+	 * syscall program can also use bpf syscall to update or delete inner
+	 * map in outer map, and it holds rcu_read_lock_trace() before doing
+	 * the bpf syscall. If use synchronize_rcu_mult(call_rcu_tasks_trace)
+	 * to wait for the exit of running sleepable BPF programs, there will
+	 * be dead-lock, so skip the waiting for syscall program.
 	 */
 	if (map->map_type == BPF_MAP_TYPE_HASH_OF_MAPS ||
-	    map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
-		synchronize_rcu();
+	    map->map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS) {
+		if (atomic64_read(&map->sleepable_refcnt) && !rcu_trace_lock_held)
+			synchronize_rcu_mult(call_rcu, call_rcu_tasks_trace);
+		else
+			synchronize_rcu();
+	}
 }
 
 static int bpf_map_update_value(struct bpf_map *map, struct file *map_file,
@@ -1561,7 +1571,7 @@ static int map_update_elem(union bpf_attr *attr, bpfptr_t uattr)
 
 	err = bpf_map_update_value(map, f.file, key, value, attr->flags);
 	if (!err)
-		maybe_wait_bpf_programs(map);
+		maybe_wait_bpf_programs(map, bpfptr_is_kernel(uattr));
 
 	kvfree(value);
 free_key:
@@ -1618,7 +1628,7 @@ static int map_delete_elem(union bpf_attr *attr, bpfptr_t uattr)
 	rcu_read_unlock();
 	bpf_enable_instrumentation();
 	if (!err)
-		maybe_wait_bpf_programs(map);
+		maybe_wait_bpf_programs(map, bpfptr_is_kernel(uattr));
 out:
 	kvfree(key);
 err_put:
@@ -4973,7 +4983,7 @@ static int bpf_map_do_batch(union bpf_attr *attr,
 err_put:
 	if (has_write) {
 		if (attr->batch.count)
-			maybe_wait_bpf_programs(map);
+			maybe_wait_bpf_programs(map, false);
 		bpf_map_write_active_dec(map);
 	}
 	fdput(f);
-- 
2.29.2





[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux