[PATCH bpf-next v1 08/15] bpf: Adapt copy_map_value for multiple offset case

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



The changes in this patch deserve a closer look, so they have been split into
their own independent patch. While earlier we had to skip at most two objects
while copying in and out of a map, now we potentially have many
objects (at most 8 + 2 = 10, due to the BPF_MAP_VALUE_OFF_MAX limit).

Hence, divide the copy_map_value function into an inlined fast path and
a function call to the slow path. The slow path handles the case of > 3
offsets, while we handle the most common cases (0, 1, 2, or 3 offsets) in
the inline function itself.

In copy_map_value_slow, we use 11 offsets, just to make the for loop
that copies the value free of edge cases for the last offset, by using
map->value_size as the final offset that bounds the remaining area to copy.

Signed-off-by: Kumar Kartikeya Dwivedi <memxor@xxxxxxxxx>
---
 include/linux/bpf.h  | 43 +++++++++++++++++++++++++++++++---
 kernel/bpf/syscall.c | 55 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 95 insertions(+), 3 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index ae599aaf8d4c..5d845ca02eba 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -253,12 +253,22 @@ static inline void check_and_init_map_value(struct bpf_map *map, void *dst)
 		memset(dst + map->spin_lock_off, 0, sizeof(struct bpf_spin_lock));
 	if (unlikely(map_value_has_timer(map)))
 		memset(dst + map->timer_off, 0, sizeof(struct bpf_timer));
+	if (unlikely(map_value_has_ptr_to_btf_id(map))) {
+		struct bpf_map_value_off *tab = map->ptr_off_tab;
+		int i;
+
+		for (i = 0; i < tab->nr_off; i++)
+			*(u64 *)(dst + tab->off[i].offset) = 0;
+	}
 }
 
+void copy_map_value_slow(struct bpf_map *map, void *dst, void *src, u32 s_off,
+			 u32 s_sz, u32 t_off, u32 t_sz);
+
 /* copy everything but bpf_spin_lock and bpf_timer. There could be one of each. */
 static inline void copy_map_value(struct bpf_map *map, void *dst, void *src)
 {
-	u32 s_off = 0, s_sz = 0, t_off = 0, t_sz = 0;
+	u32 s_off = 0, s_sz = 0, t_off = 0, t_sz = 0, p_off = 0, p_sz = 0;
 
 	if (unlikely(map_value_has_spin_lock(map))) {
 		s_off = map->spin_lock_off;
@@ -268,13 +278,40 @@ static inline void copy_map_value(struct bpf_map *map, void *dst, void *src)
 		t_off = map->timer_off;
 		t_sz = sizeof(struct bpf_timer);
 	}
+	/* Multiple offset case is slow, offload to function */
+	if (unlikely(map_value_has_ptr_to_btf_id(map))) {
+		struct bpf_map_value_off *tab = map->ptr_off_tab;
+
+		/* Inline the likely common case */
+		if (likely(tab->nr_off == 1)) {
+			p_off = tab->off[0].offset;
+			p_sz = sizeof(u64);
+		} else {
+			copy_map_value_slow(map, dst, src, s_off, s_sz, t_off, t_sz);
+			return;
+		}
+	}
+
+	if (unlikely(s_sz || t_sz || p_sz)) {
+		/* The order is p_off, t_off, s_off, use insertion sort */
 
-	if (unlikely(s_sz || t_sz)) {
+		if (t_off < p_off || !t_sz) {
+			swap(t_off, p_off);
+			swap(t_sz, p_sz);
+		}
 		if (s_off < t_off || !s_sz) {
 			swap(s_off, t_off);
 			swap(s_sz, t_sz);
+			if (t_off < p_off || !t_sz) {
+				swap(t_off, p_off);
+				swap(t_sz, p_sz);
+			}
 		}
-		memcpy(dst, src, t_off);
+
+		memcpy(dst, src, p_off);
+		memcpy(dst + p_off + p_sz,
+		       src + p_off + p_sz,
+		       t_off - p_off - p_sz);
 		memcpy(dst + t_off + t_sz,
 		       src + t_off + t_sz,
 		       s_off - t_off - t_sz);
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index beb96866f34d..83d71d6912f5 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -30,6 +30,7 @@
 #include <linux/pgtable.h>
 #include <linux/bpf_lsm.h>
 #include <linux/poll.h>
+#include <linux/sort.h>
 #include <linux/bpf-netns.h>
 #include <linux/rcupdate_trace.h>
 #include <linux/memcontrol.h>
@@ -230,6 +231,60 @@ static int bpf_map_update_value(struct bpf_map *map, struct fd f, void *key,
 	return err;
 }
 
+/* sort() comparator: orders off_arr entries by ascending offset */
+static int copy_map_value_cmp(const void *_a, const void *_b)
+{
+	/* Each element is read through its leading u32, the only sort key */
+	const u32 lhs = *(const u32 *)_a;
+	const u32 rhs = *(const u32 *)_b;
+
+	/* Three-way compare; avoids a subtraction that could wrap for u32 */
+	if (lhs == rhs)
+		return 0;
+	return lhs < rhs ? -1 : 1;
+}
+
+/* Copy src to dst except the bpf_spin_lock (s_off/s_sz), the bpf_timer
+ * (t_off/t_sz) and all map->ptr_off_tab offsets; a zero size means absent.
+ */
+void copy_map_value_slow(struct bpf_map *map, void *dst, void *src, u32 s_off,
+			 u32 s_sz, u32 t_off, u32 t_sz)
+{
+	struct bpf_map_value_off *tab = map->ptr_off_tab; /* already set to non-NULL */
+	/* 3 = 2 for bpf_timer, bpf_spin_lock, 1 for map->value_size sentinel */
+	struct {
+		u32 off;
+		u32 sz;
+	} off_arr[BPF_MAP_VALUE_OFF_MAX + 3];
+	int i, cnt = 0;
+
+	/* Reconsider stack usage when bumping BPF_MAP_VALUE_OFF_MAX */
+	BUILD_BUG_ON(sizeof(off_arr) != 88);
+	for (i = 0; i < tab->nr_off; i++) {
+		off_arr[cnt].off = tab->off[i].offset;
+		off_arr[cnt++].sz = sizeof(u64);
+	}
+	if (s_sz) {
+		off_arr[cnt].off = s_off;
+		off_arr[cnt++].sz = s_sz;
+	}
+	if (t_sz) {
+		off_arr[cnt].off = t_off;
+		off_arr[cnt++].sz = t_sz;
+	}
+	off_arr[cnt].off = map->value_size;
+	sort(off_arr, cnt, sizeof(off_arr[0]), copy_map_value_cmp, NULL);
+
+	/* Head, then the gaps; i == cnt copies the tail up to the sentinel */
+	memcpy(dst, src, off_arr[0].off);
+	for (i = 1; i <= cnt; i++) {
+		u32 curr_off = off_arr[i - 1].off + off_arr[i - 1].sz;
+		u32 next_off = off_arr[i].off;
+
+		memcpy(dst + curr_off, src + curr_off, next_off - curr_off);
+	}
+}
+
 static int bpf_map_copy_value(struct bpf_map *map, void *key, void *value,
 			      __u64 flags)
 {
-- 
2.35.1




[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux