[PATCH bpf-next v2 08/15] bpf: Adapt copy_map_value for multiple offset case

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Since there may now be up to 10 offsets that need handling in
copy_map_value, the manual shuffling and special-casing are no longer
going to work. Hence, let's generalise the copy_map_value function by
using a sorted array of offsets to skip regions that must be avoided
while copying into and out of a map value.

When the map is created, we populate the offset array in struct map,
with one extra element for map->value_size, which is used as the final
offset to subtract the previous offset from. Since there can only be
three sizes (bpf_spin_lock, bpf_timer, and kptr), we can avoid recording
the size in the struct map, and only store sorted offsets. Later we can
determine the size for each offset by comparing it to timer_off and
spin_lock_off; otherwise it must be sizeof(u64) for kptr.

Then, copy_map_value uses this sorted offset array to memcpy while
skipping timer, spin lock, and kptr.

Signed-off-by: Kumar Kartikeya Dwivedi <memxor@xxxxxxxxx>
---
 include/linux/bpf.h  | 59 +++++++++++++++++++++++++-------------------
 kernel/bpf/syscall.c | 47 +++++++++++++++++++++++++++++++++++
 2 files changed, 80 insertions(+), 26 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 8ac3070aa5e6..f0f1e0d3bb2e 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -158,6 +158,10 @@ struct bpf_map_ops {
 enum {
 	/* Support at most 8 pointers in a BPF map value */
 	BPF_MAP_VALUE_OFF_MAX = 8,
+	BPF_MAP_OFF_ARR_MAX   = BPF_MAP_VALUE_OFF_MAX +
+				1 + /* for bpf_spin_lock */
+				1 + /* for bpf_timer */
+				1,  /* for map->value_size sentinel */
 };
 
 enum {
@@ -208,7 +212,12 @@ struct bpf_map {
 	char name[BPF_OBJ_NAME_LEN];
 	bool bypass_spec_v1;
 	bool frozen; /* write-once; write-protected by freeze_mutex */
-	/* 6 bytes hole */
+	/* 2 bytes hole */
+	struct {
+		u32 off[BPF_MAP_OFF_ARR_MAX];
+		u32 cnt;
+	} off_arr;
+	/* 20 bytes hole */
 
 	/* The 3rd and 4th cacheline with misc members to avoid false sharing
 	 * particularly with refcounting.
@@ -252,36 +261,34 @@ static inline void check_and_init_map_value(struct bpf_map *map, void *dst)
 		memset(dst + map->spin_lock_off, 0, sizeof(struct bpf_spin_lock));
 	if (unlikely(map_value_has_timer(map)))
 		memset(dst + map->timer_off, 0, sizeof(struct bpf_timer));
+	if (unlikely(map_value_has_kptr(map))) {
+		struct bpf_map_value_off *tab = map->kptr_off_tab;
+		int i;
+
+		for (i = 0; i < tab->nr_off; i++)
+			*(u64 *)(dst + tab->off[i].offset) = 0;
+	}
 }
 
 /* copy everything but bpf_spin_lock and bpf_timer. There could be one of each. */
 static inline void copy_map_value(struct bpf_map *map, void *dst, void *src)
 {
-	u32 s_off = 0, s_sz = 0, t_off = 0, t_sz = 0;
-
-	if (unlikely(map_value_has_spin_lock(map))) {
-		s_off = map->spin_lock_off;
-		s_sz = sizeof(struct bpf_spin_lock);
-	}
-	if (unlikely(map_value_has_timer(map))) {
-		t_off = map->timer_off;
-		t_sz = sizeof(struct bpf_timer);
-	}
-
-	if (unlikely(s_sz || t_sz)) {
-		if (s_off < t_off || !s_sz) {
-			swap(s_off, t_off);
-			swap(s_sz, t_sz);
-		}
-		memcpy(dst, src, t_off);
-		memcpy(dst + t_off + t_sz,
-		       src + t_off + t_sz,
-		       s_off - t_off - t_sz);
-		memcpy(dst + s_off + s_sz,
-		       src + s_off + s_sz,
-		       map->value_size - s_off - s_sz);
-	} else {
-		memcpy(dst, src, map->value_size);
+	int i;
+
+	memcpy(dst, src, map->off_arr.off[0]);
+	for (i = 1; i < map->off_arr.cnt; i++) {
+		u32 curr_off = map->off_arr.off[i - 1];
+		u32 next_off = map->off_arr.off[i];
+		u32 curr_sz;
+
+		if (map_value_has_spin_lock(map) && map->spin_lock_off == curr_off)
+			curr_sz = sizeof(struct bpf_spin_lock);
+		else if (map_value_has_timer(map) && map->timer_off == curr_off)
+			curr_sz = sizeof(struct bpf_timer);
+		else
+			curr_sz = sizeof(u64);
+		curr_off += curr_sz;
+		memcpy(dst + curr_off, src + curr_off, next_off - curr_off);
 	}
 }
 void copy_map_value_locked(struct bpf_map *map, void *dst, void *src,
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 87263b07f40b..69e8ea1be432 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -30,6 +30,7 @@
 #include <linux/pgtable.h>
 #include <linux/bpf_lsm.h>
 #include <linux/poll.h>
+#include <linux/sort.h>
 #include <linux/bpf-netns.h>
 #include <linux/rcupdate_trace.h>
 #include <linux/memcontrol.h>
@@ -850,6 +851,50 @@ int map_check_no_btf(const struct bpf_map *map,
 	return -ENOTSUPP;
 }
 
+static int map_off_arr_cmp(const void *_a, const void *_b)
+{
+	const u32 a = *(const u32 *)_a;
+	const u32 b = *(const u32 *)_b;
+
+	if (a < b)
+		return -1;
+	else if (a > b)
+		return 1;
+	return 0;
+}
+
+static void map_populate_off_arr(struct bpf_map *map)
+{
+	u32 i;
+
+	map->off_arr.cnt = 0;
+	if (map_value_has_spin_lock(map)) {
+		i = map->off_arr.cnt;
+
+		map->off_arr.off[i] = map->spin_lock_off;
+		map->off_arr.cnt++;
+	}
+	if (map_value_has_timer(map)) {
+		i = map->off_arr.cnt;
+
+		map->off_arr.off[i] = map->timer_off;
+		map->off_arr.cnt++;
+	}
+	if (map_value_has_kptr(map)) {
+		struct bpf_map_value_off *tab = map->kptr_off_tab;
+		u32 j = map->off_arr.cnt;
+
+		for (i = 0; i < tab->nr_off; i++)
+			map->off_arr.off[j + i] = tab->off[i].offset;
+		map->off_arr.cnt += tab->nr_off;
+	}
+
+	map->off_arr.off[map->off_arr.cnt++] = map->value_size;
+	if (map->off_arr.cnt == 1)
+		return;
+	sort(map->off_arr.off, map->off_arr.cnt, sizeof(map->off_arr.off[0]), map_off_arr_cmp, NULL);
+}
+
 static int map_check_btf(struct bpf_map *map, const struct btf *btf,
 			 u32 btf_key_id, u32 btf_value_id)
 {
@@ -1015,6 +1060,8 @@ static int map_create(union bpf_attr *attr)
 			attr->btf_vmlinux_value_type_id;
 	}
 
+	map_populate_off_arr(map);
+
 	err = security_bpf_map_alloc(map);
 	if (err)
 		goto free_map;
-- 
2.35.1




[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux