[PATCH bpf-next v3 07/13] bpf: Adapt copy_map_value for multiple offset case

Since there can now be up to 10 offsets that need handling in
copy_map_value, the manual shuffling and special-case handling no
longer works. Hence, let's generalise the copy_map_value function by
using a sorted array of offsets that describes the regions to skip
while copying into and out of a map value.
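
As an illustration, here is a minimal userspace sketch of the
skip-copy loop; the names (off_arr_entry, skip_copy) are purely
illustrative and not the kernel's:

  #include <string.h>

  struct off_arr_entry { unsigned int off; unsigned char sz; };

  /* f[] is sorted by off and ends with a sentinel entry whose off is
   * the total value size; copy every byte range between the fields.
   */
  static void skip_copy(void *dst, const void *src,
                        const struct off_arr_entry *f, unsigned int cnt)
  {
          unsigned int i;

          /* Everything before the first skipped field. */
          memcpy(dst, src, f[0].off);
          for (i = 1; i < cnt; i++) {
                  /* Resume right after the previous field, stop at the
                   * start of the next one (or the final sentinel).
                   */
                  unsigned int curr = f[i - 1].off + f[i - 1].sz;

                  memcpy((char *)dst + curr, (const char *)src + curr,
                         f[i].off - curr);
          }
  }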

When the map is created, we populate the offset array in struct
bpf_map, with one extra sentinel element for map->value_size, which
serves as the final offset to subtract the previous offset from.
copy_map_value then walks this sorted offset array and memcpys the
ranges between the fields, skipping over the bpf_timer, bpf_spin_lock,
and kptr fields.
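
For example, assuming a hypothetical 48-byte map value with a
bpf_spin_lock at offset 8 (4 bytes), a bpf_timer at offset 16
(16 bytes), and a single kptr at offset 40 (8 bytes), the sorted
array plus sentinel would be {8, 4}, {16, 16}, {40, 8}, {48, -},
and copy_map_value would memcpy the ranges [0, 8), [12, 16),
[32, 40) and the empty tail [48, 48), with no special casing of
the last chunk.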

Signed-off-by: Kumar Kartikeya Dwivedi <memxor@xxxxxxxxx>
---
 include/linux/bpf.h  | 55 +++++++++++++++++++++++---------------------
 kernel/bpf/syscall.c | 52 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 81 insertions(+), 26 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 9d424d567dd3..6474d2d44b78 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -158,6 +158,10 @@ struct bpf_map_ops {
 enum {
 	/* Support at most 8 pointers in a BPF map value */
 	BPF_MAP_VALUE_OFF_MAX = 8,
+	BPF_MAP_OFF_ARR_MAX   = BPF_MAP_VALUE_OFF_MAX +
+				1 + /* for bpf_spin_lock */
+				1 + /* for bpf_timer */
+				1,  /* for map->value_size sentinel */
 };
 
 enum {
@@ -206,9 +210,17 @@ struct bpf_map {
 	char name[BPF_OBJ_NAME_LEN];
 	bool bypass_spec_v1;
 	bool frozen; /* write-once; write-protected by freeze_mutex */
-	/* 6 bytes hole */
-
-	/* The 3rd and 4th cacheline with misc members to avoid false sharing
+	/* 2 bytes hole */
+	struct {
+		struct {
+			u32 off;
+			u8 sz;
+		} field[BPF_MAP_OFF_ARR_MAX];
+		u32 cnt;
+	} off_arr;
+	/* 40 bytes hole */
+
+	/* The 4th and 5th cacheline with misc members to avoid false sharing
 	 * particularly with refcounting.
 	 */
 	atomic64_t refcnt ____cacheline_aligned;
@@ -250,36 +262,27 @@ static inline void check_and_init_map_value(struct bpf_map *map, void *dst)
 		memset(dst + map->spin_lock_off, 0, sizeof(struct bpf_spin_lock));
 	if (unlikely(map_value_has_timer(map)))
 		memset(dst + map->timer_off, 0, sizeof(struct bpf_timer));
+	if (unlikely(map_value_has_kptr(map))) {
+		struct bpf_map_value_off *tab = map->kptr_off_tab;
+		int i;
+
+		for (i = 0; i < tab->nr_off; i++)
+			*(u64 *)(dst + tab->off[i].offset) = 0;
+	}
 }
 
 /* copy everything but bpf_spin_lock and bpf_timer. There could be one of each. */
 static inline void copy_map_value(struct bpf_map *map, void *dst, void *src)
 {
-	u32 s_off = 0, s_sz = 0, t_off = 0, t_sz = 0;
+	int i;
 
-	if (unlikely(map_value_has_spin_lock(map))) {
-		s_off = map->spin_lock_off;
-		s_sz = sizeof(struct bpf_spin_lock);
-	}
-	if (unlikely(map_value_has_timer(map))) {
-		t_off = map->timer_off;
-		t_sz = sizeof(struct bpf_timer);
-	}
+	memcpy(dst, src, map->off_arr.field[0].off);
+	for (i = 1; i < map->off_arr.cnt; i++) {
+		u32 curr_off = map->off_arr.field[i - 1].off;
+		u32 next_off = map->off_arr.field[i].off;
 
-	if (unlikely(s_sz || t_sz)) {
-		if (s_off < t_off || !s_sz) {
-			swap(s_off, t_off);
-			swap(s_sz, t_sz);
-		}
-		memcpy(dst, src, t_off);
-		memcpy(dst + t_off + t_sz,
-		       src + t_off + t_sz,
-		       s_off - t_off - t_sz);
-		memcpy(dst + s_off + s_sz,
-		       src + s_off + s_sz,
-		       map->value_size - s_off - s_sz);
-	} else {
-		memcpy(dst, src, map->value_size);
+		curr_off += map->off_arr.field[i - 1].sz;
+		memcpy(dst + curr_off, src + curr_off, next_off - curr_off);
 	}
 }
 void copy_map_value_locked(struct bpf_map *map, void *dst, void *src,
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 5990d6fa97ab..7b32537bd81f 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -30,6 +30,7 @@
 #include <linux/pgtable.h>
 #include <linux/bpf_lsm.h>
 #include <linux/poll.h>
+#include <linux/sort.h>
 #include <linux/bpf-netns.h>
 #include <linux/rcupdate_trace.h>
 #include <linux/memcontrol.h>
@@ -851,6 +852,55 @@ int map_check_no_btf(const struct bpf_map *map,
 	return -ENOTSUPP;
 }
 
+static int map_off_arr_cmp(const void *_a, const void *_b)
+{
+	const u32 a = *(const u32 *)_a;
+	const u32 b = *(const u32 *)_b;
+
+	if (a < b)
+		return -1;
+	else if (a > b)
+		return 1;
+	return 0;
+}
+
+static void map_populate_off_arr(struct bpf_map *map)
+{
+	u32 i;
+
+	map->off_arr.cnt = 0;
+	if (map_value_has_spin_lock(map)) {
+		i = map->off_arr.cnt;
+
+		map->off_arr.field[i].off = map->spin_lock_off;
+		map->off_arr.field[i].sz = sizeof(struct bpf_spin_lock);
+		map->off_arr.cnt++;
+	}
+	if (map_value_has_timer(map)) {
+		i = map->off_arr.cnt;
+
+		map->off_arr.field[i].off = map->timer_off;
+		map->off_arr.field[i].sz = sizeof(struct bpf_timer);
+		map->off_arr.cnt++;
+	}
+	if (map_value_has_kptr(map)) {
+		struct bpf_map_value_off *tab = map->kptr_off_tab;
+		u32 j = map->off_arr.cnt;
+
+		for (i = 0; i < tab->nr_off; i++) {
+			map->off_arr.field[j + i].off = tab->off[i].offset;
+			map->off_arr.field[j + i].sz = sizeof(u64);
+		}
+		map->off_arr.cnt += tab->nr_off;
+	}
+
+	map->off_arr.field[map->off_arr.cnt++].off = map->value_size;
+	if (map->off_arr.cnt == 1)
+		return;
+	sort(map->off_arr.field, map->off_arr.cnt, sizeof(map->off_arr.field[0]),
+	     map_off_arr_cmp, NULL);
+}
+
 static int map_check_btf(struct bpf_map *map, const struct btf *btf,
 			 u32 btf_key_id, u32 btf_value_id)
 {
@@ -1018,6 +1068,8 @@ static int map_create(union bpf_attr *attr)
 			attr->btf_vmlinux_value_type_id;
 	}
 
+	map_populate_off_arr(map);
+
 	err = security_bpf_map_alloc(map);
 	if (err)
 		goto free_map;
-- 
2.35.1



