[PATCH bpf-next 1/3] bpf: Single-cpu updates for per-cpu maps

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Currently, userspace programs have to update the values of all CPUs at
once when updating per-cpu maps.  This limitation prevents the update of
a single CPU's value without the risk of missing concurrent updates on
other CPUs' values.

This patch allows userspace to update the value of a specific CPU in
per-cpu maps.  The CPU whose value should be updated is encoded in the
upper 32 bits of the flags argument, as shown below.  The new BPF_CPU
flag can be combined with existing flags.

  bpf_map_update_elem(..., (__u64)cpuid << 32 | BPF_CPU)

Signed-off-by: Paul Chaignon <paul.chaignon@xxxxxxxxxx>
---
 include/uapi/linux/bpf.h       |  4 +++
 kernel/bpf/arraymap.c          | 19 ++++++++-----
 kernel/bpf/hashtab.c           | 49 ++++++++++++++++++++--------------
 kernel/bpf/local_storage.c     | 16 +++++++----
 kernel/bpf/syscall.c           | 17 +++++++++---
 tools/include/uapi/linux/bpf.h |  4 +++
 6 files changed, 74 insertions(+), 35 deletions(-)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index dbbcf0b02970..2efb17d2c77a 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -316,6 +316,10 @@ enum bpf_attach_type {
 #define BPF_NOEXIST	1 /* create new element if it didn't exist */
 #define BPF_EXIST	2 /* update existing element */
 #define BPF_F_LOCK	4 /* spin_lock-ed map_lookup/map_update */
+#define BPF_CPU		8 /* single-cpu update for per-cpu maps */
+
+/* CPU mask for single-cpu updates */
+#define BPF_CPU_MASK	0xFFFFFFFF00000000ULL
 
 /* flags for BPF_MAP_CREATE command */
 #define BPF_F_NO_PREALLOC	(1U << 0)
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index f0d19bbb9211..a96e94696819 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -302,7 +302,8 @@ static int array_map_update_elem(struct bpf_map *map, void *key, void *value,
 	u32 index = *(u32 *)key;
 	char *val;
 
-	if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST))
+	if (unlikely((map_flags & ~BPF_CPU_MASK & ~BPF_F_LOCK &
+				  ~BPF_CPU) > BPF_EXIST))
 		/* unknown flags */
 		return -EINVAL;
 
@@ -341,7 +342,7 @@ int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
 	int cpu, off = 0;
 	u32 size;
 
-	if (unlikely(map_flags > BPF_EXIST))
+	if (unlikely((map_flags & ~BPF_CPU_MASK & ~BPF_CPU) > BPF_EXIST))
 		/* unknown flags */
 		return -EINVAL;
 
@@ -349,7 +350,7 @@ int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
 		/* all elements were pre-allocated, cannot insert a new one */
 		return -E2BIG;
 
-	if (unlikely(map_flags == BPF_NOEXIST))
+	if (unlikely(map_flags & BPF_NOEXIST))
 		/* all elements already exist */
 		return -EEXIST;
 
@@ -362,9 +363,15 @@ int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
 	size = round_up(map->value_size, 8);
 	rcu_read_lock();
 	pptr = array->pptrs[index & array->index_mask];
-	for_each_possible_cpu(cpu) {
-		bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value + off, size);
-		off += size;
+	if (map_flags & BPF_CPU) {
+		bpf_long_memcpy(per_cpu_ptr(pptr, map_flags >> 32), value,
+				size);
+	} else {
+		for_each_possible_cpu(cpu) {
+			bpf_long_memcpy(per_cpu_ptr(pptr, cpu), value + off,
+					size);
+			off += size;
+		}
 	}
 	rcu_read_unlock();
 	return 0;
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 22066a62c8c9..be45c7c4509f 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -695,12 +695,12 @@ static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
 }
 
 static void pcpu_copy_value(struct bpf_htab *htab, void __percpu *pptr,
-			    void *value, bool onallcpus)
+			    void *value, int cpuid)
 {
-	if (!onallcpus) {
+	if (cpuid == -1) {
 		/* copy true value_size bytes */
 		memcpy(this_cpu_ptr(pptr), value, htab->map.value_size);
-	} else {
+	} else if (cpuid == -2) {
 		u32 size = round_up(htab->map.value_size, 8);
 		int off = 0, cpu;
 
@@ -709,6 +709,10 @@ static void pcpu_copy_value(struct bpf_htab *htab, void __percpu *pptr,
 					value + off, size);
 			off += size;
 		}
+	} else {
+		u32 size = round_up(htab->map.value_size, 8);
+
+		bpf_long_memcpy(per_cpu_ptr(pptr, cpuid), value, size);
 	}
 }
 
@@ -720,7 +724,7 @@ static bool fd_htab_map_needs_adjust(const struct bpf_htab *htab)
 
 static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
 					 void *value, u32 key_size, u32 hash,
-					 bool percpu, bool onallcpus,
+					 bool percpu, int cpuid,
 					 struct htab_elem *old_elem)
 {
 	u32 size = htab->map.value_size;
@@ -781,7 +785,7 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
 			}
 		}
 
-		pcpu_copy_value(htab, pptr, value, onallcpus);
+		pcpu_copy_value(htab, pptr, value, cpuid);
 
 		if (!prealloc)
 			htab_elem_set_ptr(l_new, key_size, pptr);
@@ -804,11 +808,11 @@ static struct htab_elem *alloc_htab_elem(struct bpf_htab *htab, void *key,
 static int check_flags(struct bpf_htab *htab, struct htab_elem *l_old,
 		       u64 map_flags)
 {
-	if (l_old && (map_flags & ~BPF_F_LOCK) == BPF_NOEXIST)
+	if (l_old && (map_flags & BPF_NOEXIST) == BPF_NOEXIST)
 		/* elem already exists */
 		return -EEXIST;
 
-	if (!l_old && (map_flags & ~BPF_F_LOCK) == BPF_EXIST)
+	if (!l_old && (map_flags & BPF_EXIST) == BPF_EXIST)
 		/* elem doesn't exist, cannot update it */
 		return -ENOENT;
 
@@ -827,7 +831,8 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
 	u32 key_size, hash;
 	int ret;
 
-	if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST))
+	if (unlikely((map_flags & ~BPF_CPU_MASK & ~BPF_F_LOCK &
+				  ~BPF_CPU) > BPF_EXIST))
 		/* unknown flags */
 		return -EINVAL;
 
@@ -919,7 +924,7 @@ static int htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value,
 	u32 key_size, hash;
 	int ret;
 
-	if (unlikely(map_flags > BPF_EXIST))
+	if (unlikely((map_flags & ~BPF_CPU_MASK & ~BPF_CPU) > BPF_EXIST))
 		/* unknown flags */
 		return -EINVAL;
 
@@ -974,7 +979,7 @@ static int htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value,
 
 static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
 					 void *value, u64 map_flags,
-					 bool onallcpus)
+					 int cpuid)
 {
 	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
 	struct htab_elem *l_new = NULL, *l_old;
@@ -984,7 +989,7 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
 	u32 key_size, hash;
 	int ret;
 
-	if (unlikely(map_flags > BPF_EXIST))
+	if (unlikely((map_flags & ~BPF_CPU_MASK & ~BPF_CPU) > BPF_EXIST))
 		/* unknown flags */
 		return -EINVAL;
 
@@ -1009,10 +1014,10 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
 	if (l_old) {
 		/* per-cpu hash map can update value in-place */
 		pcpu_copy_value(htab, htab_elem_get_ptr(l_old, key_size),
-				value, onallcpus);
+				value, cpuid);
 	} else {
 		l_new = alloc_htab_elem(htab, key, value, key_size,
-					hash, true, onallcpus, NULL);
+					hash, true, cpuid, NULL);
 		if (IS_ERR(l_new)) {
 			ret = PTR_ERR(l_new);
 			goto err;
@@ -1027,7 +1032,7 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
 
 static int __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
 					     void *value, u64 map_flags,
-					     bool onallcpus)
+					     int cpuid)
 {
 	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
 	struct htab_elem *l_new = NULL, *l_old;
@@ -1075,10 +1080,10 @@ static int __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
 
 		/* per-cpu hash map can update value in-place */
 		pcpu_copy_value(htab, htab_elem_get_ptr(l_old, key_size),
-				value, onallcpus);
+				value, cpuid);
 	} else {
 		pcpu_copy_value(htab, htab_elem_get_ptr(l_new, key_size),
-				value, onallcpus);
+				value, cpuid);
 		hlist_nulls_add_head_rcu(&l_new->hash_node, head);
 		l_new = NULL;
 	}
@@ -1093,14 +1098,14 @@ static int __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
 static int htab_percpu_map_update_elem(struct bpf_map *map, void *key,
 				       void *value, u64 map_flags)
 {
-	return __htab_percpu_map_update_elem(map, key, value, map_flags, false);
+	return __htab_percpu_map_update_elem(map, key, value, map_flags, -1);
 }
 
 static int htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
 					   void *value, u64 map_flags)
 {
 	return __htab_lru_percpu_map_update_elem(map, key, value, map_flags,
-						 false);
+						 -1);
 }
 
 /* Called from syscall or from eBPF program */
@@ -1316,15 +1321,19 @@ int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value,
 			   u64 map_flags)
 {
 	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+	int cpuid = -2; /* update values of all cpus */
 	int ret;
 
+	if (map_flags & BPF_CPU)
+		cpuid = map_flags >> 32;
+
 	rcu_read_lock();
 	if (htab_is_lru(htab))
 		ret = __htab_lru_percpu_map_update_elem(map, key, value,
-							map_flags, true);
+							map_flags, cpuid);
 	else
 		ret = __htab_percpu_map_update_elem(map, key, value, map_flags,
-						    true);
+						    cpuid);
 	rcu_read_unlock();
 
 	return ret;
diff --git a/kernel/bpf/local_storage.c b/kernel/bpf/local_storage.c
index 2ba750725cb2..2f93d75fc74b 100644
--- a/kernel/bpf/local_storage.c
+++ b/kernel/bpf/local_storage.c
@@ -206,7 +206,7 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *_map, void *_key,
 	int cpu, off = 0;
 	u32 size;
 
-	if (map_flags != BPF_ANY && map_flags != BPF_EXIST)
+	if (map_flags & ~BPF_CPU_MASK & ~BPF_CPU & ~BPF_EXIST)
 		return -EINVAL;
 
 	rcu_read_lock();
@@ -223,10 +223,16 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *_map, void *_key,
 	 * so no kernel data leaks possible
 	 */
 	size = round_up(_map->value_size, 8);
-	for_each_possible_cpu(cpu) {
-		bpf_long_memcpy(per_cpu_ptr(storage->percpu_buf, cpu),
-				value + off, size);
-		off += size;
+	if (map_flags & BPF_CPU) {
+		cpu = map_flags >> 32;
+		bpf_long_memcpy(per_cpu_ptr(storage->percpu_buf, cpu), value,
+				size);
+	} else {
+		for_each_possible_cpu(cpu) {
+			bpf_long_memcpy(per_cpu_ptr(storage->percpu_buf, cpu),
+					value + off, size);
+			off += size;
+		}
 	}
 	rcu_read_unlock();
 	return 0;
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index b08c362f4e02..83e190929f2e 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -953,7 +953,9 @@ static int map_update_elem(union bpf_attr *attr)
 	struct bpf_map *map;
 	void *key, *value;
 	u32 value_size;
+	bool per_cpu;
 	struct fd f;
+	int cpuid;
 	int err;
 
 	if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM))
@@ -974,16 +976,23 @@ static int map_update_elem(union bpf_attr *attr)
 		goto err_put;
 	}
 
+	per_cpu = map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
+		  map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
+		  map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY ||
+		  map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE;
+	if ((attr->flags & BPF_CPU) &&
+	    (!per_cpu || (attr->flags >> 32) >= num_possible_cpus())) {
+		err = -EINVAL;
+		goto err_put;
+	}
+
 	key = __bpf_copy_key(ukey, map->key_size);
 	if (IS_ERR(key)) {
 		err = PTR_ERR(key);
 		goto err_put;
 	}
 
-	if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
-	    map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH ||
-	    map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY ||
-	    map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE)
+	if (per_cpu && !(attr->flags & BPF_CPU))
 		value_size = round_up(map->value_size, 8) * num_possible_cpus();
 	else
 		value_size = map->value_size;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index dbbcf0b02970..2efb17d2c77a 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -316,6 +316,10 @@ enum bpf_attach_type {
 #define BPF_NOEXIST	1 /* create new element if it didn't exist */
 #define BPF_EXIST	2 /* update existing element */
 #define BPF_F_LOCK	4 /* spin_lock-ed map_lookup/map_update */
+#define BPF_CPU		8 /* single-cpu update for per-cpu maps */
+
+/* CPU mask for single-cpu updates */
+#define BPF_CPU_MASK	0xFFFFFFFF00000000ULL
 
 /* flags for BPF_MAP_CREATE command */
 #define BPF_F_NO_PREALLOC	(1U << 0)
-- 
2.24.0




[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux