Re: [PATCH v2 bpf-next] bpf: add lookup_and_delete_elem support to hashtab

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 





On 3/13/21 12:54 AM, Denis Salopek wrote:
Hello,

Thank you for your feedback and comments.

On Mon, Mar 01, 2021 at 06:02:37PM -0800, Yonghong Song wrote:


On 2/28/21 1:40 AM, Denis Salopek wrote:
Extend the existing bpf_map_lookup_and_delete_elem() functionality to
hashtab maps, in addition to stacks and queues.
Create a new hashtab bpf_map_ops function that does lookup and deletion
of the element under the same bucket lock and add the created map_ops to
bpf.h.
Add the appropriate test cases to 'maps' and 'lru_map' selftests
accompanied with new test_progs.

Cc: Juraj Vijtiuk <juraj.vijtiuk@xxxxxxxxxx>
Cc: Luka Oreskovic <luka.oreskovic@xxxxxxxxxx>
Cc: Luka Perkov <luka.perkov@xxxxxxxxxx>
Signed-off-by: Denis Salopek <denis.salopek@xxxxxxxxxx>
---
v2: Add functionality for LRU/per-CPU, add test_progs tests.
---
   include/linux/bpf.h                           |   2 +
   kernel/bpf/hashtab.c                          |  80 +++++
   kernel/bpf/syscall.c                          |  14 +-
   .../bpf/prog_tests/lookup_and_delete.c        | 283 ++++++++++++++++++
   .../bpf/progs/test_lookup_and_delete.c        |  26 ++
   tools/testing/selftests/bpf/test_lru_map.c    |   8 +
   tools/testing/selftests/bpf/test_maps.c       |  19 +-
   7 files changed, 430 insertions(+), 2 deletions(-)
   create mode 100644 tools/testing/selftests/bpf/prog_tests/lookup_and_delete.c
   create mode 100644 tools/testing/selftests/bpf/progs/test_lookup_and_delete.c

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 4c730863fa77..0bcc4f89af40 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -67,6 +67,8 @@ struct bpf_map_ops {
   	void *(*map_lookup_elem_sys_only)(struct bpf_map *map, void *key);
   	int (*map_lookup_batch)(struct bpf_map *map, const union bpf_attr *attr,
   				union bpf_attr __user *uattr);
+	int (*map_lookup_and_delete_elem)(struct bpf_map *map, void *key,
+					  void *value);
   	int (*map_lookup_and_delete_batch)(struct bpf_map *map,
   					   const union bpf_attr *attr,
   					   union bpf_attr __user *uattr);
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 330d721dd2af..8c3334d1b6b3 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -1401,6 +1401,82 @@ static void htab_map_seq_show_elem(struct bpf_map *map, void *key,
   	rcu_read_unlock();
   }
+static int __htab_map_lookup_and_delete_elem(struct bpf_map *map, void *key,
+					     void *value, bool is_lru_map,
+					     bool is_percpu)
+{
+	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+	u32 hash, key_size, value_size;
+	struct hlist_nulls_head *head;
+	int cpu, off = 0, ret;
+	struct htab_elem *l;
+	unsigned long flags;
+	void __percpu *pptr;
+	struct bucket *b;
+
+	key_size = map->key_size;
+	value_size = round_up(map->value_size, 8);
+
+	hash = htab_map_hash(key, key_size, htab->hashrnd);
+	b = __select_bucket(htab, hash);
+	head = &b->head;
+
+	ret = htab_lock_bucket(htab, b, hash, &flags);
+	if (ret)
+		return ret;
+
+	l = lookup_elem_raw(head, hash, key, key_size);
+	if (l) {
+		if (is_percpu) {
+			pptr = htab_elem_get_ptr(l, key_size);
+			for_each_possible_cpu(cpu) {
+				bpf_long_memcpy(value + off,
+						per_cpu_ptr(pptr, cpu),
+						value_size);
+				off += value_size;
+			}
+		} else {
+			copy_map_value(map, value, l->key + round_up(key_size, 8));

For hashtab lookup elem, BPF_F_LOCK flag may be set by user, I think
hashtab lookup_and_delete_elem should also support this flag, so user
can ensure they always get a lock protected sane value.

We have the following libbpf APIs.

LIBBPF_API int bpf_map_lookup_elem(int fd, const void *key, void *value);
LIBBPF_API int bpf_map_lookup_elem_flags(int fd, const void *key, void
*value,
                                           __u64 flags);
LIBBPF_API int bpf_map_lookup_and_delete_elem(int fd, const void *key,
                                                void *value);

Previously, bpf_map_lookup_and_delete_elem only supports queue/stack,
which does not need flags as it does not support BPF_F_LOCK so we
are fine.

Maybe similar to bpf_map_lookup_elem_flags() we add a
bpf_map_lookup_and_delete_elem_flags()? Maybe libbpf v1.0
can consolidate into a better uniform api.


If I understood correctly, there shouldn't be much changes for this
addition:
- add LIBBPF_API prototype and function in bpf.[hc] - those are
   practically the same as bpf_map_lookup_elem_flags() but we call
   BPF_LOOKUP_AND_DELETE_ELEM syscall,

yes.

- add global declaration for bpf_map_lookup_elem_flags() in libbpf.map,

bpf_map_lookup_and_delete_elem_flags()

- make the necessary checks for flags and the lock in the functions,

yes.

- call copy_map_value_locked() if BPF_F_LOCK is set,

yes.

- mask lock with check_and_init_map_lock().

not sure about, current implementation is supposed to already
take care of this, but please double check.


Is this right or is there anything else I've missed?

yes, almost right except some minor comments above.


+		}
+
+		hlist_nulls_del_rcu(&l->hash_node);
+		if (!is_lru_map)
+			free_htab_elem(htab, l);
+	} else
+		ret = -ENOENT;
+
+	htab_unlock_bucket(htab, b, hash, flags);
+
+	if (is_lru_map && l)
+		bpf_lru_push_free(&htab->lru, &l->lru_node);
+
+	return ret;
+}
+
+static int htab_map_lookup_and_delete_elem(struct bpf_map *map,
+					   void *key, void *value)
+{
+	return __htab_map_lookup_and_delete_elem(map, key, value, false, false);
+}
+
+static int htab_percpu_map_lookup_and_delete_elem(struct bpf_map *map,
+						  void *key, void *value)
+{
+	return __htab_map_lookup_and_delete_elem(map, key, value, false, true);
+}
+
+static int htab_lru_map_lookup_and_delete_elem(struct bpf_map *map, void *key,
+					       void *value)
+{
+	return __htab_map_lookup_and_delete_elem(map, key, value, true, false);
+}
+
+static int htab_lru_percpu_map_lookup_and_delete_elem(struct bpf_map *map,
+						      void *key, void *value)
+{
+	return __htab_map_lookup_and_delete_elem(map, key, value, true, true);
+}
+
   static int
   __htab_map_lookup_and_delete_batch(struct bpf_map *map,
   				   const union bpf_attr *attr,
@@ -1934,6 +2010,7 @@ const struct bpf_map_ops htab_map_ops = {
   	.map_free = htab_map_free,
   	.map_get_next_key = htab_map_get_next_key,
   	.map_lookup_elem = htab_map_lookup_elem,
+	.map_lookup_and_delete_elem = htab_map_lookup_and_delete_elem,
   	.map_update_elem = htab_map_update_elem,
   	.map_delete_elem = htab_map_delete_elem,
   	.map_gen_lookup = htab_map_gen_lookup,
@@ -1954,6 +2031,7 @@ const struct bpf_map_ops htab_lru_map_ops = {
   	.map_free = htab_map_free,
   	.map_get_next_key = htab_map_get_next_key,
   	.map_lookup_elem = htab_lru_map_lookup_elem,
+	.map_lookup_and_delete_elem = htab_lru_map_lookup_and_delete_elem,
   	.map_lookup_elem_sys_only = htab_lru_map_lookup_elem_sys,
   	.map_update_elem = htab_lru_map_update_elem,
   	.map_delete_elem = htab_lru_map_delete_elem,
@@ -2077,6 +2155,7 @@ const struct bpf_map_ops htab_percpu_map_ops = {
   	.map_free = htab_map_free,
   	.map_get_next_key = htab_map_get_next_key,
   	.map_lookup_elem = htab_percpu_map_lookup_elem,
+	.map_lookup_and_delete_elem = htab_percpu_map_lookup_and_delete_elem,
   	.map_update_elem = htab_percpu_map_update_elem,
   	.map_delete_elem = htab_map_delete_elem,
   	.map_seq_show_elem = htab_percpu_map_seq_show_elem,
@@ -2096,6 +2175,7 @@ const struct bpf_map_ops htab_lru_percpu_map_ops = {
   	.map_free = htab_map_free,
   	.map_get_next_key = htab_map_get_next_key,
   	.map_lookup_elem = htab_lru_percpu_map_lookup_elem,
+	.map_lookup_and_delete_elem = htab_lru_percpu_map_lookup_and_delete_elem,
   	.map_update_elem = htab_lru_percpu_map_update_elem,
   	.map_delete_elem = htab_lru_map_delete_elem,
   	.map_seq_show_elem = htab_percpu_map_seq_show_elem,
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index c859bc46d06c..2634aa4a2f37 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1495,7 +1495,7 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
   		goto err_put;
   	}
- value_size = map->value_size;
+	value_size = bpf_map_value_size(map);
err = -ENOMEM;
   	value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
@@ -1505,6 +1505,18 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
   	if (map->map_type == BPF_MAP_TYPE_QUEUE ||
   	    map->map_type == BPF_MAP_TYPE_STACK) {
   		err = map->ops->map_pop_elem(map, value);
+	} else if (map->map_type == BPF_MAP_TYPE_HASH ||
+		   map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
+		   map->map_type == BPF_MAP_TYPE_LRU_HASH ||
+		   map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
+		if (!bpf_map_is_dev_bound(map)) {
+			bpf_disable_instrumentation();
+			rcu_read_lock();
+			err = map->ops->map_lookup_and_delete_elem(map, key, value);
+			rcu_read_unlock();
+			bpf_enable_instrumentation();
+			maybe_wait_bpf_programs(map);

maybe_wait_bpf_programs(map) is mostly for map-in-map.
but I think it is okay to put it here in case in the future
we will support map-in-map here. If maybe_wait_bpf_programs()
get inlined which mostly likely is the case, the compiler
should be able to optimize it away.


I didn't realise at first it's only for map-in-map and forgot to remove
it later, so I can remove this if you think it's better?

Originally I thought to keep it as the compiler should
be able to optimize it away. But since there is no immediate
use case yet for lookup-and-delete for hash of map-in-maps,
so let us remove maybe_wait_bpf_programs() to avoid confusion.
We can add it later if BPF_MAP_TYPE_HASH_OF_MAPS is added.


+		}
   	} else {
   		err = -ENOTSUPP;
   	}
diff --git a/tools/testing/selftests/bpf/prog_tests/lookup_and_delete.c b/tools/testing/selftests/bpf/prog_tests/lookup_and_delete.c
new file mode 100644
index 000000000000..05123bbcdc1c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/lookup_and_delete.c
@@ -0,0 +1,283 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <test_progs.h>
+#include "test_lookup_and_delete.skel.h"
+
+#define START_VALUE 1234
+#define NEW_VALUE 4321
+#define MAX_ENTRIES 2
+
+static int duration;
+static int nr_cpus;
+
+static int fill_values(int map_fd)
+{
+	__u64 key, value = START_VALUE;
+	int err;
+
+	for (key = 1; key < MAX_ENTRIES + 1; key++) {
+		err = bpf_map_update_elem(map_fd, &key, &value, BPF_NOEXIST);
+		if (CHECK(err, "bpf_map_update_elem", "failed\n"))

You can use
	if (!ASSERT_OK(err, "bpf_map_update_elem"))
to save you from explicit "failed" string.
The same for some later other CHECK usages.


Ok.

+			return -1;
+	}
+
+	return 0;
+}
+
+static int fill_values_percpu(int map_fd)
+{
+	BPF_DECLARE_PERCPU(__u64, value);
+	int i, err;
+	u64 key;
+
+	for (i = 0; i < nr_cpus; i++)
+		bpf_percpu(value, i) = START_VALUE;
+
+	for (key = 1; key < MAX_ENTRIES + 1; key++) {
+		err = bpf_map_update_elem(map_fd, &key, value, BPF_NOEXIST);
+		if (CHECK(err, "bpf_map_update_elem", "failed\n"))
+			return -1;
+	}
+
+	return 0;
+}
+
[...]
diff --git a/tools/testing/selftests/bpf/progs/test_lookup_and_delete.c b/tools/testing/selftests/bpf/progs/test_lookup_and_delete.c
new file mode 100644
index 000000000000..eb19de8bb415
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_lookup_and_delete.c
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+__u32 set_pid;
+__u64 set_key;
+__u64 set_value;

Please add "= 0" to the above declaration to make
it llvm10 friendly.


Ok, I'll add this. Sorry, checkpatch.pl gave me an error with it, that's
why I removed it.

Song Liu recently added a patch to suppress the warning:
  commit 5b8f82e1a17695c9e5fec5842b234967782d7e5b
  Author: Song Liu <songliubraving@xxxxxx>
  Date:   Thu Feb 25 17:22:08 2021 -0800

    checkpatch: do not apply "initialise globals to 0" check to BPF progs

You should be good now.


+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, 2);
+	__type(key, __u64);
+	__type(value, __u64);
+} hash_map SEC(".maps");
+
+SEC("tp/syscalls/sys_enter_getpgid")
+int bpf_lookup_and_delete_test(const void *ctx)
+{
+	if (set_pid == bpf_get_current_pid_tgid() >> 32)
+		bpf_map_update_elem(&hash_map, &set_key, &set_value, BPF_NOEXIST);
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
[...]



[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux