On 5/5/21 2:40 AM, Denis Salopek wrote:
Extend the existing bpf_map_lookup_and_delete_elem() functionality to
hashtab map types, in addition to stacks and queues.
Create a new hashtab bpf_map_ops function that does lookup and deletion
of the element under the same bucket lock and add the created map_ops to
bpf.h.
Cc: Juraj Vijtiuk <juraj.vijtiuk@xxxxxxxxxx>
Cc: Luka Oreskovic <luka.oreskovic@xxxxxxxxxx>
Cc: Luka Perkov <luka.perkov@xxxxxxxxxx>
Cc: Yonghong Song <yhs@xxxxxx>
Cc: Andrii Nakryiko <andrii.nakryiko@xxxxxxxxx>
Cc: Daniel Borkmann <daniel@xxxxxxxxxxxxx>
Signed-off-by: Denis Salopek <denis.salopek@xxxxxxxxxx>
It would be good if you can have a cover letter since this is
a 3-patch series and the changelog below can be put in
the cover letter too.
The patch looks good to me with a nit below.
Acked-by: Yonghong Song <yhs@xxxxxx>
---
v2: Add functionality for LRU/per-CPU, add test_progs tests.
v3: Add bpf_map_lookup_and_delete_elem_flags() and enable BPF_F_LOCK
flag, change CHECKs to ASSERT_OKs, initialize variables to 0.
v4: Fix the return value for unsupported map types.
v5: Split patch to 3 patches. Extend BPF_MAP_LOOKUP_AND_DELETE_ELEM
documentation with this changes.
v6: Remove unneeded flag check, minor code/format fixes.
---
Since the above changelog is not part of commit message, people
typically either put in the cover letter or put right before the
"diff --git ..." as when the patch is applied, the changelog
will be ignored.
include/linux/bpf.h | 2 +
include/uapi/linux/bpf.h | 13 +++++
kernel/bpf/hashtab.c | 97 ++++++++++++++++++++++++++++++++++
kernel/bpf/syscall.c | 34 ++++++++++--
tools/include/uapi/linux/bpf.h | 13 +++++
5 files changed, 155 insertions(+), 4 deletions(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 02b02cb29ce2..9da98d98db25 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -69,6 +69,8 @@ struct bpf_map_ops {
void *(*map_lookup_elem_sys_only)(struct bpf_map *map, void *key);
int (*map_lookup_batch)(struct bpf_map *map, const union bpf_attr *attr,
union bpf_attr __user *uattr);
+ int (*map_lookup_and_delete_elem)(struct bpf_map *map, void *key,
+ void *value, u64 flags);
int (*map_lookup_and_delete_batch)(struct bpf_map *map,
const union bpf_attr *attr,
union bpf_attr __user *uattr);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index ec6d85a81744..c10ba06af69e 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -527,6 +527,15 @@ union bpf_iter_link_info {
* Look up an element with the given *key* in the map referred to
* by the file descriptor *fd*, and if found, delete the element.
*
+ * For **BPF_MAP_TYPE_QUEUE** and **BPF_MAP_TYPE_STACK** map
+ * types, the *flags* argument needs to be set to 0, but for other
+ * map types, it may be specified as:
+ *
+ * **BPF_F_LOCK**
+ * Look up and delete the value of a spin-locked map
+ * without returning the lock. This must be specified if
+ * the elements contain a spinlock.
+ *
* The **BPF_MAP_TYPE_QUEUE** and **BPF_MAP_TYPE_STACK** map types
* implement this command as a "pop" operation, deleting the top
* element rather than one corresponding to *key*.
@@ -536,6 +545,10 @@ union bpf_iter_link_info {
* This command is only valid for the following map types:
* * **BPF_MAP_TYPE_QUEUE**
* * **BPF_MAP_TYPE_STACK**
+ * * **BPF_MAP_TYPE_HASH**
+ * * **BPF_MAP_TYPE_PERCPU_HASH**
+ * * **BPF_MAP_TYPE_LRU_HASH**
+ * * **BPF_MAP_TYPE_LRU_PERCPU_HASH**
*
* Return
* Returns zero on success. On error, -1 is returned and *errno*
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index d7ebb12ffffc..d68d403795e4 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -1401,6 +1401,99 @@ static void htab_map_seq_show_elem(struct bpf_map *map, void *key,
rcu_read_unlock();
}
+static int __htab_map_lookup_and_delete_elem(struct bpf_map *map, void *key,
+ void *value, bool is_lru_map,
+ bool is_percpu, u64 flags)
+{
+ struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+ struct hlist_nulls_head *head;
+ unsigned long bflags;
+ struct htab_elem *l;
+ u32 hash, key_size;
+ struct bucket *b;
+ int ret;
+
+ key_size = map->key_size;
+
+ hash = htab_map_hash(key, key_size, htab->hashrnd);
+ b = __select_bucket(htab, hash);
+ head = &b->head;
+
+ ret = htab_lock_bucket(htab, b, hash, &bflags);
+ if (ret)
+ return ret;
+
+ l = lookup_elem_raw(head, hash, key, key_size);
+ if (!l) {
+ ret = -ENOENT;
+ } else {
+ if (is_percpu) {
+ u32 roundup_value_size = round_up(map->value_size, 8);
+ void __percpu *pptr;
+ int off = 0, cpu;
+
+ pptr = htab_elem_get_ptr(l, key_size);
+ for_each_possible_cpu(cpu) {
+ bpf_long_memcpy(value + off,
+ per_cpu_ptr(pptr, cpu),
+ roundup_value_size);
+ off += roundup_value_size;
+ }
+ } else {
+ u32 roundup_key_size = round_up(map->key_size, 8);
nit: let us have an empty line between declaration and other statements.
+ if (flags & BPF_F_LOCK)
+ copy_map_value_locked(map, value, l->key +
+ roundup_key_size,
+ true);
+ else
+ copy_map_value(map, value, l->key +
+ roundup_key_size);
+ check_and_init_map_lock(map, value);
+ }
+
+ hlist_nulls_del_rcu(&l->hash_node);
+ if (!is_lru_map)
+ free_htab_elem(htab, l);
+ }
+
+ htab_unlock_bucket(htab, b, hash, bflags);
+
+ if (is_lru_map && l)
+ bpf_lru_push_free(&htab->lru, &l->lru_node);
+
+ return ret;
+}
[...]