[PATCH RFC bpf-next v1 26/32] bpf: Wire up freeing of bpf_list_heads in maps

Until now, bpf_list_heads in maps were not being freed. Wire up the
code needed to release them when they are found in a map value.
This also handles freeing local kptrs with a bpf_list_head in them.
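
For illustration, the kind of map value this free path handles looks
roughly like the following (the struct and field names here are made up
for this example, and the annotation tying 'head' to 'struct foo' is
omitted; the exact declaration syntax may differ elsewhere in the
series):

	struct foo {
		struct bpf_list_node node;
		int data;
	};

	struct map_value {
		struct bpf_spin_lock lock;	/* protects 'head' */
		struct bpf_list_head head;	/* struct foo, linked via 'node' */
	};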

Note that draining a bpf_list_head in a map value requires holding the
bpf_spin_lock in the same map value, as sketched below.
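
The drain itself, implemented in bpf_map_free_list_heads() below, boils
down to the following pattern (a simplified sketch; the real code walks
every bpf_list_head recorded in the map's list_head_off_tab):

	struct list_head *head, *pos;
	void *entry;

	bpf_map_value_lock(map, map_value);	/* bpf_spin_lock, IRQs off */
	head = map_value + off_desc->offset;
	pos = head->next;
	if (!pos)				/* zeroed, never-used head */
		goto init;
	while (pos != head) {
		/* step back from the embedded list node to the object */
		entry = (void *)pos - off_desc->list_head.list_node_off;
		pos = pos->next;		/* advance before freeing */
		bpf_free_local_kptr(off_desc->list_head.btf,
				    off_desc->list_head.value_type_id,
				    entry);
	}
init:
	INIT_LIST_HEAD(head);	/* leave the head empty and reusable */
	bpf_map_value_unlock(map, map_value);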

Signed-off-by: Kumar Kartikeya Dwivedi <memxor@xxxxxxxxx>
---
 include/linux/bpf.h            |  3 +++
 kernel/bpf/arraymap.c          |  8 +++++++
 kernel/bpf/bpf_local_storage.c | 11 ++++++----
 kernel/bpf/hashtab.c           | 22 +++++++++++++++++++
 kernel/bpf/helpers.c           | 14 ++++++++++++
 kernel/bpf/syscall.c           | 39 ++++++++++++++++++++++++++++++++++
 6 files changed, 93 insertions(+), 4 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 3353c47fefa9..ad18408ba442 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -381,6 +381,8 @@ static inline void zero_map_value(struct bpf_map *map, void *dst)
 
 void copy_map_value_locked(struct bpf_map *map, void *dst, void *src,
 			   bool lock_src);
+void bpf_map_value_lock(struct bpf_map *map, void *map_value);
+void bpf_map_value_unlock(struct bpf_map *map, void *map_value);
 void bpf_timer_cancel_and_free(void *timer);
 int bpf_obj_name_cpy(char *dst, const char *src, unsigned int size);
 
@@ -1736,6 +1738,7 @@ struct bpf_map_value_off_desc *bpf_map_list_head_off_contains(struct bpf_map *ma
 void bpf_map_free_list_head_off_tab(struct bpf_map *map);
 struct bpf_map_value_off *bpf_map_copy_list_head_off_tab(const struct bpf_map *map);
 bool bpf_map_equal_list_head_off_tab(const struct bpf_map *map_a, const struct bpf_map *map_b);
+void bpf_map_free_list_heads(struct bpf_map *map, void *map_value);
 
 struct bpf_map *bpf_map_get(u32 ufd);
 struct bpf_map *bpf_map_get_with_uref(u32 ufd);
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index c7263ee3a35f..5412fa66d659 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -312,6 +312,8 @@ static void check_and_free_fields(struct bpf_array *arr, void *val)
 		bpf_timer_cancel_and_free(val + arr->map.timer_off);
 	if (map_value_has_kptrs(&arr->map))
 		bpf_map_free_kptrs(&arr->map, val);
+	if (map_value_has_list_heads(&arr->map))
+		bpf_map_free_list_heads(&arr->map, val);
 }
 
 /* Called from syscall or from eBPF program */
@@ -443,6 +445,12 @@ static void array_map_free(struct bpf_map *map)
 		bpf_map_free_kptr_off_tab(map);
 	}
 
+	if (map_value_has_list_heads(map)) {
+		for (i = 0; i < array->map.max_entries; i++)
+			bpf_map_free_list_heads(map, array_map_elem_ptr(array, i));
+		bpf_map_free_list_head_off_tab(map);
+	}
+
 	if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
 		bpf_array_free_percpu(array);
 
diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c
index b5ccd76026b6..e89c6aa5d782 100644
--- a/kernel/bpf/bpf_local_storage.c
+++ b/kernel/bpf/bpf_local_storage.c
@@ -107,6 +107,8 @@ static void check_and_free_fields(struct bpf_local_storage_elem *selem)
 {
 	if (map_value_has_kptrs(selem->map))
 		bpf_map_free_kptrs(selem->map, SDATA(selem));
+	if (map_value_has_list_heads(selem->map))
+		bpf_map_free_list_heads(selem->map, SDATA(selem));
 }
 
 static void bpf_selem_free_rcu(struct rcu_head *rcu)
@@ -608,13 +610,14 @@ void bpf_local_storage_map_free(struct bpf_local_storage_map *smap,
 	 */
 	synchronize_rcu();
 
-	/* When local storage map has kptrs, the call_rcu callback accesses
-	 * kptr_off_tab, hence we need the bpf_selem_free_rcu callbacks to
-	 * finish before we free it.
+	/* When local storage map has kptrs or bpf_list_heads, the call_rcu
+	 * callback accesses kptr_off_tab or list_head_off_tab, hence we need
+	 * the bpf_selem_free_rcu callbacks to finish before we free it.
 	 */
-	if (map_value_has_kptrs(&smap->map)) {
+	if (map_value_has_kptrs(&smap->map) || map_value_has_list_heads(&smap->map)) {
 		rcu_barrier();
 		bpf_map_free_kptr_off_tab(&smap->map);
+		bpf_map_free_list_head_off_tab(&smap->map);
 	}
 
 	kvfree(smap->buckets);
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 270e0ecf4ba3..bd1637fa7e3b 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -297,6 +297,25 @@ static void htab_free_prealloced_kptrs(struct bpf_htab *htab)
 	}
 }
 
+static void htab_free_prealloced_list_heads(struct bpf_htab *htab)
+{
+	u32 num_entries = htab->map.max_entries;
+	int i;
+
+	if (!map_value_has_list_heads(&htab->map))
+		return;
+	if (htab_has_extra_elems(htab))
+		num_entries += num_possible_cpus();
+
+	for (i = 0; i < num_entries; i++) {
+		struct htab_elem *elem;
+
+		elem = get_htab_elem(htab, i);
+		bpf_map_free_list_heads(&htab->map, elem->key + round_up(htab->map.key_size, 8));
+		cond_resched();
+	}
+}
+
 static void htab_free_elems(struct bpf_htab *htab)
 {
 	int i;
@@ -782,6 +801,8 @@ static void check_and_free_fields(struct bpf_htab *htab,
 			bpf_map_free_kptrs(&htab->map, map_value);
 		}
 	}
+	if (map_value_has_list_heads(&htab->map))
+		bpf_map_free_list_heads(&htab->map, map_value);
 }
 
 /* It is called from the bpf_lru_list when the LRU needs to delete
@@ -1514,6 +1535,7 @@ static void htab_map_free(struct bpf_map *map)
 	if (!htab_is_prealloc(htab)) {
 		delete_all_elements(htab);
 	} else {
+		htab_free_prealloced_list_heads(htab);
 		htab_free_prealloced_kptrs(htab);
 		prealloc_destroy(htab);
 	}
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 168460a03ec3..832dd57ae608 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -377,6 +377,20 @@ void copy_map_value_locked(struct bpf_map *map, void *dst, void *src,
 	preempt_enable();
 }
 
+void bpf_map_value_lock(struct bpf_map *map, void *map_value)
+{
+	WARN_ON_ONCE(map->spin_lock_off < 0);
+	preempt_disable();
+	__bpf_spin_lock_irqsave(map_value + map->spin_lock_off);
+}
+
+void bpf_map_value_unlock(struct bpf_map *map, void *map_value)
+{
+	WARN_ON_ONCE(map->spin_lock_off < 0);
+	__bpf_spin_unlock_irqrestore(map_value + map->spin_lock_off);
+	preempt_enable();
+}
+
 BPF_CALL_0(bpf_jiffies64)
 {
 	return get_jiffies_64();
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 1af9a7cba08c..f1e244b03382 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -600,6 +600,13 @@ static void bpf_free_local_kptr(const struct btf *btf, u32 btf_id, void *kptr)
 
 	if (!kptr)
 		return;
+	/* There is no requirement to take the bpf_spin_lock protecting a
+	 * bpf_list_head inside a local kptr: local kptrs have single
+	 * ownership, so if we obtained the kptr through xchg, we own it.
+	 *
+	 * When iterating elements of a bpf_list_head in a map value, we
+	 * are already holding the lock for it.
+	 */
 	/* We must free bpf_list_head in local kptr */
 	t = btf_type_by_id(btf, btf_id);
 	/* TODO: We should just populate this info once in struct btf, and then
@@ -697,6 +704,38 @@ bool bpf_map_equal_list_head_off_tab(const struct bpf_map *map_a, const struct b
 				       map_value_has_list_heads(map_b));
 }
 
+void bpf_map_free_list_heads(struct bpf_map *map, void *map_value)
+{
+	struct bpf_map_value_off *tab = map->list_head_off_tab;
+	int i;
+
+	/* TODO: Should we error when bpf_list_head is alone in map value,
+	 * during BTF parsing, instead of ignoring it?
+	 */
+	if (map->spin_lock_off < 0)
+		return;
+
+	bpf_map_value_lock(map, map_value);
+	for (i = 0; i < tab->nr_off; i++) {
+		struct bpf_map_value_off_desc *off_desc = &tab->off[i];
+		struct list_head *list, *olist;
+		void *entry;
+
+		olist = list = map_value + off_desc->offset;
+		list = list->next;
+		if (!list)
+			goto init;
+		while (list != olist) {
+			entry = (void *)list - off_desc->list_head.list_node_off;
+			list = list->next;
+			bpf_free_local_kptr(off_desc->list_head.btf, off_desc->list_head.value_type_id, entry);
+		}
+	init:
+		INIT_LIST_HEAD(olist);
+	}
+	bpf_map_value_unlock(map, map_value);
+}
+
 /* called from workqueue */
 static void bpf_map_free_deferred(struct work_struct *work)
 {
-- 
2.34.1



