On 11/7/23 6:06 AM, Hou Tao wrote:
From: Hou Tao <houtao1@xxxxxxxxxx>
bpf_map_of_map_fd_get_ptr() converts a map fd into the pointer that is
saved in the map-in-map. bpf_map_of_map_fd_put_ptr() releases the
pointer saved in the map-in-map. These two helpers will be used by the
following patches to fix the use-after-free problems for map-in-map.
Signed-off-by: Hou Tao <houtao1@xxxxxxxxxx>
---
kernel/bpf/map_in_map.c | 51 +++++++++++++++++++++++++++++++++++++++++
kernel/bpf/map_in_map.h | 11 +++++++--
2 files changed, 60 insertions(+), 2 deletions(-)
diff --git a/kernel/bpf/map_in_map.c b/kernel/bpf/map_in_map.c
index 8323ce201159d..96e32f4167c4e 100644
--- a/kernel/bpf/map_in_map.c
+++ b/kernel/bpf/map_in_map.c
@@ -4,6 +4,7 @@
#include <linux/slab.h>
#include <linux/bpf.h>
#include <linux/btf.h>
+#include <linux/rcupdate.h>
#include "map_in_map.h"
@@ -139,3 +140,53 @@ u32 bpf_map_fd_sys_lookup_elem(void *ptr)
{
return ((struct bpf_map *)ptr)->id;
}
+
+void *bpf_map_of_map_fd_get_ptr(struct bpf_map *map, struct file *map_file,
+ int ufd)
+{
+ struct bpf_inner_map_element *element;
+ struct bpf_map *inner_map;
+
+ element = kmalloc(sizeof(*element), GFP_KERNEL);
+ if (!element)
+ return ERR_PTR(-ENOMEM);
+
+ inner_map = bpf_map_fd_get_ptr(map, map_file, ufd);
+ if (IS_ERR(inner_map)) {
+ kfree(element);
+ return inner_map;
+ }
+
+ element->map = inner_map;
+ return element;
+}
+
+static void bpf_inner_map_element_free_rcu(struct rcu_head *rcu)
+{
+ struct bpf_inner_map_element *elem = container_of(rcu, struct bpf_inner_map_element, rcu);
+
+ bpf_map_put(elem->map);
+ kfree(elem);
+}
+
+static void bpf_inner_map_element_free_tt_rcu(struct rcu_head *rcu)
+{
+ if (rcu_trace_implies_rcu_gp())
+ bpf_inner_map_element_free_rcu(rcu);
+ else
+ call_rcu(rcu, bpf_inner_map_element_free_rcu);
+}
+
+void bpf_map_of_map_fd_put_ptr(void *ptr, bool need_defer)
+{
+ struct bpf_inner_map_element *element = ptr;
+
+ /* Do bpf_map_put() after an RCU grace period and a tasks trace
+ * RCU grace period, so that any bpf program which is currently
+ * manipulating the map has exited when bpf_map_put() is called.
+ */
+ if (need_defer)
Should "need_defer" only be true when this is called from the syscall cmd? If so, instead of
adding an rcu_head to each element, how about "synchronize_rcu_mult(call_rcu, call_rcu_tasks)" here?
+ call_rcu_tasks_trace(&element->rcu, bpf_inner_map_element_free_tt_rcu);
+ else
+ bpf_inner_map_element_free_rcu(&element->rcu);
+}
diff --git a/kernel/bpf/map_in_map.h b/kernel/bpf/map_in_map.h
index 63872bffd9b3c..8d38496e5179b 100644
--- a/kernel/bpf/map_in_map.h
+++ b/kernel/bpf/map_in_map.h
@@ -9,11 +9,18 @@
struct file;
struct bpf_map;
+struct bpf_inner_map_element {
+ struct bpf_map *map;
+ struct rcu_head rcu;
+};
+
struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd);
void bpf_map_meta_free(struct bpf_map *map_meta);
-void *bpf_map_fd_get_ptr(struct bpf_map *map, struct file *map_file,
- int ufd);
+void *bpf_map_fd_get_ptr(struct bpf_map *map, struct file *map_file, int ufd);
void bpf_map_fd_put_ptr(void *ptr, bool need_defer);
u32 bpf_map_fd_sys_lookup_elem(void *ptr);
+void *bpf_map_of_map_fd_get_ptr(struct bpf_map *map, struct file *map_file, int ufd);
+void bpf_map_of_map_fd_put_ptr(void *ptr, bool need_defer);
+
#endif