Re: [PATCH v2 bpf-next 2/5] libbpf: Prepare light skeleton for the kernel.

Yonghong Song <yhs@xxxxxx> · Tue, 8 Feb 2022 16:13:01 -0800

On 2/8/22 11:13 AM, Alexei Starovoitov wrote:
From: Alexei Starovoitov <ast@xxxxxxxxxx>

Prepare light skeleton to be used in the kernel module and in the user space.
The look and feel of lskel.h is mostly the same with the difference that for
user space the skel->rodata is the same pointer before and after skel_load
operation, while in the kernel the skel->rodata after skel_open and the
skel->rodata after skel_load are different pointers.
Typical usage of skeleton remains the same for kernel and user space:
skel = my_bpf__open();
skel->rodata->my_global_var = init_val;
err = my_bpf__load(skel);
err = my_bpf__attach(skel);
// access skel->rodata->my_global_var;
// access skel->bss->another_var;

Signed-off-by: Alexei Starovoitov <ast@xxxxxxxxxx>
---
  tools/lib/bpf/skel_internal.h | 193 +++++++++++++++++++++++++++++++---
  1 file changed, 176 insertions(+), 17 deletions(-)

diff --git a/tools/lib/bpf/skel_internal.h b/tools/lib/bpf/skel_internal.h
index dcd3336512d4..d16544666341 100644
--- a/tools/lib/bpf/skel_internal.h
+++ b/tools/lib/bpf/skel_internal.h
@@ -3,9 +3,19 @@
  #ifndef __SKEL_INTERNAL_H
  #define __SKEL_INTERNAL_H
  
+#ifdef __KERNEL__
+#include <linux/fdtable.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/slab.h>
+#include <linux/bpf.h>
+#else
  #include <unistd.h>
  #include <sys/syscall.h>
  #include <sys/mman.h>
+#include <stdlib.h>
+#include "bpf.h"
+#endif
  
  #ifndef __NR_bpf
  # if defined(__mips__) && defined(_ABIO32)
@@ -25,17 +35,11 @@
   * requested during loader program generation.
   */
  struct bpf_map_desc {
-	union {
-		/* input for the loader prog */
-		struct {
-			__aligned_u64 initial_value;
-			__u32 max_entries;
-		};
-		/* output of the loader prog */
-		struct {
-			int map_fd;
-		};
-	};
+	/* output of the loader prog */
+	int map_fd;
+	/* input for the loader prog */
+	__u32 max_entries;
+	__aligned_u64 initial_value;
  };
  struct bpf_prog_desc {
  	int prog_fd;
@@ -57,12 +61,159 @@ struct bpf_load_and_run_opts {
  	const char *errstr;
  };
  
+long bpf_sys_bpf(__u32 cmd, void *attr, __u32 attr_size);
+
  static inline int skel_sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr,
  			  unsigned int size)
  {
+#ifdef __KERNEL__
+	return bpf_sys_bpf(cmd, attr, size);
+#else
  	return syscall(__NR_bpf, cmd, attr, size);
+#endif
+}
+
+#ifdef __KERNEL__
+static inline int close(int fd)
+{
+	return close_fd(fd);
+}
+
+static inline void *skel_alloc(size_t size)
+{
+	return kcalloc(1, size, GFP_KERNEL);
+}
+
+static inline void skel_free(const void *p)
+{
+	kfree(p);
+}
+
+/* skel->bss/rodata maps are populated in three steps.
+ *
+ * For kernel use:
+ * skel_prep_map_data() allocates kernel memory that kernel module can directly access.
+ * skel_prep_init_value() allocates a region in user space process and copies
+ * potentially modified initial map value into it.
+ * The loader program will perform copy_from_user() from maps.rodata.initial_value.
+ * skel_finalize_map_data() sets skel->rodata to point to actual value in a bpf map and
+ * does maps.rodata.initial_value = ~0ULL to signal skel_free_map_data() that kvfree
+ * is not nessary.
+ *
+ * For user space:
+ * skel_prep_map_data() mmaps anon memory into skel->rodata that can be accessed directly.
+ * skel_prep_init_value() copies rodata pointer into map.rodata.initial_value.
+ * The loader program will perform copy_from_user() from maps.rodata.initial_value.
+ * skel_finalize_map_data() remaps bpf array map value from the kernel memory into
+ * skel->rodata address.
+ *
+ * The "bpftool gen skeleton -L" command generates lskel.h that is suitable for
+ * both kernel and user space. The generated loader program does
+ * copy_from_user() from intial_value. Therefore the vm_mmap+copy_to_user step
+ * is need when lskel is used from the kernel module.
+ */
+static inline void skel_free_map_data(void *p, __u64 addr, size_t sz)
+{
+	if (addr && addr != ~0ULL)
+		vm_munmap(addr, sz);
+	if (addr != ~0ULL)
+		kvfree(p);
+	/* When addr == ~0ULL the 'p' points to
+	 * ((struct bpf_array *)map)->value. See skel_finalize_map_data.
+	 */
+}
+
+static inline void *skel_prep_map_data(const void *val, size_t mmap_sz, size_t val_sz)
+{
+	void *addr;
+
+	addr = kvmalloc(val_sz, GFP_KERNEL);
+	if (!addr)
+		return NULL;
+	memcpy(addr, val, val_sz);
+	return addr;
+}
+
+static inline __u64 skel_prep_init_value(void **addr, size_t mmap_sz, size_t val_sz)
+{
+	__u64 ret = 0;
+	void *uaddr;
+
+	uaddr = (void *) vm_mmap(NULL, 0, mmap_sz, PROT_READ | PROT_WRITE,
+				 MAP_SHARED | MAP_ANONYMOUS, 0);
+	if (IS_ERR(uaddr))
+		goto out;
+	if (copy_to_user(uaddr, *addr, val_sz)) {
+		vm_munmap((long) uaddr, mmap_sz);
+		goto out;
+	}
+	ret = (__u64) (long) uaddr;
+out:
+	kvfree(*addr);
+	*addr = NULL;
+	return ret;
  }
  
+static inline void *skel_finalize_map_data(__u64 *addr, size_t mmap_sz, int flags, int fd)
+{
+	struct bpf_map *map;
+	void *ptr = NULL;
+
+	vm_munmap(*addr, mmap_sz);
+	*addr = ~0ULL;
+
+	map = bpf_map_get(fd);
+	if (IS_ERR(map))
+		return NULL;
+	if (map->map_type != BPF_MAP_TYPE_ARRAY)
+		goto out;

Should we do more map validation here, e.g., max_entries = 1
and also checking value_size?

+	ptr = ((struct bpf_array *)map)->value;
+	/* the ptr stays valid, since FD is not closed */
+out:
+	bpf_map_put(map);
+	return ptr;
+}
+
[...]