Re: [PATCH v2 bpf-next 2/5] libbpf: Prepare light skeleton for the kernel.

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 





On 2/8/22 4:44 PM, Alexei Starovoitov wrote:
On Tue, Feb 08, 2022 at 04:13:01PM -0800, Yonghong Song wrote:


On 2/8/22 11:13 AM, Alexei Starovoitov wrote:
From: Alexei Starovoitov <ast@xxxxxxxxxx>

Prepare light skeleton to be used in the kernel module and in the user space.
The look and feel of lskel.h is mostly the same with the difference that for
user space the skel->rodata is the same pointer before and after skel_load
operation, while in the kernel the skel->rodata after skel_open and the
skel->rodata after skel_load are different pointers.
Typical usage of skeleton remains the same for kernel and user space:
skel = my_bpf__open();
skel->rodata->my_global_var = init_val;
err = my_bpf__load(skel);
err = my_bpf__attach(skel);
// access skel->rodata->my_global_var;
// access skel->bss->another_var;

Signed-off-by: Alexei Starovoitov <ast@xxxxxxxxxx>
---
   tools/lib/bpf/skel_internal.h | 193 +++++++++++++++++++++++++++++++---
   1 file changed, 176 insertions(+), 17 deletions(-)

diff --git a/tools/lib/bpf/skel_internal.h b/tools/lib/bpf/skel_internal.h
index dcd3336512d4..d16544666341 100644
--- a/tools/lib/bpf/skel_internal.h
+++ b/tools/lib/bpf/skel_internal.h
@@ -3,9 +3,19 @@
   #ifndef __SKEL_INTERNAL_H
   #define __SKEL_INTERNAL_H
+#ifdef __KERNEL__
+#include <linux/fdtable.h>
+#include <linux/mm.h>
+#include <linux/mman.h>
+#include <linux/slab.h>
+#include <linux/bpf.h>
+#else
   #include <unistd.h>
   #include <sys/syscall.h>
   #include <sys/mman.h>
+#include <stdlib.h>
+#include "bpf.h"
+#endif
   #ifndef __NR_bpf
   # if defined(__mips__) && defined(_ABIO32)
@@ -25,17 +35,11 @@
    * requested during loader program generation.
    */
   struct bpf_map_desc {
-	union {
-		/* input for the loader prog */
-		struct {
-			__aligned_u64 initial_value;
-			__u32 max_entries;
-		};
-		/* output of the loader prog */
-		struct {
-			int map_fd;
-		};
-	};
+	/* output of the loader prog */
+	int map_fd;
+	/* input for the loader prog */
+	__u32 max_entries;
+	__aligned_u64 initial_value;
   };
   struct bpf_prog_desc {
   	int prog_fd;
@@ -57,12 +61,159 @@ struct bpf_load_and_run_opts {
   	const char *errstr;
   };
+long bpf_sys_bpf(__u32 cmd, void *attr, __u32 attr_size);
+
   static inline int skel_sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr,
   			  unsigned int size)
   {
+#ifdef __KERNEL__
+	return bpf_sys_bpf(cmd, attr, size);
+#else
   	return syscall(__NR_bpf, cmd, attr, size);
+#endif
+}
+
+#ifdef __KERNEL__
+static inline int close(int fd)
+{
+	return close_fd(fd);
+}
+
+static inline void *skel_alloc(size_t size)
+{
+	return kcalloc(1, size, GFP_KERNEL);
+}
+
+static inline void skel_free(const void *p)
+{
+	kfree(p);
+}
+
+/* skel->bss/rodata maps are populated in three steps.
+ *
+ * For kernel use:
+ * skel_prep_map_data() allocates kernel memory that kernel module can directly access.
+ * skel_prep_init_value() allocates a region in user space process and copies
+ * potentially modified initial map value into it.
+ * The loader program will perform copy_from_user() from maps.rodata.initial_value.
+ * skel_finalize_map_data() sets skel->rodata to point to actual value in a bpf map and
+ * does maps.rodata.initial_value = ~0ULL to signal skel_free_map_data() that kvfree
+ * is not nessary.
+ *
+ * For user space:
+ * skel_prep_map_data() mmaps anon memory into skel->rodata that can be accessed directly.
+ * skel_prep_init_value() copies rodata pointer into map.rodata.initial_value.
+ * The loader program will perform copy_from_user() from maps.rodata.initial_value.
+ * skel_finalize_map_data() remaps bpf array map value from the kernel memory into
+ * skel->rodata address.
+ *
+ * The "bpftool gen skeleton -L" command generates lskel.h that is suitable for
+ * both kernel and user space. The generated loader program does
+ * copy_from_user() from intial_value. Therefore the vm_mmap+copy_to_user step
+ * is need when lskel is used from the kernel module.
+ */
+static inline void skel_free_map_data(void *p, __u64 addr, size_t sz)
+{
+	if (addr && addr != ~0ULL)
+		vm_munmap(addr, sz);
+	if (addr != ~0ULL)
+		kvfree(p);
+	/* When addr == ~0ULL the 'p' points to
+	 * ((struct bpf_array *)map)->value. See skel_finalize_map_data.
+	 */
+}
+
+static inline void *skel_prep_map_data(const void *val, size_t mmap_sz, size_t val_sz)
+{
+	void *addr;
+
+	addr = kvmalloc(val_sz, GFP_KERNEL);
+	if (!addr)
+		return NULL;
+	memcpy(addr, val, val_sz);
+	return addr;
+}
+
+static inline __u64 skel_prep_init_value(void **addr, size_t mmap_sz, size_t val_sz)
+{
+	__u64 ret = 0;
+	void *uaddr;
+
+	uaddr = (void *) vm_mmap(NULL, 0, mmap_sz, PROT_READ | PROT_WRITE,
+				 MAP_SHARED | MAP_ANONYMOUS, 0);
+	if (IS_ERR(uaddr))
+		goto out;
+	if (copy_to_user(uaddr, *addr, val_sz)) {
+		vm_munmap((long) uaddr, mmap_sz);
+		goto out;
+	}
+	ret = (__u64) (long) uaddr;
+out:
+	kvfree(*addr);
+	*addr = NULL;
+	return ret;
   }
+static inline void *skel_finalize_map_data(__u64 *addr, size_t mmap_sz, int flags, int fd)
+{
+	struct bpf_map *map;
+	void *ptr = NULL;
+
+	vm_munmap(*addr, mmap_sz);
+	*addr = ~0ULL;
+
+	map = bpf_map_get(fd);
+	if (IS_ERR(map))
+		return NULL;
+	if (map->map_type != BPF_MAP_TYPE_ARRAY)
+		goto out;

Should we do more map validation here, e.g., max_entries = 1
and also checking value_size?

The map_type check is a sanity check.

Yes, I am aware of this as a sanity check. I am just wondering if we do this check whether we should do other checks as well.

It should be valid by construction of loader prog.
The map is also mmap-able and when signed progs will come to life it will be
frozen and signature checked.
rodata map should be readonly too, but ((struct bpf_array *)map)->value
direct access assumes that the kernel module won't mess with the values.
imo map_type check is enough. More checks feels like overkill.

I am okay with this. Maybe add a comment right before
bpf_map_get() to explain map with fd is created by loader prog so
map should be a valid array map with max_entries 1, the IS_ERR
and map_type checks are just sanity checks.

With this,

Acked-by: Yonghong Song <yhs@xxxxxx>



[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux