On Tue, Feb 08, 2022 at 04:13:01PM -0800, Yonghong Song wrote: > > > On 2/8/22 11:13 AM, Alexei Starovoitov wrote: > > From: Alexei Starovoitov <ast@xxxxxxxxxx> > > > > Prepare light skeleton to be used in the kernel module and in the user space. > > The look and feel of lskel.h is mostly the same with the difference that for > > user space the skel->rodata is the same pointer before and after skel_load > > operation, while in the kernel the skel->rodata after skel_open and the > > skel->rodata after skel_load are different pointers. > > Typical usage of skeleton remains the same for kernel and user space: > > skel = my_bpf__open(); > > skel->rodata->my_global_var = init_val; > > err = my_bpf__load(skel); > > err = my_bpf__attach(skel); > > // access skel->rodata->my_global_var; > > // access skel->bss->another_var; > > > > Signed-off-by: Alexei Starovoitov <ast@xxxxxxxxxx> > > --- > > tools/lib/bpf/skel_internal.h | 193 +++++++++++++++++++++++++++++++--- > > 1 file changed, 176 insertions(+), 17 deletions(-) > > > > diff --git a/tools/lib/bpf/skel_internal.h b/tools/lib/bpf/skel_internal.h > > index dcd3336512d4..d16544666341 100644 > > --- a/tools/lib/bpf/skel_internal.h > > +++ b/tools/lib/bpf/skel_internal.h > > @@ -3,9 +3,19 @@ > > #ifndef __SKEL_INTERNAL_H > > #define __SKEL_INTERNAL_H > > +#ifdef __KERNEL__ > > +#include <linux/fdtable.h> > > +#include <linux/mm.h> > > +#include <linux/mman.h> > > +#include <linux/slab.h> > > +#include <linux/bpf.h> > > +#else > > #include <unistd.h> > > #include <sys/syscall.h> > > #include <sys/mman.h> > > +#include <stdlib.h> > > +#include "bpf.h" > > +#endif > > #ifndef __NR_bpf > > # if defined(__mips__) && defined(_ABIO32) > > @@ -25,17 +35,11 @@ > > * requested during loader program generation. 
> > */ > > struct bpf_map_desc { > > - union { > > - /* input for the loader prog */ > > - struct { > > - __aligned_u64 initial_value; > > - __u32 max_entries; > > - }; > > - /* output of the loader prog */ > > - struct { > > - int map_fd; > > - }; > > - }; > > + /* output of the loader prog */ > > + int map_fd; > > + /* input for the loader prog */ > > + __u32 max_entries; > > + __aligned_u64 initial_value; > > }; > > struct bpf_prog_desc { > > int prog_fd; > > @@ -57,12 +61,159 @@ struct bpf_load_and_run_opts { > > const char *errstr; > > }; > > +long bpf_sys_bpf(__u32 cmd, void *attr, __u32 attr_size); > > + > > static inline int skel_sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr, > > unsigned int size) > > { > > +#ifdef __KERNEL__ > > + return bpf_sys_bpf(cmd, attr, size); > > +#else > > return syscall(__NR_bpf, cmd, attr, size); > > +#endif > > +} > > + > > +#ifdef __KERNEL__ > > +static inline int close(int fd) > > +{ > > + return close_fd(fd); > > +} > > + > > +static inline void *skel_alloc(size_t size) > > +{ > > + return kcalloc(1, size, GFP_KERNEL); > > +} > > + > > +static inline void skel_free(const void *p) > > +{ > > + kfree(p); > > +} > > + > > +/* skel->bss/rodata maps are populated in three steps. > > + * > > + * For kernel use: > > + * skel_prep_map_data() allocates kernel memory that kernel module can directly access. > > + * skel_prep_init_value() allocates a region in user space process and copies > > + * potentially modified initial map value into it. > > + * The loader program will perform copy_from_user() from maps.rodata.initial_value. > > + * skel_finalize_map_data() sets skel->rodata to point to actual value in a bpf map and > > + * does maps.rodata.initial_value = ~0ULL to signal skel_free_map_data() that kvfree > > + * is not nessary. > > + * > > + * For user space: > > + * skel_prep_map_data() mmaps anon memory into skel->rodata that can be accessed directly. 
> > + * skel_prep_init_value() copies rodata pointer into map.rodata.initial_value. > > + * The loader program will perform copy_from_user() from maps.rodata.initial_value. > > + * skel_finalize_map_data() remaps bpf array map value from the kernel memory into > > + * skel->rodata address. > > + * > > + * The "bpftool gen skeleton -L" command generates lskel.h that is suitable for > > + * both kernel and user space. The generated loader program does > > + * copy_from_user() from intial_value. Therefore the vm_mmap+copy_to_user step > > + * is need when lskel is used from the kernel module. > > + */ > > +static inline void skel_free_map_data(void *p, __u64 addr, size_t sz) > > +{ > > + if (addr && addr != ~0ULL) > > + vm_munmap(addr, sz); > > + if (addr != ~0ULL) > > + kvfree(p); > > + /* When addr == ~0ULL the 'p' points to > > + * ((struct bpf_array *)map)->value. See skel_finalize_map_data. > > + */ > > +} > > + > > +static inline void *skel_prep_map_data(const void *val, size_t mmap_sz, size_t val_sz) > > +{ > > + void *addr; > > + > > + addr = kvmalloc(val_sz, GFP_KERNEL); > > + if (!addr) > > + return NULL; > > + memcpy(addr, val, val_sz); > > + return addr; > > +} > > + > > +static inline __u64 skel_prep_init_value(void **addr, size_t mmap_sz, size_t val_sz) > > +{ > > + __u64 ret = 0; > > + void *uaddr; > > + > > + uaddr = (void *) vm_mmap(NULL, 0, mmap_sz, PROT_READ | PROT_WRITE, > > + MAP_SHARED | MAP_ANONYMOUS, 0); > > + if (IS_ERR(uaddr)) > > + goto out; > > + if (copy_to_user(uaddr, *addr, val_sz)) { > > + vm_munmap((long) uaddr, mmap_sz); > > + goto out; > > + } > > + ret = (__u64) (long) uaddr; > > +out: > > + kvfree(*addr); > > + *addr = NULL; > > + return ret; > > } > > +static inline void *skel_finalize_map_data(__u64 *addr, size_t mmap_sz, int flags, int fd) > > +{ > > + struct bpf_map *map; > > + void *ptr = NULL; > > + > > + vm_munmap(*addr, mmap_sz); > > + *addr = ~0ULL; > > + > > + map = bpf_map_get(fd); > > + if (IS_ERR(map)) > > + return 
NULL; > > + if (map->map_type != BPF_MAP_TYPE_ARRAY) > > + goto out; > > Should we do more map validation here, e.g., max_entries = 1 > and also checking value_size? The map_type check is a sanity check. It should be valid by construction of the loader prog. The map is also mmap-able, and when signed progs come to life it will be frozen and signature-checked. The rodata map should be read-only too, but the direct ((struct bpf_array *)map)->value access assumes that the kernel module won't mess with the values. imo the map_type check is enough. More checks feel like overkill.