From: Hou Tao <houtao1@xxxxxxxxxx> Userspace applications use the bpf syscall to look up or update bpf maps. They pass a pointer to a fixed-size buffer to the kernel to represent the map key. To support maps with variable-length keys, introduce bpf_dynptr_user so that userspace can pass a pointer to a bpf_dynptr_user which specifies the address and the length of the key buffer. In order to represent a dynptr from userspace, add a new dynptr type: BPF_DYNPTR_TYPE_USER. Because a BPF_DYNPTR_TYPE_USER-typed dynptr is not available to bpf programs, the verifier doesn't need to be updated. To distinguish maps with fixed-size keys from maps with variable-length keys, add a new map flag: BPF_F_DYNPTR_KEY. For a map which enables BPF_F_DYNPTR_KEY, the key btf type must be bpf_dynptr and the lower 32 bits of map_extra are used to specify the maximum size of the dynptr key. Signed-off-by: Hou Tao <houtao1@xxxxxxxxxx> --- include/linux/bpf.h | 2 + include/uapi/linux/bpf.h | 9 +++ kernel/bpf/syscall.c | 108 ++++++++++++++++++++++++++++----- tools/include/uapi/linux/bpf.h | 8 +++ 4 files changed, 113 insertions(+), 14 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 8da4a8190413..5060d7aee08c 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -2641,6 +2641,8 @@ enum bpf_dynptr_type { BPF_DYNPTR_TYPE_LOCAL, /* Underlying data is a ringbuf record */ BPF_DYNPTR_TYPE_RINGBUF, + /* Points to memory copied from/to userspace */ + BPF_DYNPTR_TYPE_USER, }; void bpf_dynptr_init(struct bpf_dynptr_kern *ptr, void *data, diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 3df78c56c1bf..77a2828f8148 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1246,6 +1246,9 @@ enum { /* Create a map that is suitable to be an inner map with dynamic max entries */ BPF_F_INNER_MAP = (1U << 12), + +/* Map with bpf_dynptr-typed key */ + BPF_F_DYNPTR_KEY = (1U << 13), }; /* Flags for BPF_PROG_QUERY.
*/ @@ -6775,6 +6778,12 @@ struct bpf_timer { __u64 :64; } __attribute__((aligned(8))); +struct bpf_dynptr_user { + __u64 data; + __u32 size; + __u32 :32; +} __attribute__((aligned(8))); + struct bpf_dynptr { __u64 :64; __u64 :64; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index dab156f09f8d..fd15c13cef24 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -151,6 +151,11 @@ bool bpf_map_write_active(const struct bpf_map *map) return atomic64_read(&map->writecnt) != 0; } +static inline bool is_dynptr_key(const struct bpf_map *map) +{ + return map->map_flags & BPF_F_DYNPTR_KEY; +} + static u32 bpf_map_value_size(const struct bpf_map *map) { if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || @@ -994,7 +999,8 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf, /* Some maps allow key to be unspecified. */ if (btf_key_id) { key_type = btf_type_id_size(btf, &btf_key_id, &key_size); - if (!key_type || key_size != map->key_size) + if (!key_type || key_size != map->key_size || + (is_dynptr_key(map) && !btf_type_is_bpf_dynptr(btf, key_type))) return -EINVAL; } else { key_type = btf_type_by_id(btf, 0); @@ -1089,9 +1095,16 @@ static int map_create(union bpf_attr *attr) return -EINVAL; } - if (attr->map_type != BPF_MAP_TYPE_BLOOM_FILTER && - attr->map_extra != 0) + if (attr->map_flags & BPF_F_DYNPTR_KEY) { + /* The lower 32-bits of map_extra specifies the maximum size + * of bpf_dynptr-typed key + */ + if (!attr->btf_key_type_id || !attr->map_extra || (attr->map_extra >> 32) || + bpf_dynptr_check_size(attr->map_extra)) + return -EINVAL; + } else if (attr->map_type != BPF_MAP_TYPE_BLOOM_FILTER && attr->map_extra != 0) { return -EINVAL; + } f_flags = bpf_get_file_flag(attr->map_flags); if (f_flags < 0) @@ -1280,8 +1293,39 @@ int __weak bpf_stackmap_copy(struct bpf_map *map, void *key, void *value) return -ENOTSUPP; } -static void *__bpf_copy_key(void __user *ukey, u64 key_size) +static void *bpf_copy_from_dynptr_ukey(bpfptr_t ukey) { + 
struct bpf_dynptr_kern *kptr; + struct bpf_dynptr_user uptr; + bpfptr_t data; + + if (copy_from_bpfptr(&uptr, ukey, sizeof(uptr))) + return ERR_PTR(-EFAULT); + + if (!uptr.size || bpf_dynptr_check_size(uptr.size)) + return ERR_PTR(-EINVAL); + + /* Allocate and free bpf_dynptr_kern and its data together */ + kptr = kvmalloc(sizeof(*kptr) + uptr.size, GFP_USER | __GFP_NOWARN); + if (!kptr) + return ERR_PTR(-ENOMEM); + + data = make_bpfptr(uptr.data, bpfptr_is_kernel(ukey)); + if (copy_from_bpfptr(&kptr[1], data, uptr.size)) { + kvfree(kptr); + return ERR_PTR(-EFAULT); + } + + bpf_dynptr_init(kptr, &kptr[1], BPF_DYNPTR_TYPE_USER, 0, uptr.size); + + return kptr; +} + +static void *__bpf_copy_key(void __user *ukey, u64 key_size, bool dynptr) +{ + if (dynptr) + return bpf_copy_from_dynptr_ukey(USER_BPFPTR(ukey)); + if (key_size) return vmemdup_user(ukey, key_size); @@ -1291,8 +1335,11 @@ static void *__bpf_copy_key(void __user *ukey, u64 key_size) return NULL; } -static void *___bpf_copy_key(bpfptr_t ukey, u64 key_size) +static void *___bpf_copy_key(bpfptr_t ukey, u64 key_size, bool dynptr) { + if (dynptr) + return bpf_copy_from_dynptr_ukey(ukey); + if (key_size) return kvmemdup_bpfptr(ukey, key_size); @@ -1302,6 +1349,34 @@ static void *___bpf_copy_key(bpfptr_t ukey, u64 key_size) return NULL; } +static void *bpf_new_dynptr_key(u32 key_size) +{ + struct bpf_dynptr_kern *kptr; + + kptr = kvmalloc(sizeof(*kptr) + key_size, GFP_USER | __GFP_NOWARN); + if (kptr) + bpf_dynptr_init(kptr, &kptr[1], BPF_DYNPTR_TYPE_USER, 0, key_size); + return kptr; +} + +static int bpf_copy_to_dynptr_ukey(struct bpf_dynptr_user __user *uptr, + struct bpf_dynptr_kern *kptr) +{ + unsigned int size; + u64 udata; + + if (get_user(udata, &uptr->data)) + return -EFAULT; + + /* Also zeroing the reserved field in uptr */ + size = bpf_dynptr_get_size(kptr); + if (copy_to_user(u64_to_user_ptr(udata), kptr->data + kptr->offset, size) || + put_user(size, &uptr->size) || put_user(0, &uptr->size + 1)) + 
return -EFAULT; + + return 0; +} + /* last field in 'union bpf_attr' used by this command */ #define BPF_MAP_LOOKUP_ELEM_LAST_FIELD flags @@ -1337,7 +1412,7 @@ static int map_lookup_elem(union bpf_attr *attr) goto err_put; } - key = __bpf_copy_key(ukey, map->key_size); + key = __bpf_copy_key(ukey, map->key_size, is_dynptr_key(map)); if (IS_ERR(key)) { err = PTR_ERR(key); goto err_put; @@ -1377,7 +1452,6 @@ static int map_lookup_elem(union bpf_attr *attr) return err; } - #define BPF_MAP_UPDATE_ELEM_LAST_FIELD flags static int map_update_elem(union bpf_attr *attr, bpfptr_t uattr) @@ -1410,7 +1484,7 @@ static int map_update_elem(union bpf_attr *attr, bpfptr_t uattr) goto err_put; } - key = ___bpf_copy_key(ukey, map->key_size); + key = ___bpf_copy_key(ukey, map->key_size, is_dynptr_key(map)); if (IS_ERR(key)) { err = PTR_ERR(key); goto err_put; @@ -1458,7 +1532,7 @@ static int map_delete_elem(union bpf_attr *attr, bpfptr_t uattr) goto err_put; } - key = ___bpf_copy_key(ukey, map->key_size); + key = ___bpf_copy_key(ukey, map->key_size, is_dynptr_key(map)); if (IS_ERR(key)) { err = PTR_ERR(key); goto err_put; @@ -1514,7 +1588,7 @@ static int map_get_next_key(union bpf_attr *attr) } if (ukey) { - key = __bpf_copy_key(ukey, map->key_size); + key = __bpf_copy_key(ukey, map->key_size, is_dynptr_key(map)); if (IS_ERR(key)) { err = PTR_ERR(key); goto err_put; @@ -1524,7 +1598,10 @@ static int map_get_next_key(union bpf_attr *attr) } err = -ENOMEM; - next_key = kvmalloc(map->key_size, GFP_USER); + if (is_dynptr_key(map)) + next_key = bpf_new_dynptr_key(map->map_extra); + else + next_key = kvmalloc(map->key_size, GFP_USER | __GFP_NOWARN); if (!next_key) goto free_key; @@ -1540,8 +1617,11 @@ static int map_get_next_key(union bpf_attr *attr) if (err) goto free_next_key; - err = -EFAULT; - if (copy_to_user(unext_key, next_key, map->key_size) != 0) + if (is_dynptr_key(map)) + err = bpf_copy_to_dynptr_ukey(unext_key, next_key); + else + err = copy_to_user(unext_key, next_key, 
map->key_size) != 0 ? -EFAULT : 0; + if (err) goto free_next_key; err = 0; @@ -1815,7 +1895,7 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr) goto err_put; } - key = __bpf_copy_key(ukey, map->key_size); + key = __bpf_copy_key(ukey, map->key_size, is_dynptr_key(map)); if (IS_ERR(key)) { err = PTR_ERR(key); goto err_put; diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 3df78c56c1bf..600c3fcee37a 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1246,6 +1246,8 @@ enum { /* Create a map that is suitable to be an inner map with dynamic max entries */ BPF_F_INNER_MAP = (1U << 12), +/* Map with bpf_dynptr-typed key */ + BPF_F_DYNPTR_KEY = (1U << 13), }; /* Flags for BPF_PROG_QUERY. */ @@ -6775,6 +6777,12 @@ struct bpf_timer { __u64 :64; } __attribute__((aligned(8))); +struct bpf_dynptr_user { + __u64 data; + __u32 size; + __u32 :32; +} __attribute__((aligned(8))); + struct bpf_dynptr { __u64 :64; __u64 :64; -- 2.29.2