On Thu, Aug 26, 2021 at 07:09:09PM +0530, Kumar Kartikeya Dwivedi wrote: > This map is useful in general to tie data associated with a open file > (not fd) from eBPF programs, such that data is released when the file > goes away (e.g. checkpoint/restore usecase). > > Another usecase is implementing Capsicum [0] style capability sandbox in > userspace using eBPF LSM, enforcing rights at the file level using this > mechanism. > > [0]: https://www.usenix.org/legacy/event/sec10/tech/full_papers/Watson.pdf > > Signed-off-by: Kumar Kartikeya Dwivedi <memxor@xxxxxxxxx> > --- > include/linux/bpf_lsm.h | 21 +++ > include/linux/bpf_types.h | 1 + > include/uapi/linux/bpf.h | 39 ++++++ > kernel/bpf/Makefile | 2 +- > kernel/bpf/bpf_file_storage.c | 244 ++++++++++++++++++++++++++++++++++ > kernel/bpf/bpf_lsm.c | 4 + > kernel/bpf/syscall.c | 3 +- > kernel/bpf/verifier.c | 10 ++ > security/bpf/hooks.c | 2 + > 9 files changed, 324 insertions(+), 2 deletions(-) > create mode 100644 kernel/bpf/bpf_file_storage.c > > diff --git a/include/linux/bpf_lsm.h b/include/linux/bpf_lsm.h > index 479c101546ad..5901a39cd5ac 100644 > --- a/include/linux/bpf_lsm.h > +++ b/include/linux/bpf_lsm.h > @@ -42,6 +42,18 @@ extern const struct bpf_func_proto bpf_inode_storage_get_proto; > extern const struct bpf_func_proto bpf_inode_storage_delete_proto; > void bpf_inode_storage_free(struct inode *inode); > > +static inline struct bpf_storage_blob *bpf_file(const struct file *file) > +{ > + if (unlikely(!file->f_security)) > + return NULL; > + > + return file->f_security + bpf_lsm_blob_sizes.lbs_file; > +} > + > +extern const struct bpf_func_proto bpf_file_storage_get_proto; > +extern const struct bpf_func_proto bpf_file_storage_delete_proto; > +void bpf_file_storage_free(struct file *file); > + > #else /* !CONFIG_BPF_LSM */ > > static inline bool bpf_lsm_is_sleepable_hook(u32 btf_id) > @@ -65,6 +77,15 @@ static inline void bpf_inode_storage_free(struct inode *inode) > { > } > > +static inline struct bpf_storage_blob *bpf_file(const struct file *file) > +{ > + return NULL; > +} > + > +static inline void bpf_file_storage_free(struct file *file) > +{ > +} > + > #endif /* CONFIG_BPF_LSM */ > > #endif /* _LINUX_BPF_LSM_H */ > diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h > index 9c81724e4b98..c68cc6d9e7da 100644 > --- a/include/linux/bpf_types.h > +++ b/include/linux/bpf_types.h > @@ -107,6 +107,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP_HASH, dev_map_hash_ops) > BPF_MAP_TYPE(BPF_MAP_TYPE_SK_STORAGE, sk_storage_map_ops) > #ifdef CONFIG_BPF_LSM > BPF_MAP_TYPE(BPF_MAP_TYPE_INODE_STORAGE, inode_storage_map_ops) > +BPF_MAP_TYPE(BPF_MAP_TYPE_FILE_STORAGE, file_storage_map_ops) > #endif > BPF_MAP_TYPE(BPF_MAP_TYPE_TASK_STORAGE, task_storage_map_ops) > BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops) > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h > index 791f31dd0abe..62aa1ff2dcfb 100644 > --- a/include/uapi/linux/bpf.h > +++ b/include/uapi/linux/bpf.h > @@ -906,6 +906,7 @@ enum bpf_map_type { > BPF_MAP_TYPE_RINGBUF, > BPF_MAP_TYPE_INODE_STORAGE, > BPF_MAP_TYPE_TASK_STORAGE, > + BPF_MAP_TYPE_FILE_STORAGE, > }; > > /* Note that tracing related programs such as > @@ -4877,6 +4878,42 @@ union bpf_attr { > * Get the struct pt_regs associated with **task**. > * Return > * A pointer to struct pt_regs. > + * > + * void *bpf_file_storage_get(struct bpf_map *map, void *file, void *value, u64 flags) > + * Description > + * Get a bpf_local_storage from a *file*. > + * > + * Logically, it could be thought of as getting the value from > + * a *map* with *file* as the **key**. From this > + * perspective, the usage is not much different from > + * **bpf_map_lookup_elem**\ (*map*, **&**\ *file*) except this > + * helper enforces the key must be an file and the map must also > + * be a **BPF_MAP_TYPE_FILE_STORAGE**. > + * > + * Underneath, the value is stored locally at *file* instead of > + * the *map*. The *map* is used as the bpf-local-storage > + * "type". The bpf-local-storage "type" (i.e. the *map*) is > + * searched against all bpf_local_storage residing at *file*. > + * > + * An optional *flags* (**BPF_LOCAL_STORAGE_GET_F_CREATE**) can be > + * used such that a new bpf_local_storage will be > + * created if one does not exist. *value* can be used > + * together with **BPF_LOCAL_STORAGE_GET_F_CREATE** to specify > + * the initial value of a bpf_local_storage. If *value* is > + * **NULL**, the new bpf_local_storage will be zero initialized. > + * Return > + * A bpf_local_storage pointer is returned on success. > + * > + * **NULL** if not found or there was an error in adding > + * a new bpf_local_storage. > + * > + * int bpf_file_storage_delete(struct bpf_map *map, void *file) > + * Description > + * Delete a bpf_local_storage from a *file*. > + * Return > + * 0 on success. > + * > + * **-ENOENT** if the bpf_local_storage cannot be found. > */ > #define __BPF_FUNC_MAPPER(FN) \ > FN(unspec), \ > @@ -5055,6 +5092,8 @@ union bpf_attr { > FN(get_func_ip), \ > FN(get_attach_cookie), \ > FN(task_pt_regs), \ > + FN(file_storage_get), \ > + FN(file_storage_delete), \ > /* */ > > /* integer value in 'imm' field of BPF_CALL instruction selects which helper > diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile > index 7f33098ca63f..98a18e402a0a 100644 > --- a/kernel/bpf/Makefile > +++ b/kernel/bpf/Makefile > @@ -10,7 +10,7 @@ obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_i > obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o > obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o > obj-$(CONFIG_BPF_SYSCALL) += bpf_local_storage.o bpf_task_storage.o > -obj-${CONFIG_BPF_LSM} += bpf_inode_storage.o > +obj-${CONFIG_BPF_LSM} += bpf_inode_storage.o bpf_file_storage.o > obj-$(CONFIG_BPF_SYSCALL) += disasm.o > obj-$(CONFIG_BPF_JIT) += trampoline.o > obj-$(CONFIG_BPF_SYSCALL) += btf.o > diff --git a/kernel/bpf/bpf_file_storage.c b/kernel/bpf/bpf_file_storage.c > new file mode 100644 > index 000000000000..c826bc0405c4 > --- /dev/null > +++ b/kernel/bpf/bpf_file_storage.c > @@ -0,0 +1,244 @@ > +// SPDX-License-Identifier: GPL-2.0 > + > +#include <linux/rculist.h> > +#include <linux/list.h> > +#include <linux/hash.h> > +#include <linux/types.h> > +#include <linux/filter.h> > +#include <linux/spinlock.h> > +#include <linux/bpf.h> > +#include <linux/bpf_local_storage.h> > +#include <uapi/linux/btf.h> > +#include <linux/bpf_lsm.h> > +#include <linux/btf_ids.h> > + > +DEFINE_BPF_STORAGE_CACHE(file_cache); > + > +static struct bpf_local_storage __rcu **file_storage_ptr(void *owner) > +{ > + struct bpf_storage_blob *bsb; > + struct file *file = owner; > + > + bsb = bpf_file(file); > + if (!bsb) > + return NULL; > + return &bsb->storage; > +} > + > +static struct bpf_local_storage_data * > +file_storage_lookup(struct file *file, struct bpf_map *map, bool cacheit_lockit) > +{ > + struct bpf_local_storage *file_storage; > + struct bpf_local_storage_map *smap; > + struct bpf_storage_blob *bsb; > + > + bsb = bpf_file(file); > + if (!bsb) > + return NULL; > + > + file_storage = rcu_dereference(bsb->storage); It's possible that I am (and the docs are) behind the times, or (very likely) I'm missing something else, but Documentation/RCU/whatisRCU.rst says that rcu_dereference result is only valid within a rcu read-side critical section. Here it doesn't seem like you're in a rcu_read_unlock at all. Will the callers (bpf_map_ops->map_lookup_elem) be called that way? > + if (!file_storage) > + return NULL; > + > + smap = (struct bpf_local_storage_map *)map; > + return bpf_local_storage_lookup(file_storage, smap, cacheit_lockit); > +} > + > +void bpf_file_storage_free(struct file *file) > +{ > + struct bpf_local_storage *local_storage; > + struct bpf_local_storage_elem *selem; > + bool free_file_storage = false; > + struct bpf_storage_blob *bsb; > + struct hlist_node *n; > + > + bsb = bpf_file(file); > + if (!bsb) > + return; > + > + rcu_read_lock(); > + > + local_storage = rcu_dereference(bsb->storage); Here you've called rcu_read_lock, but you use the result of it, 'local_storage', after dropping the rcu_read_unlock, which whatisRCU.rst explicitly calls out as a bug. > + if (!local_storage) { > + rcu_read_unlock(); > + return; > + } > + > + raw_spin_lock_bh(&local_storage->lock); > + hlist_for_each_entry_safe(selem, n, &local_storage->list, snode) { > + bpf_selem_unlink_map(selem); > + free_file_storage = bpf_selem_unlink_storage_nolock(local_storage, > + selem, false); > + } > + raw_spin_unlock_bh(&local_storage->lock); > + rcu_read_unlock(); > + > + if (free_file_storage) > + kfree_rcu(local_storage, rcu); > +} > + > +static void *bpf_fd_file_storage_lookup_elem(struct bpf_map *map, void *key) > +{ > + struct bpf_local_storage_data *sdata; > + struct file *file; > + int fd; > + > + fd = *(int *)key; > + file = fget_raw(fd); > + if (!file) > + return ERR_PTR(-EBADF); > + > + sdata = file_storage_lookup(file, map, true); > + fput(file); > + return sdata ? sdata->data : NULL; > +} > + > +static int bpf_fd_file_storage_update_elem(struct bpf_map *map, void *key, > + void *value, u64 map_flags) > +{ > + struct bpf_local_storage_data *sdata; > + struct file *file; > + int fd; > + > + fd = *(int *)key; > + file = fget_raw(fd); > + if (!file) > + return -EBADF; > + if (!file_storage_ptr(file)) { > + fput(file); > + return -EBADF; > + } > + > + sdata = bpf_local_storage_update(file, > + (struct bpf_local_storage_map *)map, > + value, map_flags); > + fput(file); > + return PTR_ERR_OR_ZERO(sdata); > +} > + > +static int file_storage_delete(struct file *file, struct bpf_map *map) > +{ > + struct bpf_local_storage_data *sdata; > + > + sdata = file_storage_lookup(file, map, false); > + if (!sdata) > + return -ENOENT; > + > + bpf_selem_unlink(SELEM(sdata)); > + > + return 0; > +} > + > +static int bpf_fd_file_storage_delete_elem(struct bpf_map *map, void *key) > +{ > + struct file *file; > + int fd, err; > + > + fd = *(int *)key; > + file = fget_raw(fd); > + if (!file) > + return -EBADF; > + > + err = file_storage_delete(file, map); > + fput(file); > + return err; > +} > + > +BPF_CALL_4(bpf_file_storage_get, struct bpf_map *, map, struct file *, file, > + void *, value, u64, flags) > +{ > + struct bpf_local_storage_data *sdata; > + > + if (flags & ~(BPF_LOCAL_STORAGE_GET_F_CREATE)) > + return (unsigned long)NULL; > + > + if (!file || !file_storage_ptr(file)) > + return (unsigned long)NULL; > + > + sdata = file_storage_lookup(file, map, true); > + if (sdata) > + return (unsigned long)sdata->data; > + > + if (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) { > + sdata = bpf_local_storage_update( > + file, (struct bpf_local_storage_map *)map, value, > + BPF_NOEXIST); > + return IS_ERR(sdata) ? (unsigned long)NULL : > + (unsigned long)sdata->data; > + } > + > + return (unsigned long)NULL; > +} > + > +BPF_CALL_2(bpf_file_storage_delete, struct bpf_map *, map, struct file *, file) > +{ > + if (!file) > + return -EINVAL; > + > + return file_storage_delete(file, map); > +} > + > +static int notsupp_get_next_key(struct bpf_map *map, void *key, void *next_key) > +{ > + return -ENOTSUPP; > +} > + > +static struct bpf_map *file_storage_map_alloc(union bpf_attr *attr) > +{ > + struct bpf_local_storage_map *smap; > + > + smap = bpf_local_storage_map_alloc(attr); > + if (IS_ERR(smap)) > + return ERR_CAST(smap); > + > + smap->cache_idx = bpf_local_storage_cache_idx_get(&file_cache); > + return &smap->map; > +} > + > +static void file_storage_map_free(struct bpf_map *map) > +{ > + struct bpf_local_storage_map *smap; > + > + smap = (struct bpf_local_storage_map *)map; > + bpf_local_storage_cache_idx_free(&file_cache, smap->cache_idx); > + bpf_local_storage_map_free(smap, NULL); > +} > + > +static int file_storage_map_btf_id; > + > +const struct bpf_map_ops file_storage_map_ops = { > + .map_meta_equal = bpf_map_meta_equal, > + .map_alloc_check = bpf_local_storage_map_alloc_check, > + .map_alloc = file_storage_map_alloc, > + .map_free = file_storage_map_free, > + .map_get_next_key = notsupp_get_next_key, > + .map_lookup_elem = bpf_fd_file_storage_lookup_elem, > + .map_update_elem = bpf_fd_file_storage_update_elem, > + .map_delete_elem = bpf_fd_file_storage_delete_elem, > + .map_check_btf = bpf_local_storage_map_check_btf, > + .map_btf_name = "bpf_local_storage_map", > + .map_btf_id = &file_storage_map_btf_id, > + .map_owner_storage_ptr = file_storage_ptr, > +}; > + > +BTF_ID_LIST_SINGLE(bpf_file_storage_btf_ids, struct, file) > + > +const struct bpf_func_proto bpf_file_storage_get_proto = { > + .func = bpf_file_storage_get, > + .gpl_only = false, > + .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL, > + .arg1_type = ARG_CONST_MAP_PTR, > + .arg2_type = ARG_PTR_TO_BTF_ID, > + .arg2_btf_id = &bpf_file_storage_btf_ids[0], > + .arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL, > + .arg4_type = ARG_ANYTHING, > +}; > + > +const struct bpf_func_proto bpf_file_storage_delete_proto = { > + .func = bpf_file_storage_delete, > + .gpl_only = false, > + .ret_type = RET_INTEGER, > + .arg1_type = ARG_CONST_MAP_PTR, > + .arg2_type = ARG_PTR_TO_BTF_ID, > + .arg2_btf_id = &bpf_file_storage_btf_ids[0], > +}; > diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c > index 06062370c3b8..48c2022fd958 100644 > --- a/kernel/bpf/bpf_lsm.c > +++ b/kernel/bpf/bpf_lsm.c > @@ -121,6 +121,10 @@ bpf_lsm_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) > return &bpf_bprm_opts_set_proto; > case BPF_FUNC_ima_inode_hash: > return prog->aux->sleepable ? &bpf_ima_inode_hash_proto : NULL; > + case BPF_FUNC_file_storage_get: > + return &bpf_file_storage_get_proto; > + case BPF_FUNC_file_storage_delete: > + return &bpf_file_storage_delete_proto; > default: > return tracing_prog_func_proto(func_id, prog); > } > diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c > index 4e50c0bfdb7d..946a85945776 100644 > --- a/kernel/bpf/syscall.c > +++ b/kernel/bpf/syscall.c > @@ -783,7 +783,8 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf, > map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE && > map->map_type != BPF_MAP_TYPE_SK_STORAGE && > map->map_type != BPF_MAP_TYPE_INODE_STORAGE && > - map->map_type != BPF_MAP_TYPE_TASK_STORAGE) > + map->map_type != BPF_MAP_TYPE_TASK_STORAGE && > + map->map_type != BPF_MAP_TYPE_FILE_STORAGE) > return -ENOTSUPP; > if (map->spin_lock_off + sizeof(struct bpf_spin_lock) > > map->value_size) { > diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c > index 206c221453cf..c703d58681a8 100644 > --- a/kernel/bpf/verifier.c > +++ b/kernel/bpf/verifier.c > @@ -5390,6 +5390,11 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, > func_id != BPF_FUNC_task_storage_delete) > goto error; > break; > + case BPF_MAP_TYPE_FILE_STORAGE: > + if (func_id != BPF_FUNC_file_storage_get && > + func_id != BPF_FUNC_file_storage_delete) > + goto error; > + break; > default: > break; > } > @@ -5473,6 +5478,11 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, > if (map->map_type != BPF_MAP_TYPE_TASK_STORAGE) > goto error; > break; > + case BPF_FUNC_file_storage_get: > + case BPF_FUNC_file_storage_delete: > + if (map->map_type != BPF_MAP_TYPE_FILE_STORAGE) > + goto error; > + break; > default: > break; > } > diff --git a/security/bpf/hooks.c b/security/bpf/hooks.c > index e5971fa74fd7..faa70467db4d 100644 > --- a/security/bpf/hooks.c > +++ b/security/bpf/hooks.c > @@ -13,6 +13,7 @@ static struct security_hook_list bpf_lsm_hooks[] __lsm_ro_after_init = { > #undef LSM_HOOK > LSM_HOOK_INIT(inode_free_security, bpf_inode_storage_free), > LSM_HOOK_INIT(task_free, bpf_task_storage_free), > + LSM_HOOK_INIT(file_free_security, bpf_file_storage_free), > }; > > static int __init bpf_lsm_init(void) > @@ -25,6 +26,7 @@ static int __init bpf_lsm_init(void) > struct lsm_blob_sizes bpf_lsm_blob_sizes __lsm_ro_after_init = { > .lbs_inode = sizeof(struct bpf_storage_blob), > .lbs_task = sizeof(struct bpf_storage_blob), > + .lbs_file = sizeof(struct bpf_storage_blob), > }; > > DEFINE_LSM(bpf) = { > -- > 2.33.0