This logic is similar to the recursion prevention logic for task local storage: bpf programs on LSM hooks lock bpf_inode_storage_busy; bpf tracing program will try to lock bpf_inode_storage_busy, and may return -EBUSY if something else already lock bpf_inode_storage_busy on the same CPU. Signed-off-by: Song Liu <song@xxxxxxxxxx> --- kernel/bpf/bpf_inode_storage.c | 153 +++++++++++++++++++++++++++------ kernel/trace/bpf_trace.c | 4 + 2 files changed, 133 insertions(+), 24 deletions(-) diff --git a/kernel/bpf/bpf_inode_storage.c b/kernel/bpf/bpf_inode_storage.c index 8d5a9bfe6643..3a94a38e24f0 100644 --- a/kernel/bpf/bpf_inode_storage.c +++ b/kernel/bpf/bpf_inode_storage.c @@ -21,6 +21,31 @@ DEFINE_BPF_STORAGE_CACHE(inode_cache); +static DEFINE_PER_CPU(int, bpf_inode_storage_busy); + +static void bpf_inode_storage_lock(void) +{ + migrate_disable(); + this_cpu_inc(bpf_inode_storage_busy); +} + +static void bpf_inode_storage_unlock(void) +{ + this_cpu_dec(bpf_inode_storage_busy); + migrate_enable(); +} + +static bool bpf_inode_storage_trylock(void) +{ + migrate_disable(); + if (unlikely(this_cpu_inc_return(bpf_inode_storage_busy) != 1)) { + this_cpu_dec(bpf_inode_storage_busy); + migrate_enable(); + return false; + } + return true; +} + static struct bpf_local_storage __rcu **inode_storage_ptr(void *owner) { struct inode *inode = owner; @@ -56,7 +81,9 @@ void bpf_inode_storage_free(struct inode *inode) return; } + bpf_inode_storage_lock(); bpf_local_storage_destroy(local_storage); + bpf_inode_storage_unlock(); rcu_read_unlock(); } @@ -68,7 +95,9 @@ static void *bpf_fd_inode_storage_lookup_elem(struct bpf_map *map, void *key) if (fd_empty(f)) return ERR_PTR(-EBADF); + bpf_inode_storage_lock(); sdata = inode_storage_lookup(file_inode(fd_file(f)), map, true); + bpf_inode_storage_unlock(); return sdata ? sdata->data : NULL; } @@ -81,13 +110,16 @@ static long bpf_fd_inode_storage_update_elem(struct bpf_map *map, void *key, if (fd_empty(f)) return -EBADF; + bpf_inode_storage_lock(); sdata = bpf_local_storage_update(file_inode(fd_file(f)), (struct bpf_local_storage_map *)map, value, map_flags, false, GFP_ATOMIC); + bpf_inode_storage_unlock(); return PTR_ERR_OR_ZERO(sdata); } -static int inode_storage_delete(struct inode *inode, struct bpf_map *map) +static int inode_storage_delete(struct inode *inode, struct bpf_map *map, + bool nobusy) { struct bpf_local_storage_data *sdata; @@ -95,6 +127,9 @@ static int inode_storage_delete(struct inode *inode, struct bpf_map *map) if (!sdata) return -ENOENT; + if (!nobusy) + return -EBUSY; + bpf_selem_unlink(SELEM(sdata), false); return 0; @@ -102,55 +137,105 @@ static int inode_storage_delete(struct inode *inode, struct bpf_map *map) static long bpf_fd_inode_storage_delete_elem(struct bpf_map *map, void *key) { + int err; + CLASS(fd_raw, f)(*(int *)key); if (fd_empty(f)) return -EBADF; - return inode_storage_delete(file_inode(fd_file(f)), map); + bpf_inode_storage_lock(); + err = inode_storage_delete(file_inode(fd_file(f)), map, true); + bpf_inode_storage_unlock(); + return err; } -/* *gfp_flags* is a hidden argument provided by the verifier */ -BPF_CALL_5(bpf_inode_storage_get, struct bpf_map *, map, struct inode *, inode, - void *, value, u64, flags, gfp_t, gfp_flags) +static void *__bpf_inode_storage_get(struct bpf_map *map, struct inode *inode, + void *value, u64 flags, gfp_t gfp_flags, bool nobusy) { struct bpf_local_storage_data *sdata; - WARN_ON_ONCE(!bpf_rcu_lock_held()); - if (flags & ~(BPF_LOCAL_STORAGE_GET_F_CREATE)) - return (unsigned long)NULL; - + /* explicitly check that the inode not NULL */ if (!inode) - return (unsigned long)NULL; + return NULL; sdata = inode_storage_lookup(inode, map, true); if (sdata) - return (unsigned long)sdata->data; + return sdata->data; - /* This helper must only called from where the inode is guaranteed - * to have a refcount and cannot be freed. - */ - if (flags & BPF_LOCAL_STORAGE_GET_F_CREATE) { + /* only allocate new storage, when the inode is refcounted */ + if (atomic_read(&inode->i_count) && + flags & BPF_LOCAL_STORAGE_GET_F_CREATE) { sdata = bpf_local_storage_update( inode, (struct bpf_local_storage_map *)map, value, BPF_NOEXIST, false, gfp_flags); - return IS_ERR(sdata) ? (unsigned long)NULL : - (unsigned long)sdata->data; + return IS_ERR(sdata) ? NULL : sdata->data; } - return (unsigned long)NULL; + return NULL; +} + +/* *gfp_flags* is a hidden argument provided by the verifier */ +BPF_CALL_5(bpf_inode_storage_get_recur, struct bpf_map *, map, struct inode *, inode, + void *, value, u64, flags, gfp_t, gfp_flags) +{ + bool nobusy; + void *data; + + WARN_ON_ONCE(!bpf_rcu_lock_held()); + if (flags & ~(BPF_LOCAL_STORAGE_GET_F_CREATE)) + return (unsigned long)NULL; + + nobusy = bpf_inode_storage_trylock(); + data = __bpf_inode_storage_get(map, inode, value, flags, gfp_flags, nobusy); + if (nobusy) + bpf_inode_storage_unlock(); + return (unsigned long)data; +} + +/* *gfp_flags* is a hidden argument provided by the verifier */ +BPF_CALL_5(bpf_inode_storage_get, struct bpf_map *, map, struct inode *, inode, + void *, value, u64, flags, gfp_t, gfp_flags) +{ + void *data; + + WARN_ON_ONCE(!bpf_rcu_lock_held()); + if (flags & ~(BPF_LOCAL_STORAGE_GET_F_CREATE)) + return (unsigned long)NULL; + + bpf_inode_storage_lock(); + data = __bpf_inode_storage_get(map, inode, value, flags, gfp_flags, true); + bpf_inode_storage_unlock(); + return (unsigned long)data; +} + +BPF_CALL_2(bpf_inode_storage_delete_recur, struct bpf_map *, map, struct inode *, inode) +{ + bool nobusy; + int ret; + + WARN_ON_ONCE(!bpf_rcu_lock_held()); + if (!inode) + return -EINVAL; + + nobusy = bpf_inode_storage_trylock(); + ret = inode_storage_delete(inode, map, nobusy); + if (nobusy) + bpf_inode_storage_unlock(); + return ret; } -BPF_CALL_2(bpf_inode_storage_delete, - struct bpf_map *, map, struct inode *, inode) +BPF_CALL_2(bpf_inode_storage_delete, struct bpf_map *, map, struct inode *, inode) { + int ret; + WARN_ON_ONCE(!bpf_rcu_lock_held()); if (!inode) return -EINVAL; - /* This helper must only called from where the inode is guaranteed - * to have a refcount and cannot be freed. - */ - return inode_storage_delete(inode, map); + bpf_inode_storage_lock(); + ret = inode_storage_delete(inode, map, true); + bpf_inode_storage_unlock(); + return ret; } static int notsupp_get_next_key(struct bpf_map *map, void *key, @@ -186,6 +271,17 @@ const struct bpf_map_ops inode_storage_map_ops = { BTF_ID_LIST_SINGLE(bpf_inode_storage_btf_ids, struct, inode) +const struct bpf_func_proto bpf_inode_storage_get_recur_proto = { + .func = bpf_inode_storage_get_recur, + .gpl_only = false, + .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL, + .arg1_type = ARG_CONST_MAP_PTR, + .arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL, + .arg2_btf_id = &bpf_inode_storage_btf_ids[0], + .arg3_type = ARG_PTR_TO_MAP_VALUE_OR_NULL, + .arg4_type = ARG_ANYTHING, +}; + const struct bpf_func_proto bpf_inode_storage_get_proto = { .func = bpf_inode_storage_get, .gpl_only = false, @@ -197,6 +293,15 @@ const struct bpf_func_proto bpf_inode_storage_get_proto = { .arg4_type = ARG_ANYTHING, }; +const struct bpf_func_proto bpf_inode_storage_delete_recur_proto = { + .func = bpf_inode_storage_delete_recur, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_CONST_MAP_PTR, + .arg2_type = ARG_PTR_TO_BTF_ID_OR_NULL, + .arg2_btf_id = &bpf_inode_storage_btf_ids[0], +}; + const struct bpf_func_proto bpf_inode_storage_delete_proto = { .func = bpf_inode_storage_delete, .gpl_only = false, diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 262bd101ea0b..4616f5430a5e 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -1554,8 +1554,12 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_task_storage_delete_recur_proto; return &bpf_task_storage_delete_proto; case BPF_FUNC_inode_storage_get: + if (bpf_prog_check_recur(prog)) + return &bpf_inode_storage_get_recur_proto; return &bpf_inode_storage_get_proto; case BPF_FUNC_inode_storage_delete: + if (bpf_prog_check_recur(prog)) + return &bpf_inode_storage_delete_recur_proto; return &bpf_inode_storage_delete_proto; case BPF_FUNC_for_each_map_elem: return &bpf_for_each_map_elem_proto; -- 2.43.5