Allow local_storage maps to claim exclusive use of a cache slot in
struct bpf_local_storage's cache. When a local_storage map claims a
slot and is cached via bpf_local_storage_lookup, it will not be
replaced until the map is free'd. As a result, after a local_storage
map is alloc'd for a specific bpf_local_storage, lookup calls after the
first will quickly find the correct map.

When requesting an exclusive cache slot, bpf_local_storage_cache_idx_get
can now fail if all slots are already claimed. Because a map's
cache_idx is assigned when the bpf_map is allocated - which occurs
before the program runs - the map load and subsequent prog load will
fail.

A bit in struct bpf_map's map_extra is used to designate whether a map
would like to claim an exclusive slot. Similarly, a bitmap,
idx_exclusive, is added to struct bpf_local_storage_cache to track
whether a slot is exclusively claimed. Functions that manipulate the
cache are modified to test for the BPF_LOCAL_STORAGE_FORCE_CACHE bit
and test/set idx_exclusive where necessary.
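For example, a program could request an exclusive cache slot for a
task_storage map by setting the bit in its map definition. The snippet
below is an illustrative sketch, not part of this patch; it assumes a
libbpf version whose BTF map definitions pass map_extra through to
BPF_MAP_CREATE, and the map name and value type are arbitrary:

  #include <linux/bpf.h>
  #include <bpf/bpf_helpers.h>

  struct task_stats {
  	__u64 invocations;
  };

  struct {
  	__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
  	__uint(map_flags, BPF_F_NO_PREALLOC);
  	/* Claim exclusive use of a cache slot, or fail map creation */
  	__uint(map_extra, BPF_LOCAL_STORAGE_FORCE_CACHE);
  	__type(key, int);
  	__type(value, struct task_stats);
  } hot_task_storage SEC(".maps");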
When a map exclusively claims a cache slot, non-exclusive local_storage
maps which were previously assigned the same cache_idx are not migrated
to an unclaimed cache_idx. Such a migration would require a full
iteration of the cache list and would necessitate a reverse migration
on map free to even things out. Since a used cache slot will only be
exclusively claimed if no empty slot exists, the additional complexity
was deemed unnecessary.

Signed-off-by: Dave Marchevsky <davemarchevsky@xxxxxx>
---
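Usage note (illustrative only, not part of the commit message): from
the loader side, the flag can also be set on an already-open map before
load via libbpf's existing bpf_map__set_map_extra() setter. The sketch
below assumes a generated skeleton named my_prog.skel.h containing the
hot_task_storage map from the example above:

  #include <errno.h>
  #include <linux/bpf.h>		/* BPF_LOCAL_STORAGE_FORCE_CACHE (this patch) */
  #include <bpf/libbpf.h>
  #include "my_prog.skel.h"	/* hypothetical skeleton */

  int load_with_exclusive_slot(void)
  {
  	struct my_prog *skel;
  	int err;

  	skel = my_prog__open();
  	if (!skel)
  		return -ENOMEM;

  	/* Ask for exclusive use of a cache slot. With this patch the
  	 * map - and thus the skeleton load - fails if all
  	 * BPF_LOCAL_STORAGE_CACHE_SIZE slots are exclusively claimed.
  	 */
  	err = bpf_map__set_map_extra(skel->maps.hot_task_storage,
  				     BPF_LOCAL_STORAGE_FORCE_CACHE);
  	if (err)
  		goto out;

  	err = my_prog__load(skel);
  out:
  	my_prog__destroy(skel);
  	return err;
  }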
 include/linux/bpf_local_storage.h |  6 +++--
 include/uapi/linux/bpf.h          | 14 +++++++++++
 kernel/bpf/bpf_inode_storage.c    | 16 +++++++++---
 kernel/bpf/bpf_local_storage.c    | 42 +++++++++++++++++++++++++------
 kernel/bpf/bpf_task_storage.c     | 16 +++++++++---
 kernel/bpf/syscall.c              |  7 ++++--
 net/core/bpf_sk_storage.c         | 15 ++++++++---
 7 files changed, 95 insertions(+), 21 deletions(-)

diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h
index 493e63258497..d87405a1b65d 100644
--- a/include/linux/bpf_local_storage.h
+++ b/include/linux/bpf_local_storage.h
@@ -109,6 +109,7 @@ struct bpf_local_storage {
 struct bpf_local_storage_cache {
 	spinlock_t idx_lock;
 	u64 idx_usage_counts[BPF_LOCAL_STORAGE_CACHE_SIZE];
+	DECLARE_BITMAP(idx_exclusive, BPF_LOCAL_STORAGE_CACHE_SIZE);
 };
 
 #define DEFINE_BPF_STORAGE_CACHE(name)				\
@@ -116,9 +117,10 @@ static struct bpf_local_storage_cache name = {			\
 	.idx_lock = __SPIN_LOCK_UNLOCKED(name.idx_lock),	\
 }
 
-u16 bpf_local_storage_cache_idx_get(struct bpf_local_storage_cache *cache);
+int bpf_local_storage_cache_idx_get(struct bpf_local_storage_cache *cache,
+				    u64 flags);
 void bpf_local_storage_cache_idx_free(struct bpf_local_storage_cache *cache,
-				      u16 idx);
+				      u16 idx, u64 flags);
 
 /* Helper functions for bpf_local_storage */
 int bpf_local_storage_map_alloc_check(union bpf_attr *attr);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index d14b10b85e51..566035bc2f08 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1257,6 +1257,18 @@ enum bpf_stack_build_id_status {
 	BPF_STACK_BUILD_ID_IP = 2,
 };
 
+/* Flags passed in map_extra when creating local_storage maps
+ * of types: BPF_MAP_TYPE_INODE_STORAGE
+ *	     BPF_MAP_TYPE_TASK_STORAGE
+ *	     BPF_MAP_TYPE_SK_STORAGE
+ */
+enum bpf_local_storage_extra_flags {
+	/* Give the map exclusive use of a local_storage cache slot
+	 * or fail map alloc
+	 */
+	BPF_LOCAL_STORAGE_FORCE_CACHE = (1U << 0),
+};
+
 #define BPF_BUILD_ID_SIZE 20
 struct bpf_stack_build_id {
 	__s32 status;
@@ -1296,6 +1308,8 @@ union bpf_attr {
 	 * BPF_MAP_TYPE_BLOOM_FILTER - the lowest 4 bits indicate the
 	 * number of hash functions (if 0, the bloom filter will default
 	 * to using 5 hash functions).
+	 * BPF_MAP_TYPE_{INODE,TASK,SK}_STORAGE - local_storage specific
+	 *   flags (see bpf_local_storage_extra_flags)
 	 */
 	__u64	map_extra;
 };
diff --git a/kernel/bpf/bpf_inode_storage.c b/kernel/bpf/bpf_inode_storage.c
index 96be8d518885..8b32adc23fc3 100644
--- a/kernel/bpf/bpf_inode_storage.c
+++ b/kernel/bpf/bpf_inode_storage.c
@@ -227,12 +227,21 @@ static int notsupp_get_next_key(struct bpf_map *map, void *key,
 static struct bpf_map *inode_storage_map_alloc(union bpf_attr *attr)
 {
 	struct bpf_local_storage_map *smap;
+	int cache_idx_or_err;
+
+	cache_idx_or_err = bpf_local_storage_cache_idx_get(&inode_cache,
+							   attr->map_extra);
+	if (cache_idx_or_err < 0)
+		return ERR_PTR(cache_idx_or_err);
 
 	smap = bpf_local_storage_map_alloc(attr);
-	if (IS_ERR(smap))
+	if (IS_ERR(smap)) {
+		bpf_local_storage_cache_idx_free(&inode_cache, (u16)cache_idx_or_err,
+						 attr->map_extra);
 		return ERR_CAST(smap);
+	}
 
-	smap->cache_idx = bpf_local_storage_cache_idx_get(&inode_cache);
+	smap->cache_idx = (u16)cache_idx_or_err;
 	return &smap->map;
 }
 
@@ -241,7 +250,8 @@ static void inode_storage_map_free(struct bpf_map *map)
 	struct bpf_local_storage_map *smap;
 
 	smap = (struct bpf_local_storage_map *)map;
-	bpf_local_storage_cache_idx_free(&inode_cache, smap->cache_idx);
+	bpf_local_storage_cache_idx_free(&inode_cache, smap->cache_idx,
+					 map->map_extra);
 	bpf_local_storage_map_free(smap, NULL);
 }
 
diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c
index 01aa2b51ec4d..b23080247bef 100644
--- a/kernel/bpf/bpf_local_storage.c
+++ b/kernel/bpf/bpf_local_storage.c
@@ -231,12 +231,19 @@ bpf_local_storage_lookup(struct bpf_local_storage *local_storage,
 {
 	struct bpf_local_storage_data *sdata;
 	struct bpf_local_storage_elem *selem;
+	struct bpf_local_storage_map *cached;
+	bool cached_exclusive = false;
 
 	/* Fast path (cache hit) */
 	sdata = rcu_dereference_check(local_storage->cache[smap->cache_idx],
 				      bpf_rcu_lock_held());
-	if (sdata && rcu_access_pointer(sdata->smap) == smap)
-		return sdata;
+	if (sdata) {
+		if (rcu_access_pointer(sdata->smap) == smap)
+			return sdata;
+
+		cached = rcu_dereference_check(sdata->smap, bpf_rcu_lock_held());
+		cached_exclusive = cached->map.map_extra & BPF_LOCAL_STORAGE_FORCE_CACHE;
+	}
 
 	/* Slow path (cache miss) */
 	hlist_for_each_entry_rcu(selem, &local_storage->list, snode,
@@ -248,7 +255,7 @@ bpf_local_storage_lookup(struct bpf_local_storage *local_storage,
 		return NULL;
 
 	sdata = SDATA(selem);
-	if (cacheit_lockit) {
+	if (cacheit_lockit && !cached_exclusive) {
 		unsigned long flags;
 
 		/* spinlock is needed to avoid racing with the
@@ -482,15 +489,27 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
 	return ERR_PTR(err);
 }
 
-u16 bpf_local_storage_cache_idx_get(struct bpf_local_storage_cache *cache)
+int bpf_local_storage_cache_idx_get(struct bpf_local_storage_cache *cache,
+				    u64 flags)
 {
+	bool exclusive = flags & BPF_LOCAL_STORAGE_FORCE_CACHE;
+	bool adding_to_full = false;
 	u64 min_usage = U64_MAX;
-	u16 i, res = 0;
+	int res = 0;
+	u16 i;
 
 	spin_lock(&cache->idx_lock);
 
+	if (bitmap_full(cache->idx_exclusive, BPF_LOCAL_STORAGE_CACHE_SIZE)) {
+		res = -ENOMEM;
+		adding_to_full = true;
+		if (exclusive)
+			goto out;
+	}
+
 	for (i = 0; i < BPF_LOCAL_STORAGE_CACHE_SIZE; i++) {
-		if (cache->idx_usage_counts[i] < min_usage) {
+		if ((adding_to_full || !test_bit(i, cache->idx_exclusive)) &&
+		    cache->idx_usage_counts[i] < min_usage) {
 			min_usage = cache->idx_usage_counts[i];
 			res = i;
 
@@ -499,17 +518,23 @@ u16 bpf_local_storage_cache_idx_get(struct bpf_local_storage_cache *cache)
 			break;
 		}
 	}
+
+	if (exclusive)
+		set_bit(res, cache->idx_exclusive);
 	cache->idx_usage_counts[res]++;
 
+out:
 	spin_unlock(&cache->idx_lock);
 
 	return res;
 }
 
 void bpf_local_storage_cache_idx_free(struct bpf_local_storage_cache *cache,
-				      u16 idx)
+				      u16 idx, u64 flags)
 {
 	spin_lock(&cache->idx_lock);
+	if (flags & BPF_LOCAL_STORAGE_FORCE_CACHE)
+		clear_bit(idx, cache->idx_exclusive);
 	cache->idx_usage_counts[idx]--;
 	spin_unlock(&cache->idx_lock);
 }
@@ -583,7 +608,8 @@ int bpf_local_storage_map_alloc_check(union bpf_attr *attr)
 	    attr->max_entries ||
 	    attr->key_size != sizeof(int) || !attr->value_size ||
 	    /* Enforce BTF for userspace sk dumping */
-	    !attr->btf_key_type_id || !attr->btf_value_type_id)
+	    !attr->btf_key_type_id || !attr->btf_value_type_id ||
+	    attr->map_extra & ~BPF_LOCAL_STORAGE_FORCE_CACHE)
 		return -EINVAL;
 
 	if (!bpf_capable())
diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c
index 6638a0ecc3d2..bf7b098d15c9 100644
--- a/kernel/bpf/bpf_task_storage.c
+++ b/kernel/bpf/bpf_task_storage.c
@@ -289,12 +289,21 @@ static int notsupp_get_next_key(struct bpf_map *map, void *key, void *next_key)
 static struct bpf_map *task_storage_map_alloc(union bpf_attr *attr)
 {
 	struct bpf_local_storage_map *smap;
+	int cache_idx_or_err;
+
+	cache_idx_or_err = bpf_local_storage_cache_idx_get(&task_cache,
+							   attr->map_extra);
+	if (cache_idx_or_err < 0)
+		return ERR_PTR(cache_idx_or_err);
 
 	smap = bpf_local_storage_map_alloc(attr);
-	if (IS_ERR(smap))
+	if (IS_ERR(smap)) {
+		bpf_local_storage_cache_idx_free(&task_cache, (u16)cache_idx_or_err,
+						 attr->map_extra);
 		return ERR_CAST(smap);
+	}
 
-	smap->cache_idx = bpf_local_storage_cache_idx_get(&task_cache);
+	smap->cache_idx = (u16)cache_idx_or_err;
 	return &smap->map;
 }
 
@@ -303,7 +312,8 @@ static void task_storage_map_free(struct bpf_map *map)
 	struct bpf_local_storage_map *smap;
 
 	smap = (struct bpf_local_storage_map *)map;
-	bpf_local_storage_cache_idx_free(&task_cache, smap->cache_idx);
+	bpf_local_storage_cache_idx_free(&task_cache, smap->cache_idx,
+					 map->map_extra);
 	bpf_local_storage_map_free(smap, &bpf_task_storage_busy);
 }
 
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index e9621cfa09f2..9fd610e53840 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -847,8 +847,11 @@ static int map_create(union bpf_attr *attr)
 		return -EINVAL;
 	}
 
-	if (attr->map_type != BPF_MAP_TYPE_BLOOM_FILTER &&
-	    attr->map_extra != 0)
+	if (!(attr->map_type == BPF_MAP_TYPE_BLOOM_FILTER ||
+	      attr->map_type == BPF_MAP_TYPE_INODE_STORAGE ||
+	      attr->map_type == BPF_MAP_TYPE_TASK_STORAGE ||
+	      attr->map_type == BPF_MAP_TYPE_SK_STORAGE) &&
+	    attr->map_extra != 0)
 		return -EINVAL;
 
 	f_flags = bpf_get_file_flag(attr->map_flags);
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index e3ac36380520..f6a95f525f50 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -90,19 +90,28 @@ static void bpf_sk_storage_map_free(struct bpf_map *map)
 	struct bpf_local_storage_map *smap;
 
 	smap = (struct bpf_local_storage_map *)map;
-	bpf_local_storage_cache_idx_free(&sk_cache, smap->cache_idx);
+	bpf_local_storage_cache_idx_free(&sk_cache, smap->cache_idx, map->map_extra);
 	bpf_local_storage_map_free(smap, NULL);
 }
 
 static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr)
 {
 	struct bpf_local_storage_map *smap;
+	int cache_idx_or_err;
+
+	cache_idx_or_err = bpf_local_storage_cache_idx_get(&sk_cache,
+							   attr->map_extra);
+	if (cache_idx_or_err < 0)
+		return ERR_PTR(cache_idx_or_err);
 
 	smap = bpf_local_storage_map_alloc(attr);
-	if (IS_ERR(smap))
+	if (IS_ERR(smap)) {
+		bpf_local_storage_cache_idx_free(&sk_cache, (u16)cache_idx_or_err,
+						 attr->map_extra);
 		return ERR_CAST(smap);
+	}
 
-	smap->cache_idx = bpf_local_storage_cache_idx_get(&sk_cache);
+	smap->cache_idx = (u16)cache_idx_or_err;
 	return &smap->map;
 }
 
-- 
2.30.2