On Tue, Jun 30, 2020 at 9:35 PM Martin KaFai Lau <kafai@xxxxxx> wrote:
>
> On Mon, Jun 29, 2020 at 06:01:00PM +0200, KP Singh wrote:
> > > > [...]
> > > > static atomic_t cache_idx;
> > > inode local storage and sk local storage probably need a separate
> > > cache_idx. An improvement on picking cache_idx has also just
> > > landed.
> > I see, thanks! I rebased and I now see that cache_idx is now a:
> >
> > static u64 cache_idx_usage_counts[BPF_STORAGE_CACHE_SIZE];
> >
> > which tracks the usage of the cache slots rather than using a single
> > atomic cache_idx. I guess all types of local storage can share this
> > now, right?
> I believe they have to be separated. A sk-storage will not be cached/stored
> in an inode. Caching a sk-storage at idx=0 of a sk should not stop
> an inode-storage from being cached at the same idx of an inode.

Ah yes, I see. I came up with some macros to solve this. Let me know
what you think (this is on top of the refactoring I did, so some
function names may seem new, but it should, hopefully, convey the
general idea):

diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h
index 3067774cc640..1dc2e6d72091 100644
--- a/include/linux/bpf_local_storage.h
+++ b/include/linux/bpf_local_storage.h
@@ -79,6 +79,26 @@ struct bpf_local_storage_elem {
 #define SDATA(_SELEM) (&(_SELEM)->sdata)
 
 #define BPF_STORAGE_CACHE_SIZE 16
 
+u16 bpf_ls_cache_idx_get(spinlock_t *cache_idx_lock,
+                         u64 *cache_idx_usage_counts);
+
+void bpf_ls_cache_idx_free(spinlock_t *cache_idx_lock,
+                           u64 *cache_idx_usage_counts, u16 idx);
+
+#define DEFINE_BPF_STORAGE_CACHE(type)                                 \
+static DEFINE_SPINLOCK(cache_idx_lock_##type);                         \
+static u64 cache_idx_usage_counts_##type[BPF_STORAGE_CACHE_SIZE];      \
+static u16 cache_idx_get_##type(void)                                  \
+{                                                                      \
+        return bpf_ls_cache_idx_get(&cache_idx_lock_##type,            \
+                                    cache_idx_usage_counts_##type);    \
+}                                                                      \
+static void cache_idx_free_##type(u16 idx)                             \
+{                                                                      \
+        return bpf_ls_cache_idx_free(&cache_idx_lock_##type,           \
+                                     cache_idx_usage_counts_##type,    \
+                                     idx);                             \
+}
 /* U16_MAX is much more than enough for sk local storage
  * considering a tcp_sock is ~2k.
@@ -105,13 +125,14 @@ struct bpf_local_storage {
 /* Helper functions for bpf_local_storage */
 int bpf_local_storage_map_alloc_check(union bpf_attr *attr);
-struct bpf_map *bpf_local_storage_map_alloc(union bpf_attr *attr);
+struct bpf_local_storage_map *
+bpf_local_storage_map_alloc(union bpf_attr *attr);
 
 struct bpf_local_storage_data *
 bpf_local_storage_lookup(struct bpf_local_storage *local_storage,
                          struct bpf_local_storage_map *smap,
                          bool cacheit_lockit);
 
-void bpf_local_storage_map_free(struct bpf_map *map);
+void bpf_local_storage_map_free(struct bpf_local_storage_map *smap);
 int bpf_local_storage_map_check_btf(const struct bpf_map *map,
                                     const struct btf *btf,
                                     const struct btf_type *key_type,
diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c
index fb589a5715f5..2bc04f8d1e35 100644
--- a/kernel/bpf/bpf_local_storage.c
+++ b/kernel/bpf/bpf_local_storage.c
@@ -17,9 +17,6 @@
         container_of((_SDATA), struct bpf_local_storage_elem, sdata)
 #define SDATA(_SELEM) (&(_SELEM)->sdata)
 
-static DEFINE_SPINLOCK(cache_idx_lock);
-static u64 cache_idx_usage_counts[BPF_STORAGE_CACHE_SIZE];
-
 static struct bucket *select_bucket(struct bpf_local_storage_map *smap,
                                     struct bpf_local_storage_elem *selem)
 {
@@ -460,12 +457,13 @@ static struct bpf_local_storage_data *inode_storage_update(
                                   map_flags);
 }
 
-static u16 cache_idx_get(void)
+u16 bpf_ls_cache_idx_get(spinlock_t *cache_idx_lock,
+                         u64 *cache_idx_usage_counts)
 {
         u64 min_usage = U64_MAX;
         u16 i, res = 0;
 
-        spin_lock(&cache_idx_lock);
+        spin_lock(cache_idx_lock);
 
         for (i = 0; i < BPF_STORAGE_CACHE_SIZE; i++) {
                 if (cache_idx_usage_counts[i] < min_usage) {
@@ -479,16 +477,17 @@ static u16 cache_idx_get(void)
         }
 
         cache_idx_usage_counts[res]++;
-        spin_unlock(&cache_idx_lock);
+        spin_unlock(cache_idx_lock);
 
         return res;
 }
 
-static void cache_idx_free(u16 idx)
+void bpf_ls_cache_idx_free(spinlock_t *cache_idx_lock,
+                           u64 *cache_idx_usage_counts, u16 idx)
 {
-        spin_lock(&cache_idx_lock);
+        spin_lock(cache_idx_lock);
         cache_idx_usage_counts[idx]--;
-        spin_unlock(&cache_idx_lock);
+        spin_unlock(cache_idx_lock);
 }
 
 static int inode_storage_delete(struct inode *inode, struct bpf_map *map)
@@ -552,17 +551,12 @@ void bpf_inode_storage_free(struct inode *inode)
         kfree_rcu(local_storage, rcu);
 }
 
-void bpf_local_storage_map_free(struct bpf_map *map)
+void bpf_local_storage_map_free(struct bpf_local_storage_map *smap)
 {
         struct bpf_local_storage_elem *selem;
-        struct bpf_local_storage_map *smap;
         struct bucket *b;
         unsigned int i;
 
-        smap = (struct bpf_local_storage_map *)map;
-
-        cache_idx_free(smap->cache_idx);
-
         /* Note that this map might be concurrently cloned from
          * bpf_sk_storage_clone. Wait for any existing bpf_sk_storage_clone
          * RCU read section to finish before proceeding. New RCU
@@ -607,7 +601,7 @@ void bpf_local_storage_map_free(struct bpf_map *map)
         synchronize_rcu();
 
         kvfree(smap->buckets);
-        kfree(map);
+        kfree(smap);
 }
 
 int bpf_local_storage_map_alloc_check(union bpf_attr *attr)
@@ -629,8 +623,7 @@ int bpf_local_storage_map_alloc_check(union bpf_attr *attr)
         return 0;
 }
 
-
-struct bpf_map *bpf_local_storage_map_alloc(union bpf_attr *attr)
+struct bpf_local_storage_map *bpf_local_storage_map_alloc(union bpf_attr *attr)
 {
         struct bpf_local_storage_map *smap;
         unsigned int i;
@@ -670,9 +663,8 @@ struct bpf_map *bpf_local_storage_map_alloc(union bpf_attr *attr)
 
         smap->elem_size = sizeof(struct bpf_local_storage_elem) +
                 attr->value_size;
-        smap->cache_idx = cache_idx_get();
 
-        return &smap->map;
+        return smap;
 }
 
 int bpf_local_storage_map_check_btf(const struct bpf_map *map,
@@ -768,11 +760,34 @@ static int notsupp_get_next_key(struct bpf_map *map, void *key,
         return -ENOTSUPP;
 }
 
+DEFINE_BPF_STORAGE_CACHE(inode);
+
+struct bpf_map *inode_storage_map_alloc(union bpf_attr *attr)
+{
+        struct bpf_local_storage_map *smap;
+
+        smap = bpf_local_storage_map_alloc(attr);
+        if (IS_ERR(smap))
+                return ERR_CAST(smap);
+
+        smap->cache_idx = cache_idx_get_inode();
+        return &smap->map;
+}
+
+void inode_storage_map_free(struct bpf_map *map)
+{
+        struct bpf_local_storage_map *smap;
+
+        smap = (struct bpf_local_storage_map *)map;
+        cache_idx_free_inode(smap->cache_idx);
+        bpf_local_storage_map_free(smap);
+}
+
 static int inode_storage_map_btf_id;
 const struct bpf_map_ops inode_storage_map_ops = {
         .map_alloc_check = bpf_local_storage_map_alloc_check,
-        .map_alloc = bpf_local_storage_map_alloc,
-        .map_free = bpf_local_storage_map_free,
+        .map_alloc = inode_storage_map_alloc,
+        .map_free = inode_storage_map_free,
         .map_get_next_key = notsupp_get_next_key,
         .map_lookup_elem = bpf_inode_storage_lookup_elem,
         .map_update_elem = bpf_inode_storage_update_elem,
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index 0ec44e819bfe..add0340e9ad3 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -396,11 +396,34 @@ static int notsupp_get_next_key(struct bpf_map *map, void *key,
         return -ENOTSUPP;
 }
 
+DEFINE_BPF_STORAGE_CACHE(sk);
+
+struct bpf_map *sk_storage_map_alloc(union bpf_attr *attr)
+{
+        struct bpf_local_storage_map *smap;
+
+        smap = bpf_local_storage_map_alloc(attr);
+        if (IS_ERR(smap))
+                return ERR_CAST(smap);
+
+        smap->cache_idx = cache_idx_get_sk();
+        return &smap->map;
+}
+
+void sk_storage_map_free(struct bpf_map *map)
+{
+        struct bpf_local_storage_map *smap;
+
+        smap = (struct bpf_local_storage_map *)map;
+        cache_idx_free_sk(smap->cache_idx);
+        bpf_local_storage_map_free(smap);
+}
+
 static int sk_storage_map_btf_id;
 const struct bpf_map_ops sk_storage_map_ops = {
         .map_alloc_check = bpf_local_storage_map_alloc_check,
-        .map_alloc = bpf_local_storage_map_alloc,
-        .map_free = bpf_local_storage_map_free,
+        .map_alloc = sk_storage_map_alloc,
+        .map_free = sk_storage_map_free,
         .map_get_next_key = notsupp_get_next_key,
         .map_lookup_elem = bpf_sk_storage_lookup_elem,
         .map_update_elem = bpf_sk_storage_update_elem,
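
To make the general idea more concrete: each DEFINE_BPF_STORAGE_CACHE
instance gets its own lock and usage counters, so the cache slots of one
storage type are accounted independently of every other type. A purely
hypothetical future task local storage (the task_* names below are made
up for illustration and are not part of this diff) would only need:

/* Hypothetical sketch only: a bpf_task_storage.c could instantiate its
 * own cache bookkeeping. The counters are separate from the sk and
 * inode ones, so a sk-storage cached at idx=0 has no effect here.
 */
DEFINE_BPF_STORAGE_CACHE(task);

static struct bpf_map *task_storage_map_alloc(union bpf_attr *attr)
{
        struct bpf_local_storage_map *smap;

        smap = bpf_local_storage_map_alloc(attr);
        if (IS_ERR(smap))
                return ERR_CAST(smap);

        /* picks the least used slot in cache_idx_usage_counts_task */
        smap->cache_idx = cache_idx_get_task();
        return &smap->map;
}

static void task_storage_map_free(struct bpf_map *map)
{
        struct bpf_local_storage_map *smap;

        smap = (struct bpf_local_storage_map *)map;
        cache_idx_free_task(smap->cache_idx);
        bpf_local_storage_map_free(smap);
}

i.e. exactly the same shape as the inode and sk wrappers above, with the
cache separation decided purely by which DEFINE_BPF_STORAGE_CACHE
instance the wrappers use.
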
> [...]
> >
> > Sure, I can also keep the sk_clone code there as well for now.
> Just came to my mind. For easier review purposes, maybe
> first do the refactoring/renaming within bpf_sk_storage.c and
> then create another patch to move the common parts to a new
> file bpf_local_storage.c.
>
> Not sure if it will be indeed easier to read the diff in practice.
> I probably should be able to follow it either way.

Since I already refactored it, I will send the next version with the
refactoring and split done as part of the "Generalize bpf_sk_storage"
patch. If it becomes too hard to review, please let me know and I can
split it. :)

>
> >
> > >
> > > There is a test in map_tests/sk_storage_map.c, in case you may not have noticed.
> >
> > I will try to make it generic as a part of this series. If it takes
> > too much time, I will send a separate patch for testing
> > inode_storage_map and till then we have some assurance with
> > test_local_storage in test_progs.
> Sure, no problem. It is mostly for you to test sk_storage to make sure
> things still work ;)
> Also, it may give some ideas on what race conditions have
> been considered in the sk_storage test, and maybe the inode storage
> test wants to stress similar code paths.

Thanks! I ran test_maps and it passes. I will send a separate patch
that generalizes sk_storage_map.c.

- KP