[PATCH bpf-next 1/3] bpf: Introduce local_storage exclusive caching option

Allow local_storage maps to claim exclusive use of a cache slot in
struct bpf_local_storage's cache. When a local_storage map claims a
slot and its data is cached by bpf_local_storage_lookup, the cached
entry will not be replaced until the map is freed. As a result, after
a local_storage map's data is first cached for a specific
bpf_local_storage, every subsequent lookup will quickly find the
correct map.
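
For example, a program that wants its task_storage map pinned in the
cache could request an exclusive slot in its map definition. Below is
a minimal sketch assuming libbpf's BTF map definition support for
map_extra (added alongside bloom filter maps); struct task_data and
hot_task_storage are placeholder names:

  #include <vmlinux.h>
  #include <bpf/bpf_helpers.h>

  struct task_data {
  	__u64 hits;
  };

  struct {
  	__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
  	__uint(map_flags, BPF_F_NO_PREALLOC);
  	__type(key, int);
  	__type(value, struct task_data);
  	/* Claim an exclusive cache slot, or fail map creation */
  	__uint(map_extra, BPF_LOCAL_STORAGE_FORCE_CACHE);
  } hot_task_storage SEC(".maps");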

When requesting an exclusive cache slot, bpf_local_storage_cache_idx_get
can now fail if all slots are already claimed. Because a map's cache_idx
is assigned when the bpf_map is allocated - which occurs before any
program runs - the failure surfaces at map load time, so the map load
and any subsequent prog load will fail.
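
From userspace, the failure is visible when the map is created. A
hedged sketch, assuming a hypothetical skeleton my_prog containing the
map above and a libbpf version that sets errno on failure:

  #include <errno.h>
  #include <stdio.h>
  #include "my_prog.skel.h"	/* hypothetical skeleton header */

  int main(void)
  {
  	struct my_prog *skel = my_prog__open_and_load();

  	if (!skel && errno == ENOMEM)
  		/* all cache slots already exclusively claimed */
  		fprintf(stderr, "no free exclusive cache slot\n");
  	return skel ? 0 : 1;
  }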

A bit in struct bpf_map's map_extra is used to designate whether a map
wants to claim an exclusive slot. Correspondingly, a bitmap,
idx_exclusive, is added to bpf_local_storage_cache to track whether a
slot is exclusively claimed. Functions that manipulate the cache are
modified to test for the BPF_LOCAL_STORAGE_FORCE_CACHE bit and to
test/set idx_exclusive where necessary.
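
For illustration, userspace could also read the bit back to check
whether an existing map holds an exclusive claim. This sketch assumes
map_extra is reported in struct bpf_map_info (as it is for bloom
filter maps) and map_fd is a placeholder for an already-opened map fd:

  struct bpf_map_info info = {};
  __u32 info_len = sizeof(info);

  if (!bpf_obj_get_info_by_fd(map_fd, &info, &info_len) &&
      (info.map_extra & BPF_LOCAL_STORAGE_FORCE_CACHE))
  	printf("map id %u claimed an exclusive cache slot\n", info.id);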

When a map exclusively claims a cache slot, non-exclusive local_storage
maps that were previously assigned the same cache_idx are not migrated
to an unclaimed cache_idx. Such a migration would require a full
iteration of the cache list and would necessitate a reverse migration
on map free to even things out. Since a used cache slot is only
exclusively claimed when no empty slot exists, the additional
complexity was deemed unnecessary.

Signed-off-by: Dave Marchevsky <davemarchevsky@xxxxxx>
---
 include/linux/bpf_local_storage.h |  6 +++--
 include/uapi/linux/bpf.h          | 14 +++++++++++
 kernel/bpf/bpf_inode_storage.c    | 16 +++++++++---
 kernel/bpf/bpf_local_storage.c    | 42 +++++++++++++++++++++++++------
 kernel/bpf/bpf_task_storage.c     | 16 +++++++++---
 kernel/bpf/syscall.c              |  7 ++++--
 net/core/bpf_sk_storage.c         | 15 ++++++++---
 7 files changed, 95 insertions(+), 21 deletions(-)

diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h
index 493e63258497..d87405a1b65d 100644
--- a/include/linux/bpf_local_storage.h
+++ b/include/linux/bpf_local_storage.h
@@ -109,6 +109,7 @@ struct bpf_local_storage {
 struct bpf_local_storage_cache {
 	spinlock_t idx_lock;
 	u64 idx_usage_counts[BPF_LOCAL_STORAGE_CACHE_SIZE];
+	DECLARE_BITMAP(idx_exclusive, BPF_LOCAL_STORAGE_CACHE_SIZE);
 };
 
 #define DEFINE_BPF_STORAGE_CACHE(name)				\
@@ -116,9 +117,10 @@ static struct bpf_local_storage_cache name = {			\
 	.idx_lock = __SPIN_LOCK_UNLOCKED(name.idx_lock),	\
 }
 
-u16 bpf_local_storage_cache_idx_get(struct bpf_local_storage_cache *cache);
+int bpf_local_storage_cache_idx_get(struct bpf_local_storage_cache *cache,
+				    u64 flags);
 void bpf_local_storage_cache_idx_free(struct bpf_local_storage_cache *cache,
-				      u16 idx);
+				       u16 idx, u64 flags);
 
 /* Helper functions for bpf_local_storage */
 int bpf_local_storage_map_alloc_check(union bpf_attr *attr);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index d14b10b85e51..566035bc2f08 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1257,6 +1257,18 @@ enum bpf_stack_build_id_status {
 	BPF_STACK_BUILD_ID_IP = 2,
 };
 
+/* Flags passed in map_extra when creating local_storage maps
+ * of types: BPF_MAP_TYPE_INODE_STORAGE
+ *           BPF_MAP_TYPE_TASK_STORAGE
+ *           BPF_MAP_TYPE_SK_STORAGE
+ */
+enum bpf_local_storage_extra_flags {
+	/* Give the map exclusive use of a local_storage cache slot
+	 * or fail map alloc
+	 */
+	BPF_LOCAL_STORAGE_FORCE_CACHE = (1U << 0),
+};
+
 #define BPF_BUILD_ID_SIZE 20
 struct bpf_stack_build_id {
 	__s32		status;
@@ -1296,6 +1308,8 @@ union bpf_attr {
 		 * BPF_MAP_TYPE_BLOOM_FILTER - the lowest 4 bits indicate the
 		 * number of hash functions (if 0, the bloom filter will default
 		 * to using 5 hash functions).
+		 * BPF_MAP_TYPE_{INODE,TASK,SK}_STORAGE - local_storage specific
+		 * flags (see bpf_local_storage_extra_flags)
 		 */
 		__u64	map_extra;
 	};
diff --git a/kernel/bpf/bpf_inode_storage.c b/kernel/bpf/bpf_inode_storage.c
index 96be8d518885..8b32adc23fc3 100644
--- a/kernel/bpf/bpf_inode_storage.c
+++ b/kernel/bpf/bpf_inode_storage.c
@@ -227,12 +227,21 @@ static int notsupp_get_next_key(struct bpf_map *map, void *key,
 static struct bpf_map *inode_storage_map_alloc(union bpf_attr *attr)
 {
 	struct bpf_local_storage_map *smap;
+	int cache_idx_or_err;
+
+	cache_idx_or_err = bpf_local_storage_cache_idx_get(&inode_cache,
+							   attr->map_extra);
+	if (cache_idx_or_err < 0)
+		return ERR_PTR(cache_idx_or_err);
 
 	smap = bpf_local_storage_map_alloc(attr);
-	if (IS_ERR(smap))
+	if (IS_ERR(smap)) {
+		bpf_local_storage_cache_idx_free(&inode_cache, (u16)cache_idx_or_err,
+						 attr->map_extra);
 		return ERR_CAST(smap);
+	}
 
-	smap->cache_idx = bpf_local_storage_cache_idx_get(&inode_cache);
+	smap->cache_idx = (u16)cache_idx_or_err;
 	return &smap->map;
 }
 
@@ -241,7 +250,8 @@ static void inode_storage_map_free(struct bpf_map *map)
 	struct bpf_local_storage_map *smap;
 
 	smap = (struct bpf_local_storage_map *)map;
-	bpf_local_storage_cache_idx_free(&inode_cache, smap->cache_idx);
+	bpf_local_storage_cache_idx_free(&inode_cache, smap->cache_idx,
+					 map->map_extra);
 	bpf_local_storage_map_free(smap, NULL);
 }
 
diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c
index 01aa2b51ec4d..b23080247bef 100644
--- a/kernel/bpf/bpf_local_storage.c
+++ b/kernel/bpf/bpf_local_storage.c
@@ -231,12 +231,19 @@ bpf_local_storage_lookup(struct bpf_local_storage *local_storage,
 {
 	struct bpf_local_storage_data *sdata;
 	struct bpf_local_storage_elem *selem;
+	struct bpf_local_storage_map *cached;
+	bool cached_exclusive = false;
 
 	/* Fast path (cache hit) */
 	sdata = rcu_dereference_check(local_storage->cache[smap->cache_idx],
 				      bpf_rcu_lock_held());
-	if (sdata && rcu_access_pointer(sdata->smap) == smap)
-		return sdata;
+	if (sdata) {
+		if (rcu_access_pointer(sdata->smap) == smap)
+			return sdata;
+
+		cached = rcu_dereference_check(sdata->smap, bpf_rcu_lock_held());
+		cached_exclusive = cached->map.map_extra & BPF_LOCAL_STORAGE_FORCE_CACHE;
+	}
 
 	/* Slow path (cache miss) */
 	hlist_for_each_entry_rcu(selem, &local_storage->list, snode,
@@ -248,7 +255,7 @@ bpf_local_storage_lookup(struct bpf_local_storage *local_storage,
 		return NULL;
 
 	sdata = SDATA(selem);
-	if (cacheit_lockit) {
+	if (cacheit_lockit && !cached_exclusive) {
 		unsigned long flags;
 
 		/* spinlock is needed to avoid racing with the
@@ -482,15 +489,27 @@ bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
 	return ERR_PTR(err);
 }
 
-u16 bpf_local_storage_cache_idx_get(struct bpf_local_storage_cache *cache)
+int bpf_local_storage_cache_idx_get(struct bpf_local_storage_cache *cache,
+				    u64 flags)
 {
+	bool exclusive = flags & BPF_LOCAL_STORAGE_FORCE_CACHE;
+	bool adding_to_full = false;
 	u64 min_usage = U64_MAX;
-	u16 i, res = 0;
+	int res = 0;
+	u16 i;
 
 	spin_lock(&cache->idx_lock);
 
+	if (bitmap_full(cache->idx_exclusive, BPF_LOCAL_STORAGE_CACHE_SIZE)) {
+		res = -ENOMEM;
+		adding_to_full = true;
+		if (exclusive)
+			goto out;
+	}
+
 	for (i = 0; i < BPF_LOCAL_STORAGE_CACHE_SIZE; i++) {
-		if (cache->idx_usage_counts[i] < min_usage) {
+		if ((adding_to_full || !test_bit(i, cache->idx_exclusive)) &&
+		    cache->idx_usage_counts[i] < min_usage) {
 			min_usage = cache->idx_usage_counts[i];
 			res = i;
 
@@ -499,17 +518,23 @@ u16 bpf_local_storage_cache_idx_get(struct bpf_local_storage_cache *cache)
 				break;
 		}
 	}
+
+	if (exclusive)
+		set_bit(res, cache->idx_exclusive);
 	cache->idx_usage_counts[res]++;
 
+out:
 	spin_unlock(&cache->idx_lock);
 
 	return res;
 }
 
 void bpf_local_storage_cache_idx_free(struct bpf_local_storage_cache *cache,
-				      u16 idx)
+				      u16 idx, u64 flags)
 {
 	spin_lock(&cache->idx_lock);
+	if (flags & BPF_LOCAL_STORAGE_FORCE_CACHE)
+		clear_bit(idx, cache->idx_exclusive);
 	cache->idx_usage_counts[idx]--;
 	spin_unlock(&cache->idx_lock);
 }
@@ -583,7 +608,8 @@ int bpf_local_storage_map_alloc_check(union bpf_attr *attr)
 	    attr->max_entries ||
 	    attr->key_size != sizeof(int) || !attr->value_size ||
 	    /* Enforce BTF for userspace sk dumping */
-	    !attr->btf_key_type_id || !attr->btf_value_type_id)
+	    !attr->btf_key_type_id || !attr->btf_value_type_id ||
+	    attr->map_extra & ~BPF_LOCAL_STORAGE_FORCE_CACHE)
 		return -EINVAL;
 
 	if (!bpf_capable())
diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c
index 6638a0ecc3d2..bf7b098d15c9 100644
--- a/kernel/bpf/bpf_task_storage.c
+++ b/kernel/bpf/bpf_task_storage.c
@@ -289,12 +289,21 @@ static int notsupp_get_next_key(struct bpf_map *map, void *key, void *next_key)
 static struct bpf_map *task_storage_map_alloc(union bpf_attr *attr)
 {
 	struct bpf_local_storage_map *smap;
+	int cache_idx_or_err;
+
+	cache_idx_or_err = bpf_local_storage_cache_idx_get(&task_cache,
+							   attr->map_extra);
+	if (cache_idx_or_err < 0)
+		return ERR_PTR(cache_idx_or_err);
 
 	smap = bpf_local_storage_map_alloc(attr);
-	if (IS_ERR(smap))
+	if (IS_ERR(smap)) {
+		bpf_local_storage_cache_idx_free(&task_cache, (u16)cache_idx_or_err,
+						 attr->map_extra);
 		return ERR_CAST(smap);
+	}
 
-	smap->cache_idx = bpf_local_storage_cache_idx_get(&task_cache);
+	smap->cache_idx = (u16)cache_idx_or_err;
 	return &smap->map;
 }
 
@@ -303,7 +312,8 @@ static void task_storage_map_free(struct bpf_map *map)
 	struct bpf_local_storage_map *smap;
 
 	smap = (struct bpf_local_storage_map *)map;
-	bpf_local_storage_cache_idx_free(&task_cache, smap->cache_idx);
+	bpf_local_storage_cache_idx_free(&task_cache, smap->cache_idx,
+					 map->map_extra);
 	bpf_local_storage_map_free(smap, &bpf_task_storage_busy);
 }
 
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index e9621cfa09f2..9fd610e53840 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -847,8 +847,11 @@ static int map_create(union bpf_attr *attr)
 		return -EINVAL;
 	}
 
-	if (attr->map_type != BPF_MAP_TYPE_BLOOM_FILTER &&
-	    attr->map_extra != 0)
+	if (!(attr->map_type == BPF_MAP_TYPE_BLOOM_FILTER ||
+	      attr->map_type == BPF_MAP_TYPE_INODE_STORAGE ||
+	      attr->map_type == BPF_MAP_TYPE_TASK_STORAGE ||
+	      attr->map_type == BPF_MAP_TYPE_SK_STORAGE) &&
+	     attr->map_extra != 0)
 		return -EINVAL;
 
 	f_flags = bpf_get_file_flag(attr->map_flags);
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index e3ac36380520..f6a95f525f50 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -90,19 +90,28 @@ static void bpf_sk_storage_map_free(struct bpf_map *map)
 	struct bpf_local_storage_map *smap;
 
 	smap = (struct bpf_local_storage_map *)map;
-	bpf_local_storage_cache_idx_free(&sk_cache, smap->cache_idx);
+	bpf_local_storage_cache_idx_free(&sk_cache, smap->cache_idx, map->map_extra);
 	bpf_local_storage_map_free(smap, NULL);
 }
 
 static struct bpf_map *bpf_sk_storage_map_alloc(union bpf_attr *attr)
 {
 	struct bpf_local_storage_map *smap;
+	int cache_idx_or_err;
+
+	cache_idx_or_err = bpf_local_storage_cache_idx_get(&sk_cache,
+							   attr->map_extra);
+	if (cache_idx_or_err < 0)
+		return ERR_PTR(cache_idx_or_err);
 
 	smap = bpf_local_storage_map_alloc(attr);
-	if (IS_ERR(smap))
+	if (IS_ERR(smap)) {
+		bpf_local_storage_cache_idx_free(&sk_cache, (u16)cache_idx_or_err,
+						 attr->map_extra);
 		return ERR_CAST(smap);
+	}
 
-	smap->cache_idx = bpf_local_storage_cache_idx_get(&sk_cache);
+	smap->cache_idx = (u16)cache_idx_or_err;
 	return &smap->map;
 }
 
-- 
2.30.2




