On 11/20/23 9:59 AM, Dave Marchevsky wrote:
diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h
index 173ec7f43ed1..114973f925ea 100644
--- a/include/linux/bpf_local_storage.h
+++ b/include/linux/bpf_local_storage.h
@@ -69,7 +69,17 @@ struct bpf_local_storage_data {
* the number of cachelines accessed during the cache hit case.
*/
struct bpf_local_storage_map __rcu *smap;
- u8 data[] __aligned(8);
+ /* Need to duplicate smap's map_flags as smap may be gone when
+ * it's time to free bpf_local_storage_data
+ */
+ u64 smap_map_flags;
+ /* If BPF_F_MMAPABLE, this is a void * to separately-alloc'd data
+ * Otherwise the actual mapval data lives here
+ */
+ union {
+ DECLARE_FLEX_ARRAY(u8, data) __aligned(8);
+ void *actual_data __aligned(8);
The pages (that can be mmap'ed later) feel like a specific kind of kptr.
Have you thought about allowing a kptr pointing to some pages (that can be
mmap'ed later) to be stored as one of the members of the map's value?
bpf_local_storage_map is one of the maps that supports kptr, e.g.:
struct normal_and_mmap_value {
	int some_int;
	int __percpu_kptr *some_cnts;

	struct bpf_mmap_page __kptr *some_stats;
};

struct mmap_only_value {
	struct bpf_mmap_page __kptr *some_stats;
};
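
With something like that, the prog itself could allocate the pages and
publish them with bpf_kptr_xchg(). A minimal sketch of the usage I am
imagining, where struct bpf_mmap_page and the bpf_mmap_page_alloc()/_free()
kfuncs are hypothetical names for illustration (bpf_task_storage_get() and
bpf_kptr_xchg() exist today):

#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

char _license[] SEC("license") = "GPL";

/* hypothetical kfuncs that acquire/release the mmap'able pages */
extern struct bpf_mmap_page *bpf_mmap_page_alloc(void) __ksym;
extern void bpf_mmap_page_free(struct bpf_mmap_page *p) __ksym;

struct {
	__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
	__uint(map_flags, BPF_F_NO_PREALLOC);
	__type(key, int);
	__type(value, struct normal_and_mmap_value);
} task_stats SEC(".maps");

SEC("tp_btf/sched_switch")
int BPF_PROG(attach_stats, bool preempt, struct task_struct *prev,
	     struct task_struct *next)
{
	struct normal_and_mmap_value *v;
	struct bpf_mmap_page *stats, *old;

	v = bpf_task_storage_get(&task_stats, next, NULL,
				 BPF_LOCAL_STORAGE_GET_F_CREATE);
	if (!v || v->some_stats)
		return 0;

	stats = bpf_mmap_page_alloc();		/* hypothetical kfunc */
	if (!stats)
		return 0;

	/* publish the pages; userspace would mmap them later */
	old = bpf_kptr_xchg(&v->some_stats, stats);
	if (old)
		bpf_mmap_page_free(old);	/* hypothetical kfunc */
	return 0;
}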
[ ... ]
diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c
index 146824cc9689..9b3becbcc1a3 100644
--- a/kernel/bpf/bpf_local_storage.c
+++ b/kernel/bpf/bpf_local_storage.c
@@ -15,7 +15,8 @@
#include <linux/rcupdate_trace.h>
#include <linux/rcupdate_wait.h>
-#define BPF_LOCAL_STORAGE_CREATE_FLAG_MASK (BPF_F_NO_PREALLOC | BPF_F_CLONE)
+#define BPF_LOCAL_STORAGE_CREATE_FLAG_MASK \
+ (BPF_F_NO_PREALLOC | BPF_F_CLONE | BPF_F_MMAPABLE)
static struct bpf_local_storage_map_bucket *
select_bucket(struct bpf_local_storage_map *smap,
@@ -24,6 +25,51 @@ select_bucket(struct bpf_local_storage_map *smap,
return &smap->buckets[hash_ptr(selem, smap->bucket_log)];
}
+struct mem_cgroup *bpf_map_get_memcg(const struct bpf_map *map);
+
+void *alloc_mmapable_selem_value(struct bpf_local_storage_map *smap)
+{
+ struct mem_cgroup *memcg, *old_memcg;
+ void *ptr;
+
+ memcg = bpf_map_get_memcg(&smap->map);
+ old_memcg = set_active_memcg(memcg);
+ ptr = bpf_map_area_mmapable_alloc(PAGE_ALIGN(smap->map.value_size),
+ NUMA_NO_NODE);
+ set_active_memcg(old_memcg);
+ mem_cgroup_put(memcg);
+
+ return ptr;
+}
[ ... ]
@@ -76,10 +122,19 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner,
void *value, bool charge_mem, gfp_t gfp_flags)
{
struct bpf_local_storage_elem *selem;
+ void *mmapable_value = NULL;
+ u32 selem_mem;
- if (charge_mem && mem_charge(smap, owner, smap->elem_size))
+ selem_mem = selem_bytes_used(smap);
+ if (charge_mem && mem_charge(smap, owner, selem_mem))
return NULL;
+ if (smap->map.map_flags & BPF_F_MMAPABLE) {
+ mmapable_value = alloc_mmapable_selem_value(smap);
This is probably not always safe for a bpf prog to do. Leaving aside that the
gfp_flags is not used here, bpf local storage is moving to bpf's memalloc
because of
https://lore.kernel.org/bpf/20221118190109.1512674-1-namhyung@xxxxxxxxxx/
(a rough sketch of what this implies is below the quoted hunk).
+ if (!mmapable_value)
+ goto err_out;
+ }
+
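
To make the above concrete: bpf_selem_alloc() is also reached from the bpf
prog side (bpf_*_storage_get() with F_CREATE from tracing progs, potentially
in atomic or NMI context), where the page allocation done by
alloc_mmapable_selem_value() is not allowed; the selem itself is moving to
bpf_mem_cache_alloc() for the same reason. A minimal sketch, assuming the
mmapable path would have to be limited to sleepable GFP_KERNEL callers (i.e.
the syscall-side update path); this only illustrates the concern, it is not a
suggested final design:

	if (smap->map.map_flags & BPF_F_MMAPABLE) {
		/* Assumption for illustration: only the syscall side
		 * (map_update_elem) passes GFP_KERNEL here.  A bpf prog
		 * calling bpf_*_storage_get(..., F_CREATE) may run in
		 * atomic/NMI context where bpf_map_area_mmapable_alloc()
		 * (a page-sized allocation) must not be attempted.
		 */
		if (gfp_flags != GFP_KERNEL)
			goto err_out;

		mmapable_value = alloc_mmapable_selem_value(smap);
		if (!mmapable_value)
			goto err_out;
	}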