On Sat, May 23, 2020 at 12:22:48AM +0200, Daniel Borkmann wrote: > On 5/22/20 4:23 AM, Martin KaFai Lau wrote: > [...] > > }; > > +/* Cannot be used as an inner map */ > > +#define BPF_MAP_NO_INNER_MAP (1 << 0) > > + > > struct bpf_map { > > /* The first two cachelines with read-mostly members of which some > > * are also accessed in fast-path (e.g. ops, max_entries). > > @@ -120,6 +123,7 @@ struct bpf_map { > > struct bpf_map_memory memory; > > char name[BPF_OBJ_NAME_LEN]; > > u32 btf_vmlinux_value_type_id; > > + u32 properties; > > bool bypass_spec_v1; > > bool frozen; /* write-once; write-protected by freeze_mutex */ > > /* 22 bytes hole */ > > @@ -1037,12 +1041,12 @@ extern const struct file_operations bpf_iter_fops; > > #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \ > > extern const struct bpf_prog_ops _name ## _prog_ops; \ > > extern const struct bpf_verifier_ops _name ## _verifier_ops; > > -#define BPF_MAP_TYPE(_id, _ops) \ > > +#define BPF_MAP_TYPE_FL(_id, _ops, properties) \ > > extern const struct bpf_map_ops _ops; > > #define BPF_LINK_TYPE(_id, _name) > > #include <linux/bpf_types.h> > > #undef BPF_PROG_TYPE > > -#undef BPF_MAP_TYPE > > +#undef BPF_MAP_TYPE_FL > > #undef BPF_LINK_TYPE > > extern const struct bpf_prog_ops bpf_offload_prog_ops; > > diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h > > index 29d22752fc87..3f32702c9bf4 100644 > > --- a/include/linux/bpf_types.h > > +++ b/include/linux/bpf_types.h > > @@ -76,16 +76,25 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_LSM, lsm, > > #endif /* CONFIG_BPF_LSM */ > > #endif > > +#define BPF_MAP_TYPE(x, y) BPF_MAP_TYPE_FL(x, y, 0) > > + > > BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops) > > BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, percpu_array_map_ops) > > -BPF_MAP_TYPE(BPF_MAP_TYPE_PROG_ARRAY, prog_array_map_ops) > > +/* prog_array->aux->{type,jited} is a runtime binding. > > + * Doing static check alone in the verifier is not enough, > > + * so BPF_MAP_NO_INNTER_MAP is needed. > > typo: INNTER Good catch. > > > + */ > > +BPF_MAP_TYPE_FL(BPF_MAP_TYPE_PROG_ARRAY, prog_array_map_ops, > > + BPF_MAP_NO_INNER_MAP) > > Probably nit, but what is "FL"? flags? We do have map_flags already, but here the > BPF_MAP_NO_INNER_MAP ends up in 'properties' instead. To avoid confusion, it would > probably be better to name it 'map_flags_fixed' since this is what it really means; > fixed flags that cannot be changed/controlled when creating a map. ok. may be BPF_MAP_TYPE_FIXED_FL? > > > BPF_MAP_TYPE(BPF_MAP_TYPE_PERF_EVENT_ARRAY, perf_event_array_map_ops) > > #ifdef CONFIG_CGROUPS > > BPF_MAP_TYPE(BPF_MAP_TYPE_CGROUP_ARRAY, cgroup_array_map_ops) > > #endif > > #ifdef CONFIG_CGROUP_BPF > > -BPF_MAP_TYPE(BPF_MAP_TYPE_CGROUP_STORAGE, cgroup_storage_map_ops) > > -BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, cgroup_storage_map_ops) > > +BPF_MAP_TYPE_FL(BPF_MAP_TYPE_CGROUP_STORAGE, cgroup_storage_map_ops, > > + BPF_MAP_NO_INNER_MAP) > > +BPF_MAP_TYPE_FL(BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, cgroup_storage_map_ops, > > + BPF_MAP_NO_INNER_MAP) > > #endif > > BPF_MAP_TYPE(BPF_MAP_TYPE_HASH, htab_map_ops) > > BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_HASH, htab_percpu_map_ops) > > @@ -116,8 +125,10 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, reuseport_array_ops) > > BPF_MAP_TYPE(BPF_MAP_TYPE_QUEUE, queue_map_ops) > > BPF_MAP_TYPE(BPF_MAP_TYPE_STACK, stack_map_ops) > > #if defined(CONFIG_BPF_JIT) > > -BPF_MAP_TYPE(BPF_MAP_TYPE_STRUCT_OPS, bpf_struct_ops_map_ops) > > +BPF_MAP_TYPE_FL(BPF_MAP_TYPE_STRUCT_OPS, bpf_struct_ops_map_ops, > > + BPF_MAP_NO_INNER_MAP) > > #endif > > +#undef BPF_MAP_TYPE > > BPF_LINK_TYPE(BPF_LINK_TYPE_RAW_TRACEPOINT, raw_tracepoint) > > BPF_LINK_TYPE(BPF_LINK_TYPE_TRACING, tracing) > [...] > > diff --git a/kernel/bpf/map_in_map.c b/kernel/bpf/map_in_map.c > > index 17738c93bec8..d965a1d328a9 100644 > > --- a/kernel/bpf/map_in_map.c > > +++ b/kernel/bpf/map_in_map.c > > @@ -17,13 +17,7 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd) > > if (IS_ERR(inner_map)) > > return inner_map; > > - /* prog_array->aux->{type,jited} is a runtime binding. > > - * Doing static check alone in the verifier is not enough. > > - */ > > - if (inner_map->map_type == BPF_MAP_TYPE_PROG_ARRAY || > > - inner_map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE || > > - inner_map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE || > > - inner_map->map_type == BPF_MAP_TYPE_STRUCT_OPS) { > > + if (inner_map->properties & BPF_MAP_NO_INNER_MAP) { > > fdput(f); > > return ERR_PTR(-ENOTSUPP); > > } > > This whole check here is currently very fragile. For example, given we forbid cgroup > local storage here, why do we not forbid socket local storage? What about other maps > like stackmap? It's quite unclear if it even works as expected and if there's also a > use-case we are aware of. Why not making this an explicit opt-in? Re: "cgroup-local-storage", my understanding is, cgroup-local-storage is local to the bpf's cgroup that it is running under, so it is not ok for a cgroup's bpf to be able to access other cgroup's local storage through map-in-map, so they are excluded here. sk-local-storage does not have this restriction. For other maps, if there is no known safety issue, why restricting it and create unnecessary API discrepancy? I think we cannot restrict the existing map either unless there is a known safety issue. > > Like explicit annotating via struct bpf_map_ops where everything is visible in one > single place where the map is defined: > > const struct bpf_map_ops array_map_ops = { > .map_alloc_check = array_map_alloc_check, > [...] > .map_flags_fixed = BPF_MAP_IN_MAP_OK, > }; I am not sure about adding it to bpf_map_ops instead of bpf_types.h. It will be easier to figure out what map types do not support MAP_IN_MAP (and other future map's fixed properties) in one place "bpf_types.h" instead of having to dig into each map src file. If the objective is to have the future map "consciously" opt-in, how about keeping the "BPF_MAP_TYPE" name as is but add a fixed_flags param as the earlier v1 and flip it from NO to OK flag. It will be clear that, it is a decision that the new map needs to make instead of a quiet 0 in "struct bpf_map_ops". For example, BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops, BPF_MAP_IN_MAP_OK) BPF_MAP_TYPE(BPF_MAP_TYPE_PROG_ARRAY, prog_array_map_ops, 0) BPF_MAP_TYPE(BPF_MAP_TYPE_HASH, htab_map_ops, BPF_MAP_IN_MAP_OK | BPF_MAP_IN_MAP_DYNAMIC_SIZE_OK) > > That way, if someone forgets to add .map_flags_fixed to a new map type, it's okay since > it's _safe_ to forget to add these flags (and okay to add in future uapi-wise) as opposed > to the other way round where one can easily miss the opt-out case and potentially crash > the machine worst case.