[PATCH bpf-next 04/10] bpf: remember if bpf_map was unprivileged and use that consistently

While some maps are allowed to be created with no extra privileges
(like CAP_BPF and/or CAP_NET_ADMIN), some parts of BPF verifier logic
do care about the presence of privileges for a given map.

One such place is Spectre v1 mitigations in ARRAY maps. Another is the
extra restrictions on maps with special embedded BTF-defined fields
(like spin locks, etc).

So record whether a map was privileged or not at creation time and
don't recheck bpf_capable() afterwards.

Handling Spectre v1 mitigations in ARRAY-like maps required a bit of
code acrobatics: extracting the size adjustment into a separate
bpf_array_adjust_for_spec_v1() helper and calling it explicitly for
ARRAY, PERCPU_ARRAY, PROG_ARRAY, CGROUP_ARRAY, PERF_EVENT_ARRAY and
ARRAY_OF_MAPS to adjust the passed-in bpf_attr, because these
adjustments have to happen before map creation. An alternative would
be to extend the map_ops->map_alloc() callback with an unprivileged
flag, which seemed excessive, as no other map types care, but that
could be done if preferred.
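
To illustrate the sizing logic outside of kernel context, here is a
minimal standalone C sketch (userspace stand-ins for fls_long() and
kernel u32 types; helper names are hypothetical, not the kernel's):
round max_entries up to the next power of two so speculation stays
within index_mask, and fail with -E2BIG if that rounding overflows u32.

    #include <errno.h>
    #include <stdint.h>
    #include <stdio.h>

    /* All-ones mask wide enough to cover max_entries. The math is done
     * in 64 bits to sidestep the 32-bit shift UB the kernel comment
     * warns about. max_entries == 0 is rejected earlier in the kernel
     * by array_map_alloc_check().
     */
    static uint32_t index_mask_for(uint32_t max_entries)
    {
            uint64_t nbits = 0;

            while ((1ULL << nbits) < max_entries)
                    nbits++;
            return (uint32_t)((1ULL << nbits) - 1);
    }

    static int adjust_for_spec_v1(uint32_t *max_entries)
    {
            uint32_t mask = index_mask_for(*max_entries);
            uint32_t rounded = mask + 1;

            if (rounded < *max_entries) /* u32 wrap: mask was 0xffffffff */
                    return -E2BIG;
            *max_entries = rounded;
            return 0;
    }

    int main(void)
    {
            uint32_t n = 1000;

            if (!adjust_for_spec_v1(&n))
                    printf("1000 -> %u, mask 0x%x\n", n, n - 1); /* 1024, 0x3ff */
            return 0;
    }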

But once the map->unpriv flag is recorded, handling BTF-defined fields
was a breeze: the bpf_capable() check buried deeper in map_check_btf()
validation logic can simply be dropped.

One extra bit that required consideration was remembering the
unprivileged flag when dealing with map-in-maps and taking it into
account when checking map metadata compatibility. Given the
unprivileged bit is important for correctness, it should be taken into
account just like key and value sizes.
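
As a sketch of that compatibility rule (plain C with a hypothetical
pared-down struct, mirroring the shape of bpf_map_meta_equal() rather
than reproducing it), two inner maps differing only in the recorded
unpriv bit must not be interchangeable:

    #include <stdbool.h>
    #include <stdint.h>

    struct map_meta {
            uint32_t map_type, key_size, value_size, map_flags;
            bool unpriv; /* recorded once at map creation time */
    };

    static bool meta_equal(const struct map_meta *a, const struct map_meta *b)
    {
            return a->map_type == b->map_type &&
                   a->key_size == b->key_size &&
                   a->value_size == b->value_size &&
                   a->map_flags == b->map_flags &&
                   a->unpriv == b->unpriv; /* new: privilege must match too */
    }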

Signed-off-by: Andrii Nakryiko <andrii@xxxxxxxxxx>
---
 include/linux/bpf.h     |  4 ++-
 kernel/bpf/arraymap.c   | 59 ++++++++++++++++++++++++-----------------
 kernel/bpf/map_in_map.c |  3 ++-
 kernel/bpf/syscall.c    | 25 +++++++++++++++--
 kernel/bpf/verifier.c   |  6 ++---
 5 files changed, 65 insertions(+), 32 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 456f33b9d205..479657bb113e 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -273,7 +273,7 @@ struct bpf_map {
 		bool jited;
 		bool xdp_has_frags;
 	} owner;
-	bool bypass_spec_v1;
+	bool unpriv;
 	bool frozen; /* write-once; write-protected by freeze_mutex */
 };
 
@@ -2058,6 +2058,8 @@ static inline bool bpf_bypass_spec_v1(void)
 	return perfmon_capable();
 }
 
+int bpf_array_adjust_for_spec_v1(union bpf_attr *attr);
+
 static inline bool bpf_bypass_spec_v4(void)
 {
 	return perfmon_capable();
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 2058e89b5ddd..a51d22a3afd1 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -77,18 +77,9 @@ int array_map_alloc_check(union bpf_attr *attr)
 	return 0;
 }
 
-static struct bpf_map *array_map_alloc(union bpf_attr *attr)
+static u32 array_index_mask(u32 max_entries)
 {
-	bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
-	int numa_node = bpf_map_attr_numa_node(attr);
-	u32 elem_size, index_mask, max_entries;
-	bool bypass_spec_v1 = bpf_bypass_spec_v1();
-	u64 array_size, mask64;
-	struct bpf_array *array;
-
-	elem_size = round_up(attr->value_size, 8);
-
-	max_entries = attr->max_entries;
+	u64 mask64;
 
 	/* On 32 bit archs roundup_pow_of_two() with max_entries that has
 	 * upper most bit set in u32 space is undefined behavior due to
@@ -98,17 +89,38 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
 	mask64 = 1ULL << mask64;
 	mask64 -= 1;
 
-	index_mask = mask64;
-	if (!bypass_spec_v1) {
-		/* round up array size to nearest power of 2,
-		 * since cpu will speculate within index_mask limits
-		 */
-		max_entries = index_mask + 1;
-		/* Check for overflows. */
-		if (max_entries < attr->max_entries)
-			return ERR_PTR(-E2BIG);
-	}
+	return (u32)mask64;
+}
+
+int bpf_array_adjust_for_spec_v1(union bpf_attr *attr)
+{
+	u32 max_entries, index_mask;
+
+	/* round up array size to nearest power of 2,
+	 * since cpu will speculate within index_mask limits
+	 */
+	index_mask = array_index_mask(attr->max_entries);
+	max_entries = index_mask + 1;
+	/* Check for overflows. */
+	if (max_entries < attr->max_entries)
+		return -E2BIG;
+
+	attr->max_entries = max_entries;
+	return 0;
+}
 
+static struct bpf_map *array_map_alloc(union bpf_attr *attr)
+{
+	bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
+	int numa_node = bpf_map_attr_numa_node(attr);
+	u32 elem_size, index_mask, max_entries;
+	u64 array_size;
+	struct bpf_array *array;
+
+	elem_size = round_up(attr->value_size, 8);
+
+	max_entries = attr->max_entries;
+	index_mask = array_index_mask(max_entries);
 	array_size = sizeof(*array);
 	if (percpu) {
 		array_size += (u64) max_entries * sizeof(void *);
@@ -140,7 +152,6 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
 	if (!array)
 		return ERR_PTR(-ENOMEM);
 	array->index_mask = index_mask;
-	array->map.bypass_spec_v1 = bypass_spec_v1;
 
 	/* copy mandatory map attributes */
 	bpf_map_init_from_attr(&array->map, attr);
@@ -216,7 +227,7 @@ static int array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
 
 	*insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
 	*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
-	if (!map->bypass_spec_v1) {
+	if (map->unpriv) {
 		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 4);
 		*insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
 	} else {
@@ -1373,7 +1384,7 @@ static int array_of_map_gen_lookup(struct bpf_map *map,
 
 	*insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
 	*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
-	if (!map->bypass_spec_v1) {
+	if (map->unpriv) {
 		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 6);
 		*insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
 	} else {
diff --git a/kernel/bpf/map_in_map.c b/kernel/bpf/map_in_map.c
index 2c5c64c2a53b..21cb4be92097 100644
--- a/kernel/bpf/map_in_map.c
+++ b/kernel/bpf/map_in_map.c
@@ -41,6 +41,7 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)
 		goto put;
 	}
 
+	inner_map_meta->unpriv = inner_map->unpriv;
 	inner_map_meta->map_type = inner_map->map_type;
 	inner_map_meta->key_size = inner_map->key_size;
 	inner_map_meta->value_size = inner_map->value_size;
@@ -69,7 +70,6 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)
 	/* Misc members not needed in bpf_map_meta_equal() check. */
 	inner_map_meta->ops = inner_map->ops;
 	if (inner_map->ops == &array_map_ops) {
-		inner_map_meta->bypass_spec_v1 = inner_map->bypass_spec_v1;
 		container_of(inner_map_meta, struct bpf_array, map)->index_mask =
 		     container_of(inner_map, struct bpf_array, map)->index_mask;
 	}
@@ -98,6 +98,7 @@ bool bpf_map_meta_equal(const struct bpf_map *meta0,
 		meta0->key_size == meta1->key_size &&
 		meta0->value_size == meta1->value_size &&
 		meta0->map_flags == meta1->map_flags &&
+		meta0->unpriv == meta1->unpriv &&
 		btf_record_equal(meta0->record, meta1->record);
 }
 
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 92127eaee467..ffc61a764fe5 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1010,7 +1010,7 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
 	if (!IS_ERR_OR_NULL(map->record)) {
 		int i;
 
-		if (!bpf_capable()) {
+		if (map->unpriv) {
 			ret = -EPERM;
 			goto free_map_tab;
 		}
@@ -1100,6 +1100,7 @@ static int map_create(union bpf_attr *attr)
 	int numa_node = bpf_map_attr_numa_node(attr);
 	u32 map_type = attr->map_type;
 	struct bpf_map *map;
+	bool unpriv;
 	int f_flags;
 	int err;
 
@@ -1176,6 +1177,7 @@ static int map_create(union bpf_attr *attr)
 	case BPF_MAP_TYPE_CPUMAP:
 		if (!bpf_capable())
 			return -EPERM;
+		unpriv = false;
 		break;
 	case BPF_MAP_TYPE_SOCKMAP:
 	case BPF_MAP_TYPE_SOCKHASH:
@@ -1184,6 +1186,7 @@ static int map_create(union bpf_attr *attr)
 	case BPF_MAP_TYPE_XSKMAP:
 		if (!capable(CAP_NET_ADMIN))
 			return -EPERM;
+		unpriv = false;
 		break;
 	case BPF_MAP_TYPE_ARRAY:
 	case BPF_MAP_TYPE_PERCPU_ARRAY:
@@ -1198,18 +1201,36 @@ static int map_create(union bpf_attr *attr)
 	case BPF_MAP_TYPE_USER_RINGBUF:
 	case BPF_MAP_TYPE_CGROUP_STORAGE:
 	case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE:
-		/* unprivileged */
+		/* unprivileged is OK, but we still record if we had CAP_BPF */
+		unpriv = !bpf_capable();
 		break;
 	default:
 		WARN(1, "unsupported map type %d", map_type);
 		return -EPERM;
 	}
 
+	/* ARRAY-like maps have special sizing provisions for mitigating Spectre v1 */
+	if (unpriv) {
+		switch (map_type) {
+		case BPF_MAP_TYPE_ARRAY:
+		case BPF_MAP_TYPE_PERCPU_ARRAY:
+		case BPF_MAP_TYPE_PROG_ARRAY:
+		case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
+		case BPF_MAP_TYPE_CGROUP_ARRAY:
+		case BPF_MAP_TYPE_ARRAY_OF_MAPS:
+			err = bpf_array_adjust_for_spec_v1(attr);
+			if (err)
+				return err;
+			break;
+		}
+	}
+
 	map = ops->map_alloc(attr);
 	if (IS_ERR(map))
 		return PTR_ERR(map);
 	map->ops = ops;
 	map->map_type = map_type;
+	map->unpriv = unpriv;
 
 	err = bpf_obj_name_cpy(map->name, attr->map_name,
 			       sizeof(attr->map_name));
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index ff4a8ab99f08..481aaf189183 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -8731,11 +8731,9 @@ record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
 	}
 
 	if (!BPF_MAP_PTR(aux->map_ptr_state))
-		bpf_map_ptr_store(aux, meta->map_ptr,
-				  !meta->map_ptr->bypass_spec_v1);
+		bpf_map_ptr_store(aux, meta->map_ptr, meta->map_ptr->unpriv);
 	else if (BPF_MAP_PTR(aux->map_ptr_state) != meta->map_ptr)
-		bpf_map_ptr_store(aux, BPF_MAP_PTR_POISON,
-				  !meta->map_ptr->bypass_spec_v1);
+		bpf_map_ptr_store(aux, BPF_MAP_PTR_POISON, meta->map_ptr->unpriv);
 	return 0;
 }
 
-- 
2.34.1