Re: [PATCH bpf-next v6 04/10] bpf: hold module for bpf_struct_ops_map.

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 





On 10/26/23 14:11, Eduard Zingerman wrote:
On Sat, 2023-10-21 at 22:03 -0700, thinker.li@xxxxxxxxx wrote:
From: Kui-Feng Lee <thinker.li@xxxxxxxxx>

To ensure that a module remains accessible whenever a struct_ops object of
a struct_ops type provided by the module is still in use.

Signed-off-by: Kui-Feng Lee <thinker.li@xxxxxxxxx>
---
  include/linux/bpf.h         |  1 +
  include/linux/btf.h         |  2 +-
  kernel/bpf/bpf_struct_ops.c | 70 ++++++++++++++++++++++++++++++-------
  3 files changed, 60 insertions(+), 13 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 4f3b67932ded..26feb8a2da4f 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1626,6 +1626,7 @@ struct bpf_struct_ops {
  	void (*unreg)(void *kdata);
  	int (*update)(void *kdata, void *old_kdata);
  	int (*validate)(void *kdata);
+	struct module *owner;
  	const char *name;
  	struct btf_func_model func_models[BPF_STRUCT_OPS_MAX_NR_MEMBERS];
  };
diff --git a/include/linux/btf.h b/include/linux/btf.h
index 8e37f7eb02c7..6a64b372b7a0 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -575,7 +575,7 @@ struct bpf_struct_ops;
  struct bpf_struct_ops_desc;
struct bpf_struct_ops_desc *
-btf_add_struct_ops(struct bpf_struct_ops *st_ops);
+btf_add_struct_ops(struct btf *btf, struct bpf_struct_ops *st_ops);
  const struct bpf_struct_ops_desc *
  btf_get_struct_ops(struct btf *btf, u32 *ret_cnt);
diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index 0bc21a39257d..413a3f8b26ba 100644
--- a/kernel/bpf/bpf_struct_ops.c
+++ b/kernel/bpf/bpf_struct_ops.c
@@ -388,6 +388,7 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
  	const struct btf_member *member;
  	const struct btf_type *t = st_ops_desc->type;
  	struct bpf_tramp_links *tlinks;
+	struct module *mod = NULL;
  	void *udata, *kdata;
  	int prog_fd, err;
  	void *image, *image_end;
@@ -425,6 +426,14 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
  		goto unlock;
  	}
+ if (st_ops_desc->btf != btf_vmlinux) {
+		mod = btf_try_get_module(st_ops_desc->btf);
+		if (!mod) {
+			err = -EBUSY;

Nit: there is a disagreement about error code returned for
      failing btf_try_get_module() across verifier code base:
      - EINVAL is used 2 times;
      - ENXIO is used 3 times;
      - ENOTSUPP is used once.
      Are you sure EBUSY is a good choice here?


You are right. I think EINVAL is better or this case.



+			goto unlock;
+		}
+	}
+
  	memcpy(uvalue, value, map->value_size);
udata = &uvalue->data;
@@ -552,6 +561,10 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
  		 * can be seen once BPF_STRUCT_OPS_STATE_INUSE is set.
  		 */
  		smp_store_release(&kvalue->state, BPF_STRUCT_OPS_STATE_INUSE);
+		/* Hold the owner module until the struct_ops is
+		 * unregistered
+		 */
+		mod = NULL;
  		goto unlock;
  	}
@@ -568,6 +581,7 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
  	memset(uvalue, 0, map->value_size);
  	memset(kvalue, 0, map->value_size);
  unlock:
+	module_put(mod);
  	kfree(tlinks);
  	mutex_unlock(&st_map->lock);
  	return err;
@@ -588,6 +602,7 @@ static long bpf_struct_ops_map_delete_elem(struct bpf_map *map, void *key)
  	switch (prev_state) {
  	case BPF_STRUCT_OPS_STATE_INUSE:
  		st_map->st_ops_desc->st_ops->unreg(&st_map->kvalue.data);
+		module_put(st_map->st_ops_desc->st_ops->owner);
  		bpf_map_put(map);
  		return 0;
  	case BPF_STRUCT_OPS_STATE_TOBEFREE:
@@ -674,6 +689,7 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
  	size_t st_map_size;
  	struct bpf_struct_ops_map *st_map;
  	const struct btf_type *t, *vt;
+	struct module *mod = NULL;
  	struct bpf_map *map;
  	int ret;
@@ -681,9 +697,17 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
  	if (!st_ops_desc)
  		return ERR_PTR(-ENOTSUPP);
+ if (st_ops_desc->btf != btf_vmlinux) {
+		mod = btf_try_get_module(st_ops_desc->btf);
+		if (!mod)
+			return ERR_PTR(-EINVAL);
+	}
+
  	vt = st_ops_desc->value_type;
-	if (attr->value_size != vt->size)
-		return ERR_PTR(-EINVAL);
+	if (attr->value_size != vt->size) {
+		ret = -EINVAL;
+		goto errout;
+	}
t = st_ops_desc->type; @@ -694,17 +718,17 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
  		(vt->size - sizeof(struct bpf_struct_ops_value));
st_map = bpf_map_area_alloc(st_map_size, NUMA_NO_NODE);
-	if (!st_map)
-		return ERR_PTR(-ENOMEM);
+	if (!st_map) {
+		ret = -ENOMEM;
+		goto errout;
+	}
st_map->st_ops_desc = st_ops_desc;
  	map = &st_map->map;
ret = bpf_jit_charge_modmem(PAGE_SIZE);
-	if (ret) {
-		__bpf_struct_ops_map_free(map);
-		return ERR_PTR(ret);
-	}
+	if (ret)
+		goto errout_free;
st_map->image = bpf_jit_alloc_exec(PAGE_SIZE);
  	if (!st_map->image) {
@@ -713,23 +737,32 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
  		 * here.
  		 */
  		bpf_jit_uncharge_modmem(PAGE_SIZE);
-		__bpf_struct_ops_map_free(map);
-		return ERR_PTR(-ENOMEM);
+		ret = -ENOMEM;
+		goto errout_free;
  	}
  	st_map->uvalue = bpf_map_area_alloc(vt->size, NUMA_NO_NODE);
  	st_map->links =
  		bpf_map_area_alloc(btf_type_vlen(t) * sizeof(struct bpf_links *),
  				   NUMA_NO_NODE);
  	if (!st_map->uvalue || !st_map->links) {
-		__bpf_struct_ops_map_free(map);
-		return ERR_PTR(-ENOMEM);
+		ret = -ENOMEM;
+		goto errout_free;
  	}
mutex_init(&st_map->lock);
  	set_vm_flush_reset_perms(st_map->image);
  	bpf_map_init_from_attr(map, attr);
+ module_put(mod);
+
  	return map;
+
+errout_free:
+	__bpf_struct_ops_map_free(map);
+	btf = NULL;		/* has been released */
+errout:
+	module_put(mod);
+	return ERR_PTR(ret);
  }
static u64 bpf_struct_ops_map_mem_usage(const struct bpf_map *map)
@@ -811,6 +844,7 @@ static void bpf_struct_ops_map_link_dealloc(struct bpf_link *link)
  		 * bpf_struct_ops_link_create() fails to register.
  		 */
  		st_map->st_ops_desc->st_ops->unreg(&st_map->kvalue.data);
+		module_put(st_map->st_ops_desc->st_ops->owner);
  		bpf_map_put(&st_map->map);
  	}
  	kfree(st_link);
@@ -857,6 +891,10 @@ static int bpf_struct_ops_map_link_update(struct bpf_link *link, struct bpf_map
  	if (!bpf_struct_ops_valid_to_reg(new_map))
  		return -EINVAL;
+ /* The old map is holding the refcount for the owner module. The
+	 * ownership of the owner module refcount is going to be
+	 * transferred from the old map to the new map.
+	 */
  	if (!st_map->st_ops_desc->st_ops->update)
  		return -EOPNOTSUPP;
@@ -902,6 +940,7 @@ int bpf_struct_ops_link_create(union bpf_attr *attr)
  	struct bpf_link_primer link_primer;
  	struct bpf_struct_ops_map *st_map;
  	struct bpf_map *map;
+	struct btf *btf;
  	int err;
map = bpf_map_get(attr->link_create.map_fd);
@@ -926,8 +965,15 @@ int bpf_struct_ops_link_create(union bpf_attr *attr)
  	if (err)
  		goto err_out;
+ /* Hold the owner module until the struct_ops is unregistered. */
+	btf = st_map->st_ops_desc->btf;
+	if (btf != btf_vmlinux && !btf_try_get_module(btf)) {
+		err = -EINVAL;
+		goto err_out;
+	}
  	err = st_map->st_ops_desc->st_ops->reg(st_map->kvalue.data);
  	if (err) {
+		module_put(st_map->st_ops_desc->st_ops->owner);
  		bpf_link_cleanup(&link_primer);
  		link = NULL;
  		goto err_out;





[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux