[PATCH v1 bpf-next 2/9] bpf: Introduce opaque bpf_refcount struct and add btf_record plumbing

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



A 'struct bpf_refcount' is added to the set of opaque uapi/bpf.h types
meant for use in BPF programs. Similarly to other opaque types like
bpf_spin_lock and bpf_rbtree_node, the verifier needs to know where in
user-defined struct types a bpf_refcount can be located, so necessary
btf_record plumbing is added to enable this. bpf_refcount is sized to
hold a refcount_t.

Similarly to bpf_spin_lock, the offset of a bpf_refcount is cached in
btf_record as refcount_off in addition to being in the field array.
Caching refcount_off makes sense for this field because further patches
in the series will modify functions that take local kptrs (e.g.
bpf_obj_drop) to change their behavior if the type they're operating on
is refcounted. So enabling fast "is this type refcounted?" checks is
desirable.

No such verifier behavior changes are introduced in this patch, just
logic to recognize 'struct bpf_refcount' in btf_record.

Signed-off-by: Dave Marchevsky <davemarchevsky@xxxxxx>
---
 include/linux/bpf.h            |  8 ++++++++
 include/uapi/linux/bpf.h       |  4 ++++
 kernel/bpf/btf.c               | 12 +++++++++++-
 kernel/bpf/syscall.c           |  6 +++++-
 tools/include/uapi/linux/bpf.h |  4 ++++
 5 files changed, 32 insertions(+), 2 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 314f911fcf91..afb82e623663 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -187,6 +187,7 @@ enum btf_field_type {
 	BPF_RB_NODE    = (1 << 7),
 	BPF_GRAPH_NODE_OR_ROOT = BPF_LIST_NODE | BPF_LIST_HEAD |
 				 BPF_RB_NODE | BPF_RB_ROOT,
+	BPF_REFCOUNT   = (1 << 8),
 };
 
 typedef void (*btf_dtor_kfunc_t)(void *);
@@ -223,6 +224,7 @@ struct btf_record {
 	u32 field_mask;
 	int spin_lock_off;
 	int timer_off;
+	int refcount_off;
 	struct btf_field fields[];
 };
 
@@ -293,6 +295,8 @@ static inline const char *btf_field_type_name(enum btf_field_type type)
 		return "bpf_rb_root";
 	case BPF_RB_NODE:
 		return "bpf_rb_node";
+	case BPF_REFCOUNT:
+		return "bpf_refcount";
 	default:
 		WARN_ON_ONCE(1);
 		return "unknown";
@@ -317,6 +321,8 @@ static inline u32 btf_field_type_size(enum btf_field_type type)
 		return sizeof(struct bpf_rb_root);
 	case BPF_RB_NODE:
 		return sizeof(struct bpf_rb_node);
+	case BPF_REFCOUNT:
+		return sizeof(struct bpf_refcount);
 	default:
 		WARN_ON_ONCE(1);
 		return 0;
@@ -341,6 +347,8 @@ static inline u32 btf_field_type_align(enum btf_field_type type)
 		return __alignof__(struct bpf_rb_root);
 	case BPF_RB_NODE:
 		return __alignof__(struct bpf_rb_node);
+	case BPF_REFCOUNT:
+		return __alignof__(struct bpf_refcount);
 	default:
 		WARN_ON_ONCE(1);
 		return 0;
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index e3d3b5160d26..678b48374518 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -6975,6 +6975,10 @@ struct bpf_rb_node {
 	__u64 :64;
 } __attribute__((aligned(8)));
 
+struct bpf_refcount {
+	__u32 :32;
+} __attribute__((aligned(4)));
+
 struct bpf_sysctl {
 	__u32	write;		/* Sysctl is being read (= 0) or written (= 1).
 				 * Allows 1,2,4-byte read, but no write.
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index acd379361b4c..9fb29b41247c 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -3391,6 +3391,7 @@ static int btf_get_field_type(const char *name, u32 field_mask, u32 *seen_mask,
 	field_mask_test_name(BPF_LIST_NODE, "bpf_list_node");
 	field_mask_test_name(BPF_RB_ROOT,   "bpf_rb_root");
 	field_mask_test_name(BPF_RB_NODE,   "bpf_rb_node");
+	field_mask_test_name(BPF_REFCOUNT,  "bpf_refcount");
 
 	/* Only return BPF_KPTR when all other types with matchable names fail */
 	if (field_mask & BPF_KPTR) {
@@ -3439,6 +3440,7 @@ static int btf_find_struct_field(const struct btf *btf,
 		case BPF_TIMER:
 		case BPF_LIST_NODE:
 		case BPF_RB_NODE:
+		case BPF_REFCOUNT:
 			ret = btf_find_struct(btf, member_type, off, sz, field_type,
 					      idx < info_cnt ? &info[idx] : &tmp);
 			if (ret < 0)
@@ -3504,6 +3506,7 @@ static int btf_find_datasec_var(const struct btf *btf, const struct btf_type *t,
 		case BPF_TIMER:
 		case BPF_LIST_NODE:
 		case BPF_RB_NODE:
+		case BPF_REFCOUNT:
 			ret = btf_find_struct(btf, var_type, off, sz, field_type,
 					      idx < info_cnt ? &info[idx] : &tmp);
 			if (ret < 0)
@@ -3734,6 +3737,7 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type
 
 	rec->spin_lock_off = -EINVAL;
 	rec->timer_off = -EINVAL;
+	rec->refcount_off = -EINVAL;
 	for (i = 0; i < cnt; i++) {
 		field_type_size = btf_field_type_size(info_arr[i].type);
 		if (info_arr[i].off + field_type_size > value_size) {
@@ -3763,6 +3767,11 @@ struct btf_record *btf_parse_fields(const struct btf *btf, const struct btf_type
 			/* Cache offset for faster lookup at runtime */
 			rec->timer_off = rec->fields[i].offset;
 			break;
+		case BPF_REFCOUNT:
+			WARN_ON_ONCE(rec->refcount_off >= 0);
+			/* Cache offset for faster lookup at runtime */
+			rec->refcount_off = rec->fields[i].offset;
+			break;
 		case BPF_KPTR_UNREF:
 		case BPF_KPTR_REF:
 			ret = btf_parse_kptr(btf, &rec->fields[i], &info_arr[i]);
@@ -5307,6 +5316,7 @@ static const char *alloc_obj_fields[] = {
 	"bpf_list_node",
 	"bpf_rb_root",
 	"bpf_rb_node",
+	"bpf_refcount",
 };
 
 static struct btf_struct_metas *
@@ -5380,7 +5390,7 @@ btf_parse_struct_metas(struct bpf_verifier_log *log, struct btf *btf)
 		type = &tab->types[tab->cnt];
 		type->btf_id = i;
 		record = btf_parse_fields(btf, t, BPF_SPIN_LOCK | BPF_LIST_HEAD | BPF_LIST_NODE |
-						  BPF_RB_ROOT | BPF_RB_NODE, t->size);
+						  BPF_RB_ROOT | BPF_RB_NODE | BPF_REFCOUNT, t->size);
 		/* The record cannot be unset, treat it as an error if so */
 		if (IS_ERR_OR_NULL(record)) {
 			ret = PTR_ERR_OR_ZERO(record) ?: -EFAULT;
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 7fb962858d30..62f48ad72ac4 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -552,6 +552,7 @@ void btf_record_free(struct btf_record *rec)
 		case BPF_RB_NODE:
 		case BPF_SPIN_LOCK:
 		case BPF_TIMER:
+		case BPF_REFCOUNT:
 			/* Nothing to release */
 			break;
 		default:
@@ -599,6 +600,7 @@ struct btf_record *btf_record_dup(const struct btf_record *rec)
 		case BPF_RB_NODE:
 		case BPF_SPIN_LOCK:
 		case BPF_TIMER:
+		case BPF_REFCOUNT:
 			/* Nothing to acquire */
 			break;
 		default:
@@ -705,6 +707,7 @@ void bpf_obj_free_fields(const struct btf_record *rec, void *obj)
 			break;
 		case BPF_LIST_NODE:
 		case BPF_RB_NODE:
+		case BPF_REFCOUNT:
 			break;
 		default:
 			WARN_ON_ONCE(1);
@@ -1032,7 +1035,7 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
 
 	map->record = btf_parse_fields(btf, value_type,
 				       BPF_SPIN_LOCK | BPF_TIMER | BPF_KPTR | BPF_LIST_HEAD |
-				       BPF_RB_ROOT,
+				       BPF_RB_ROOT | BPF_REFCOUNT,
 				       map->value_size);
 	if (!IS_ERR_OR_NULL(map->record)) {
 		int i;
@@ -1071,6 +1074,7 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
 				break;
 			case BPF_KPTR_UNREF:
 			case BPF_KPTR_REF:
+			case BPF_REFCOUNT:
 				if (map->map_type != BPF_MAP_TYPE_HASH &&
 				    map->map_type != BPF_MAP_TYPE_PERCPU_HASH &&
 				    map->map_type != BPF_MAP_TYPE_LRU_HASH &&
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index d6c5a022ae28..775a3c775e23 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -6975,6 +6975,10 @@ struct bpf_rb_node {
 	__u64 :64;
 } __attribute__((aligned(8)));
 
+struct bpf_refcount {
+	__u32 :32;
+} __attribute__((aligned(4)));
+
 struct bpf_sysctl {
 	__u32	write;		/* Sysctl is being read (= 0) or written (= 1).
 				 * Allows 1,2,4-byte read, but no write.
-- 
2.34.1





[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux