The devmap map-value can be read from BPF-prog side, and could be used for a storage area per device. This could e.g. contain info on headers that need to be added when packets egress this device. This patchset adds a dynamic storage member to struct bpf_devmap_val. More importantly the struct bpf_devmap_val is made dynamic via leveraging and requiring BTF for struct sizes above 4. The only mandatory struct member is 'ifindex' with a fixed offset of zero. Signed-off-by: Jesper Dangaard Brouer <brouer@xxxxxxxxxx> --- kernel/bpf/devmap.c | 216 ++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 185 insertions(+), 31 deletions(-) diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index 4ab67b2d8159..9cf2dadcc0fe 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -48,6 +48,7 @@ #include <net/xdp.h> #include <linux/filter.h> #include <trace/events/xdp.h> +#include <linux/btf.h> #define DEV_CREATE_FLAG_MASK \ (BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY) @@ -60,13 +61,30 @@ struct xdp_dev_bulk_queue { unsigned int count; }; -/* DEVMAP values */ +/* DEVMAP map-value layout. + * + * The struct data-layout of map-value is a configuration interface. + * BPF-prog side has read-only access to this memory. + * + * The layout might be different than below, because some struct members are + * optional. This is made dynamic by requiring that userspace provides a BTF + * description of the struct layout, when creating the BPF-map. Struct names + * are important and part of the API, as BTF uses these names to identify members. + */ struct bpf_devmap_val { - __u32 ifindex; /* device index */ + __u32 ifindex; /* device index - mandatory */ union { int fd; /* prog fd on map write */ __u32 id; /* prog id on map read */ } bpf_prog; + struct { + /* This 'storage' member is meant as a dynamically sized area, + * that the BPF developer can redefine. As other members are added + * over time, this area can shrink, as size can be regained by + * not using members above. 
Add new members above this struct. + */ + unsigned char data[24]; + } storage; }; struct bpf_dtab_netdev { @@ -79,10 +97,18 @@ struct bpf_dtab_netdev { struct bpf_devmap_val val; }; +struct bpf_devmap_val_cfg { + struct { + int ifindex; + int bpf_prog; + } btf_offset; +}; + struct bpf_dtab { struct bpf_map map; struct bpf_dtab_netdev **netdev_map; /* DEVMAP type only */ struct list_head list; + struct bpf_devmap_val_cfg cfg; /* these are only used for DEVMAP_HASH type maps */ struct hlist_head *dev_index_head; @@ -116,20 +142,24 @@ static inline struct hlist_head *dev_map_index_hash(struct bpf_dtab *dtab, static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr) { - __u32 valsize = attr->value_size; u64 cost = 0; int err; - /* check sanity of attributes. 2 value sizes supported: - * 4 bytes: ifindex - * 8 bytes: ifindex + prog fd - */ + /* Value contents validated in dev_map_check_btf */ if (attr->max_entries == 0 || attr->key_size != 4 || - (valsize != offsetofend(struct bpf_devmap_val, ifindex) && - valsize != offsetofend(struct bpf_devmap_val, bpf_prog.fd)) || + attr->value_size > sizeof(struct bpf_devmap_val) || attr->map_flags & ~DEV_CREATE_FLAG_MASK) return -EINVAL; + /* Enforce BTF for userspace, unless dealing with legacy kABI */ + if (attr->value_size != 4 && + (!attr->btf_key_type_id || !attr->btf_value_type_id)) + return -EOPNOTSUPP; + + /* Mark BTF offset's as invalid */ + dtab->cfg.btf_offset.ifindex = -1; + dtab->cfg.btf_offset.bpf_prog = -1; + /* Lookup returns a pointer straight to dev->ifindex, so make sure the * verifier prevents writes from the BPF side */ @@ -199,6 +229,119 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr) return &dtab->map; } +struct expect { + u8 btf_kind; + bool mandatory; + int bit_offset; + int size; + const char *name; +}; + +static int btf_find_expect_layout_offset(const struct btf *btf, + const struct btf_type *value_type, + const struct expect *layout) +{ + const struct btf_member *member; + 
u32 i, off = -ENOENT; + + for_each_member(i, value_type, member) { + const struct btf_type *member_type; + const char *member_name; + + member_type = btf_type_skip_modifiers(btf, member->type, NULL); + if (BTF_INFO_KIND(member_type->info) != layout->btf_kind) { + continue; + } + + member_name = btf_name_by_offset(btf, member->name_off); + if (!member_name) + return -EINVAL; + + if (strcmp(layout->name, member_name)) + continue; + + if (layout->size > 0 && // btf_type_has_size(member_type) && + member_type->size != layout->size) + continue; + + off = btf_member_bit_offset(value_type, member); + if (layout->bit_offset > 0 && + layout->bit_offset != off) { + off = -ENOENT; + continue; + } + + return off; + } + return off; +} + +/* Expected BTF layout that match struct bpf_devmap_val */ +static const struct expect layout[] = { + {BTF_KIND_INT, true, 0, 4, "ifindex"}, + {BTF_KIND_UNION, false, 32, 4, "bpf_prog"}, + {BTF_KIND_STRUCT, false, -1, -1, "storage"} +}; + +static int dev_map_check_btf(const struct bpf_map *map, + const struct btf *btf, + const struct btf_type *key_type, + const struct btf_type *value_type) +{ + struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); + u32 found_members_cnt = 0; + u32 int_data; + int off; + u32 i; + + /* Validate KEY type and size */ + if (BTF_INFO_KIND(key_type->info) != BTF_KIND_INT) + return -EOPNOTSUPP; + + int_data = *(u32 *)(key_type + 1); + if (BTF_INT_BITS(int_data) != 32 || BTF_INT_OFFSET(int_data) != 0) + return -EOPNOTSUPP; + + /* Validate VALUE have layout that match/map-to struct bpf_devmap_val + * - With a flexible size of member 'storage'. 
+ */ + + if (BTF_INFO_KIND(value_type->info) != BTF_KIND_STRUCT) + return -EOPNOTSUPP; + + /* Struct/union members in BTF must not exceed (max) expected members */ + if (btf_type_vlen(value_type) > ARRAY_SIZE(layout)) + return -E2BIG; + + for (i = 0; i < ARRAY_SIZE(layout); i++) { + off = btf_find_expect_layout_offset(btf, value_type, &layout[i]); + + if (off < 0 && layout[i].mandatory) + return -EUCLEAN; + + if (off >= 0) + found_members_cnt++; + + /* Transfer layout config to map */ + switch (i) { + case 0: + dtab->cfg.btf_offset.ifindex = off; + break; + case 1: + dtab->cfg.btf_offset.bpf_prog = off; + break; + default: + break; + } + } + + /* Detect if BTF/vlen have members that were not found */ + if (btf_type_vlen(value_type) > found_members_cnt) + return -E2BIG; + + return 0; +} + static void dev_map_free(struct bpf_map *map) { struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); @@ -601,42 +744,53 @@ static int dev_map_hash_delete_elem(struct bpf_map *map, void *key) return ret; } +static inline bool map_value_has_bpf_prog(const struct bpf_dtab *dtab) +{ + return dtab->cfg.btf_offset.bpf_prog >= 0; +} + static struct bpf_dtab_netdev *__dev_map_alloc_node(struct net *net, - struct bpf_dtab *dtab, + struct bpf_map *map, struct bpf_devmap_val *val, unsigned int idx) { + struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); struct bpf_prog *prog = NULL; struct bpf_dtab_netdev *dev; - dev = kmalloc_node(sizeof(*dev), GFP_ATOMIC | __GFP_NOWARN, + dev = kzalloc_node(sizeof(*dev), GFP_ATOMIC | __GFP_NOWARN, dtab->map.numa_node); if (!dev) return ERR_PTR(-ENOMEM); + /* Member: ifindex is mandatory, both BTF and kABI */ dev->dev = dev_get_by_index(net, val->ifindex); if (!dev->dev) goto err_out; - if (val->bpf_prog.fd >= 0) { - prog = bpf_prog_get_type_dev(val->bpf_prog.fd, - BPF_PROG_TYPE_XDP, false); - if (IS_ERR(prog)) - goto err_put_dev; - if (prog->expected_attach_type != BPF_XDP_DEVMAP) - goto err_put_prog; + /* Member: bpf_prog union is 
optional, but have fixed offset if exist */ + if (map_value_has_bpf_prog(dtab)) { + if (val->bpf_prog.fd >= 0) { + prog = bpf_prog_get_type_dev(val->bpf_prog.fd, + BPF_PROG_TYPE_XDP, false); + if (IS_ERR(prog)) + goto err_put_dev; + if (prog->expected_attach_type != BPF_XDP_DEVMAP) + goto err_put_prog; + } + if (prog) { + dev->xdp_prog = prog; + val->bpf_prog.id = prog->aux->id; + } else { + dev->xdp_prog = NULL; + val->bpf_prog.id = 0; + } } - dev->idx = idx; dev->dtab = dtab; - if (prog) { - dev->xdp_prog = prog; - dev->val.bpf_prog.id = prog->aux->id; - } else { - dev->xdp_prog = NULL; - dev->val.bpf_prog.id = 0; - } - dev->val.ifindex = val->ifindex; + + /* After adjustment copy map value to get storage area */ + memcpy(&dev->val, val, map->value_size); return dev; err_put_prog: @@ -672,7 +826,7 @@ static int __dev_map_update_elem(struct net *net, struct bpf_map *map, if (val.bpf_prog.fd != -1) return -EINVAL; } else { - dev = __dev_map_alloc_node(net, dtab, &val, i); + dev = __dev_map_alloc_node(net, map, &val, i); if (IS_ERR(dev)) return PTR_ERR(dev); } @@ -717,7 +871,7 @@ static int __dev_map_hash_update_elem(struct net *net, struct bpf_map *map, if (old_dev && (map_flags & BPF_NOEXIST)) goto out_err; - dev = __dev_map_alloc_node(net, dtab, &val, idx); + dev = __dev_map_alloc_node(net, map, &val, idx); if (IS_ERR(dev)) { err = PTR_ERR(dev); goto out_err; @@ -762,7 +916,7 @@ const struct bpf_map_ops dev_map_ops = { .map_lookup_elem = dev_map_lookup_elem, .map_update_elem = dev_map_update_elem, .map_delete_elem = dev_map_delete_elem, - .map_check_btf = map_check_no_btf, + .map_check_btf = dev_map_check_btf, }; const struct bpf_map_ops dev_map_hash_ops = { @@ -772,7 +926,7 @@ const struct bpf_map_ops dev_map_hash_ops = { .map_lookup_elem = dev_map_hash_lookup_elem, .map_update_elem = dev_map_hash_update_elem, .map_delete_elem = dev_map_hash_delete_elem, - .map_check_btf = map_check_no_btf, + .map_check_btf = dev_map_check_btf, }; static void 
dev_map_hash_remove_netdev(struct bpf_dtab *dtab,