Here, the target refers to a particular data structure inside the
kernel that we want to dump. For example, it can be all task_structs
in the current pid namespace, or it can be all open files for all
task_structs in the current pid namespace.

Each target is identified by the following information:
   target_rel_path    <=== relative path to /sys/kernel/bpfdump
   target_proto       <=== kernel func proto used by the kernel verifier
   prog_ctx_type_name <=== prog ctx type used by bpf programs
   seq_ops            <=== seq_ops for seq_file operations
   seq_priv_size      <=== seq_file private data size
   target_feature     <=== target specific feature which needs
                           handling outside seq_ops.

The target relative path is a directory relative to /sys/kernel/bpfdump/.
For example, it could be:
   task                  <=== all tasks
   task/file             <=== all open files under all tasks
   ipv6_route            <=== all ipv6_routes
   tcp6/sk_local_storage <=== all tcp6 socket local storages
   foo/bar/tar           <=== all tar's in bar in foo

The "target_feature" is mostly used for reusing existing seq_ops.
For example, for /proc/net/<> stats, the "net" namespace is often
stored in the file private data. The target_feature enables a bpf
based dumper to set "net" properly for itself before calling the
shared seq_ops.

bpf_dump_reg_target() is implemented so that targets can register
themselves. Currently, modules are not supported, so there is no
bpf_dump_unreg_target(). The main reason is that BTF is not yet
available for modules.

Since a target might call bpf_dump_reg_target() before the bpfdump
mount point is created, __bpfdump_init() may be called from
bpf_dump_reg_target() as well.

The file-based dumpers will be regular files under the specific
target directory.
For example,
   task/my1      <=== dumper "my1" iterates through all tasks
   task/file/my2 <=== dumper "my2" iterates through all open files
                      under all tasks

Signed-off-by: Yonghong Song <yhs@xxxxxx>
---
 include/linux/bpf.h |  20 ++++
 kernel/bpf/dump.c   | 229 +++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 247 insertions(+), 2 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index fd2b2322412d..84c7eb40d7bc 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -31,6 +31,7 @@ struct seq_file;
 struct btf;
 struct btf_type;
 struct exception_table_entry;
+struct seq_operations;
 
 extern struct idr btf_idr;
 extern spinlock_t btf_idr_lock;
@@ -1109,6 +1110,25 @@ struct bpf_link *bpf_link_get_from_fd(u32 ufd);
 int bpf_obj_pin_user(u32 ufd, const char __user *pathname);
 int bpf_obj_get_user(const char __user *pathname, int flags);
 
+/* Description of a bpfdump target, passed to bpf_dump_reg_target().
+ * @target: directory path relative to /sys/kernel/bpfdump, e.g. "task/file"
+ * @target_proto: kernel func proto used by the kernel verifier
+ * @prog_ctx_type_name: prog ctx type used by bpf programs
+ * @seq_ops: seq_ops for seq_file operations
+ * @seq_priv_size: seq_file private data size
+ * @target_feature: target specific feature handled outside seq_ops
+ */
+struct bpf_dump_reg {
+	const char *target;
+	const char *target_proto;
+	const char *prog_ctx_type_name;
+	const struct seq_operations *seq_ops;
+	u32 seq_priv_size;
+	u32 target_feature;
+};
+
+int bpf_dump_reg_target(struct bpf_dump_reg *reg_info);
+
 int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value);
 int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value);
 int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value,
diff --git a/kernel/bpf/dump.c b/kernel/bpf/dump.c
index e0c33486e0e7..e8b46f9e0ee0 100644
--- a/kernel/bpf/dump.c
+++ b/kernel/bpf/dump.c
@@ -12,6 +12,196 @@
 #include <linux/filter.h>
 #include <linux/bpf.h>
 
+/* Per-target state, created by bpf_dump_reg_target(). The structure is also
+ * stored as i_private of the target's directory inode.
+ */
+struct bpfdump_target_info {
+	struct list_head list;
+	const char *target;
+	const char *target_proto;
+	struct dentry *dir_dentry;
+	const struct seq_operations *seq_ops;
+	u32 seq_priv_size;
+	u32 target_feature;
+};
+
+struct bpfdump_targets {
+	struct list_head dumpers;
+	struct mutex dumper_mutex;
+};
+
+/* registered dump targets */
+static struct bpfdump_targets dump_targets;
+
+static struct dentry *bpfdump_dentry;
+
+static struct dentry *bpfdump_add_dir(const char *name, struct dentry *parent,
+				      const struct inode_operations *i_ops,
+				      void *data);
+static int __bpfdump_init(void);
+
+/* 0: not inited, > 0: successful, < 0: previous init failed */
+static int bpfdump_inited;
+
+/* Remove a dumper file and release its private data. */
+static int dumper_unlink(struct inode *dir, struct dentry *dentry)
+{
+	struct inode *inode = d_inode(dentry);
+
+	/* bpfdump_free_inode() also kfree()s i_private; clear the pointer
+	 * so the object cannot be freed a second time from there.
+	 */
+	kfree(inode->i_private);
+	inode->i_private = NULL;
+	return simple_unlink(dir, dentry);
+}
+
+static const struct inode_operations bpfdump_dir_iops = {
+	.lookup		= simple_lookup,
+	.unlink		= dumper_unlink,
+};
+
+/* Register a dump target and create its directory under the bpfdump root.
+ *
+ * A nested target such as "task/file" requires its parent target ("task")
+ * to be registered first; otherwise -ENOENT is returned. The bpfdump mount
+ * is set up on demand if it does not exist yet.
+ *
+ * Return: 0 on success, a negative errno on failure.
+ */
+int bpf_dump_reg_target(struct bpf_dump_reg *reg_info)
+{
+	struct bpfdump_target_info *tinfo, *ptinfo;
+	struct dentry *dentry, *parent;
+	const char *target, *lastslash;
+	size_t parent_len;
+	int err;
+
+	if (!bpfdump_dentry) {
+		err = __bpfdump_init();
+		if (err)
+			return err;
+	}
+
+	tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL);
+	if (!tinfo)
+		return -ENOMEM;
+
+	target = reg_info->target;
+	tinfo->target = target;
+	tinfo->target_proto = reg_info->target_proto;
+	tinfo->seq_ops = reg_info->seq_ops;
+	tinfo->seq_priv_size = reg_info->seq_priv_size;
+	tinfo->target_feature = reg_info->target_feature;
+	INIT_LIST_HEAD(&tinfo->list);
+
+	lastslash = strrchr(target, '/');
+	parent = bpfdump_dentry;
+	if (lastslash) {
+		/* everything before the last '/' names the parent target */
+		parent_len = lastslash - target;
+		parent = NULL;
+
+		mutex_lock(&dump_targets.dumper_mutex);
+		list_for_each_entry(ptinfo, &dump_targets.dumpers, list) {
+			if (strlen(ptinfo->target) == parent_len &&
+			    strncmp(ptinfo->target, target, parent_len) == 0) {
+				parent = ptinfo->dir_dentry;
+				break;
+			}
+		}
+		mutex_unlock(&dump_targets.dumper_mutex);
+		if (!parent) {
+			err = -ENOENT;
+			goto free_tinfo;
+		}
+
+		target = lastslash + 1;
+	}
+	dentry = bpfdump_add_dir(target, parent, &bpfdump_dir_iops, tinfo);
+	if (IS_ERR(dentry)) {
+		err = PTR_ERR(dentry);
+		goto free_tinfo;
+	}
+
+	tinfo->dir_dentry = dentry;
+
+	mutex_lock(&dump_targets.dumper_mutex);
+	list_add(&tinfo->list, &dump_targets.dumpers);
+	mutex_unlock(&dump_targets.dumper_mutex);
+	return 0;
+
+free_tinfo:
+	kfree(tinfo);
+	return err;
+}
+
+/* Create a new entry @name under @parent with the given @mode.
+ *
+ * @data becomes i_private of the new inode. Returns the new dentry, or an
+ * ERR_PTR() if the lookup fails, the name already exists (-EEXIST) or no
+ * inode can be allocated (-ENOMEM).
+ */
+static struct dentry *
+bpfdump_create_dentry(const char *name, umode_t mode, struct dentry *parent,
+		      void *data, const struct inode_operations *i_ops,
+		      const struct file_operations *f_ops)
+{
+	struct inode *dir, *inode;
+	struct dentry *dentry;
+	int err;
+
+	dir = d_inode(parent);
+
+	inode_lock(dir);
+	dentry = lookup_one_len(name, parent, strlen(name));
+	if (IS_ERR(dentry))
+		goto unlock;
+
+	if (d_really_is_positive(dentry)) {
+		err = -EEXIST;
+		goto dentry_put;
+	}
+
+	inode = new_inode(dir->i_sb);
+	if (!inode) {
+		err = -ENOMEM;
+		goto dentry_put;
+	}
+
+	inode->i_ino = get_next_ino();
+	inode->i_mode = mode;
+	inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
+	inode->i_private = data;
+	inode->i_fop = f_ops;
+
+	if (S_ISDIR(mode)) {
+		inode->i_op = i_ops;
+		/* a new directory adds a link to itself and to its parent */
+		inc_nlink(inode);
+		inc_nlink(dir);
+	}
+
+	d_instantiate(dentry, inode);
+	inode_unlock(dir);
+	return dentry;
+
+dentry_put:
+	dput(dentry);
+	dentry = ERR_PTR(err);
+unlock:
+	inode_unlock(dir);
+	return dentry;
+}
+
+static struct dentry *
+bpfdump_add_dir(const char *name, struct dentry *parent,
+		const struct inode_operations *i_ops, void *data)
+{
+	return bpfdump_create_dentry(name, S_IFDIR | 0755, parent,
+				     data, i_ops, &simple_dir_operations);
+}
+
 static void bpfdump_free_inode(struct inode *inode)
 {
 	kfree(inode->i_private);
@@ -58,22 +248,57 @@ static struct file_system_type fs_type = {
 	.kill_sb	= kill_litter_super,
 };
 
-static int __init bpfdump_init(void)
+/* Create the bpfdump mount point, register the filesystem and pin a mount
+ * so that targets can be populated. The outcome is cached in bpfdump_inited,
+ * hence repeated calls return the result of the first attempt.
+ */
+static int __bpfdump_init(void)
 {
+	struct vfsmount *mount = NULL;
+	int mount_count = 0;
 	int ret;
 
+	if (bpfdump_inited)
+		return bpfdump_inited < 0 ? bpfdump_inited : 0;
+
 	ret = sysfs_create_mount_point(kernel_kobj, "bpfdump");
 	if (ret)
-		return ret;
+		goto done;
 
 	ret = register_filesystem(&fs_type);
 	if (ret)
 		goto remove_mount;
 
+	/* get a reference to mount so we can populate targets
+	 * at init time.
+	 */
+	ret = simple_pin_fs(&fs_type, &mount, &mount_count);
+	if (ret)
+		goto unregister_fs;
+
+	bpfdump_dentry = mount->mnt_root;
+
+	INIT_LIST_HEAD(&dump_targets.dumpers);
+	mutex_init(&dump_targets.dumper_mutex);
+
+	bpfdump_inited = 1;
 	return 0;
 
+unregister_fs:
+	unregister_filesystem(&fs_type);
 remove_mount:
 	sysfs_remove_mount_point(kernel_kobj, "bpfdump");
+done:
+	bpfdump_inited = ret;
 	return ret;
 }
+
+/* initcall entry; nothing to do if a target registration already ran init */
+static int __init bpfdump_init(void)
+{
+	if (bpfdump_dentry)
+		return 0;
+
+	return __bpfdump_init();
+}
 core_initcall(bpfdump_init);
-- 
2.24.1