Introduce a tag structure for directories in bpffs. A tag carries special
information about a directory. For example, a BPF_DIR_KERNFS_REP tag
denotes that a directory is a replica of a kernfs hierarchy.

At mkdir, if the parent directory has a tag, the child directory also gets
a tag. For KERNFS_REP directories, the tag references a kernfs node. The
KERNFS_REP hierarchy mirrors the hierarchy in kernfs. Userspace is
responsible for keeping the two hierarchies in sync.

The initial tag can be created by pinning a certain type of bpf object.
The following patches will introduce such objects, and the tagged
directory will mirror the cgroup hierarchy.

Tags are destroyed at rmdir: the tag of the directory being removed is
released once the removal has succeeded.

Signed-off-by: Hao Luo <haoluo@xxxxxxxxxx>
---
 kernel/bpf/inode.c | 111 ++++++++++++++++++++++++++++++++++++++++++++-
 kernel/bpf/inode.h |  22 +++++++++
 2 files changed, 132 insertions(+), 1 deletion(-)
 create mode 100644 kernel/bpf/inode.h

diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index 5a8d9f7467bf..ecc357009df5 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -16,11 +16,13 @@
 #include <linux/fs.h>
 #include <linux/fs_context.h>
 #include <linux/fs_parser.h>
+#include <linux/kernfs.h>
 #include <linux/kdev_t.h>
 #include <linux/filter.h>
 #include <linux/bpf.h>
 #include <linux/bpf_trace.h>
 #include "preload/bpf_preload.h"
+#include "inode.h"
 
 enum bpf_type {
 	BPF_TYPE_UNSPEC	= 0,
@@ -142,6 +144,59 @@ static int bpf_inode_type(const struct inode *inode, enum bpf_type *type)
 	return 0;
 }
 
+/* inode_tag - return the tag attached to a directory inode.
+ * Returns NULL if @inode is not a directory or has no tag (i_private unset).
+ */
+static struct bpf_dir_tag *inode_tag(const struct inode *inode)
+{
+	if (unlikely(!S_ISDIR(inode->i_mode)))
+		return NULL;
+
+	return inode->i_private;
+}
+
+/* tag_dir_inode - tag a newly created directory.
+ * @tag: tag of parent directory
+ * @dentry: dentry of the new directory
+ * @inode: inode of the new directory
+ *
+ * Called from bpf_mkdir. Looks up the kernfs node named after @dentry under
+ * the parent's kernfs node and attaches a tag referencing it to @inode.
+ *
+ * Returns 0 on success, -ENOENT if no such kernfs node exists, -EPERM if it
+ * is not a directory, or -ENOMEM if the tag cannot be allocated.
+ */
+static int tag_dir_inode(const struct bpf_dir_tag *tag,
+			 const struct dentry *dentry, struct inode *inode)
+{
+	struct bpf_dir_tag *t;
+	struct kernfs_node *kn;
+
+	WARN_ON(tag->type != BPF_DIR_KERNFS_REP);
+
+	/* kn is put at tag deallocation. */
+	kn = kernfs_find_and_get_ns(tag->private, dentry->d_name.name, NULL);
+	if (unlikely(!kn))
+		return -ENOENT;
+
+	if (unlikely(kernfs_type(kn) != KERNFS_DIR)) {
+		kernfs_put(kn);
+		return -EPERM;
+	}
+
+	t = kzalloc(sizeof(*t), GFP_KERNEL | __GFP_NOWARN);
+	if (unlikely(!t)) {
+		kernfs_put(kn);
+		return -ENOMEM;
+	}
+
+	t->type = tag->type;
+	t->private = kn;
+
+	inode->i_private = t;
+	return 0;
+}
+
 static void bpf_dentry_finalize(struct dentry *dentry, struct inode *inode,
 				struct inode *dir)
 {
@@ -156,6 +211,8 @@ static int bpf_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
 		     struct dentry *dentry, umode_t mode)
 {
 	struct inode *inode;
+	struct bpf_dir_tag *tag;
+	int err;
 
 	inode = bpf_get_inode(dir->i_sb, dir, mode | S_IFDIR);
 	if (IS_ERR(inode))
@@ -164,6 +221,15 @@ static int bpf_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
 	inode->i_op = &bpf_dir_iops;
 	inode->i_fop = &simple_dir_operations;
 
+	tag = inode_tag(dir);
+	if (tag) {
+		err = tag_dir_inode(tag, dentry, inode);
+		if (err) {
+			iput(inode);
+			return err;
+		}
+	}
+
 	inc_nlink(inode);
 	inc_nlink(dir);
 
@@ -404,11 +470,54 @@ static int bpf_symlink(struct user_namespace *mnt_userns, struct inode *dir,
 	return 0;
 }
 
+/* untag_dir_inode - release the tag attached to a directory inode.
+ * @inode: inode of the directory being removed
+ *
+ * Drops the kernfs node reference taken in tag_dir_inode and frees the tag.
+ * Does nothing if the directory is untagged.
+ */
+static void untag_dir_inode(struct inode *inode)
+{
+	struct bpf_dir_tag *tag = inode_tag(inode);
+
+	if (!tag)
+		return;
+
+	WARN_ON(tag->type != BPF_DIR_KERNFS_REP);
+
+	inode->i_private = NULL;
+	kernfs_put(tag->private);
+	kfree(tag);
+}
+
+/* bpf_rmdir - remove a bpffs directory and destroy its tag, if any.
+ * @dir: inode of the parent directory
+ * @dentry: dentry of the directory being removed
+ *
+ * The tag belongs to the directory being removed, not to @dir, and is only
+ * released once simple_rmdir has succeeded, so a failed rmdir leaves the
+ * directory tagged.
+ *
+ * NOTE(review): this is the only place a tag is released; confirm that inode
+ * teardown paths that bypass rmdir (e.g. unmount) do not leak the tag.
+ */
+static int bpf_rmdir(struct inode *dir, struct dentry *dentry)
+{
+	struct inode *inode = d_inode(dentry);
+	int err;
+
+	err = simple_rmdir(dir, dentry);
+	if (err)
+		return err;
+
+	untag_dir_inode(inode);
+	return 0;
+}
+
 static const struct inode_operations bpf_dir_iops = {
 	.lookup		= bpf_lookup,
 	.mkdir		= bpf_mkdir,
 	.symlink	= bpf_symlink,
-	.rmdir		= simple_rmdir,
+	.rmdir		= bpf_rmdir,
 	.rename		= simple_rename,
 	.link		= simple_link,
 	.unlink		= simple_unlink,
diff --git a/kernel/bpf/inode.h b/kernel/bpf/inode.h
new file mode 100644
index 000000000000..2cfeef39e861
--- /dev/null
+++ b/kernel/bpf/inode.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright (c) 2022 Google
+ */
+#ifndef __BPF_INODE_H_
+#define __BPF_INODE_H_
+
+enum tag_type {
+	/* The directory is a replica of a kernfs directory hierarchy. */
+	BPF_DIR_KERNFS_REP = 0,
+};
+
+/* A tag for bpffs directories. It carries special information about a
+ * directory. For example, BPF_DIR_KERNFS_REP denotes that the directory is
+ * a replica of a kernfs hierarchy. Pinning a certain type of object tags
+ * a directory and the tag will be removed at rmdir.
+ */
+struct bpf_dir_tag {
+	enum tag_type type;
+	void *private; /* tag private data; a kernfs_node for KERNFS_REP */
+};
+
+#endif
-- 
2.35.0.rc2.247.g8bbb082509-goog