Miklos Szeredi <miklos@xxxxxxxxxx> wrote: > Cool, thanks for testing. Unfortunately the test-fsinfo-perf.c file > didn't make it into the patch. Can you please refresh and resend? Oops - I forgot to add it. See attached. David --- commit b7239021cb7660bf328bb7fcce05e3a35ce5842b Author: David Howells <dhowells@xxxxxxxxxx> Date: Tue Mar 31 14:39:07 2020 +0100 Performance test Miklós's patch vs fsinfo diff --git a/fs/Makefile b/fs/Makefile index b6bf2424c7f7..ac0627176db1 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -137,3 +137,4 @@ obj-$(CONFIG_EFIVAR_FS) += efivarfs/ obj-$(CONFIG_EROFS_FS) += erofs/ obj-$(CONFIG_VBOXSF_FS) += vboxsf/ obj-$(CONFIG_ZONEFS_FS) += zonefs/ +obj-y += mountfs/ diff --git a/fs/mount.h b/fs/mount.h index 063f41bc2e93..89b091fc482f 100644 --- a/fs/mount.h +++ b/fs/mount.h @@ -82,6 +82,7 @@ struct mount { atomic_t mnt_subtree_notifications; /* Number of notifications in subtree */ struct watch_list *mnt_watchers; /* Watches on dentries within this mount */ #endif + struct mountfs_entry *mnt_mountfs_entry; } __randomize_layout; #define MNT_NS_INTERNAL ERR_PTR(-EINVAL) /* distinct from any mnt_namespace */ @@ -177,3 +178,11 @@ static inline void notify_mount(struct mount *triggered, { } #endif + +void mnt_namespace_lock_read(void); +void mnt_namespace_unlock_read(void); + +void mountfs_create(struct mount *mnt); +extern void mountfs_remove(struct mount *mnt); +int mountfs_lookup_internal(struct vfsmount *m, struct path *path); + diff --git a/fs/mountfs/Makefile b/fs/mountfs/Makefile new file mode 100644 index 000000000000..35a65e9a966f --- /dev/null +++ b/fs/mountfs/Makefile @@ -0,0 +1 @@ +obj-y += super.o diff --git a/fs/mountfs/super.c b/fs/mountfs/super.c new file mode 100644 index 000000000000..82c01eb6154d --- /dev/null +++ b/fs/mountfs/super.c @@ -0,0 +1,502 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include "../pnode.h" +#include <linux/fs.h> +#include <linux/kref.h> +#include <linux/nsproxy.h> +#include <linux/fs_struct.h> 
+#include <linux/fs_context.h> + +#define MOUNTFS_SUPER_MAGIC 0x4e756f4d + +static DEFINE_SPINLOCK(mountfs_lock); +static struct rb_root mountfs_entries = RB_ROOT; +static struct vfsmount *mountfs_mnt __read_mostly; + +struct mountfs_entry { + struct kref kref; + struct mount *mnt; + struct rb_node node; + int id; +}; + +static const char *mountfs_attrs[] = { + "root", "mountpoint", "id", "parent", "options", "children", + "group", "master", "propagate_from" +}; + +#define MOUNTFS_INO(id) (((unsigned long) id + 1) * \ + (ARRAY_SIZE(mountfs_attrs) + 1)) + +void mountfs_entry_release(struct kref *kref) +{ + kfree(container_of(kref, struct mountfs_entry, kref)); +} + +void mountfs_entry_put(struct mountfs_entry *entry) +{ + kref_put(&entry->kref, mountfs_entry_release); +} + +static bool mountfs_entry_visible(struct mountfs_entry *entry) +{ + struct mount *mnt; + bool visible = false; + + rcu_read_lock(); + mnt = rcu_dereference(entry->mnt); + if (mnt && mnt->mnt_ns == current->nsproxy->mnt_ns) + visible = true; + rcu_read_unlock(); + + return visible; +} +static int mountfs_attr_show(struct seq_file *sf, void *v) +{ + const char *name = sf->file->f_path.dentry->d_name.name; + struct mountfs_entry *entry = sf->private; + struct mount *mnt; + struct vfsmount *m; + struct super_block *sb; + struct path root; + int tmp, err = -ENODEV; + + mnt_namespace_lock_read(); + + mnt = entry->mnt; + if (!mnt || !mnt->mnt_ns) + goto out; + + err = 0; + m = &mnt->mnt; + sb = m->mnt_sb; + + if (strcmp(name, "root") == 0) { + if (sb->s_op->show_path) { + err = sb->s_op->show_path(sf, m->mnt_root); + } else { + seq_dentry(sf, m->mnt_root, " \t\n\\"); + } + seq_putc(sf, '\n'); + } else if (strcmp(name, "mountpoint") == 0) { + struct path mnt_path = { .dentry = m->mnt_root, .mnt = m }; + + get_fs_root(current->fs, &root); + err = seq_path_root(sf, &mnt_path, &root, " \t\n\\"); + if (err == SEQ_SKIP) { + seq_puts(sf, "(unreachable)"); + err = 0; + } + seq_putc(sf, '\n'); + path_put(&root); 
+ } else if (strcmp(name, "id") == 0) { + seq_printf(sf, "%i\n", mnt->mnt_id); + } else if (strcmp(name, "parent") == 0) { + tmp = rcu_dereference(mnt->mnt_parent)->mnt_id; + seq_printf(sf, "%i\n", tmp); + } else if (strcmp(name, "options") == 0) { + int mnt_flags = READ_ONCE(m->mnt_flags); + + seq_puts(sf, mnt_flags & MNT_READONLY ? "ro" : "rw"); + seq_mnt_opts(sf, mnt_flags); + seq_putc(sf, '\n'); + } else if (strcmp(name, "children") == 0) { + struct mount *child; + bool first = true; + + list_for_each_entry(child, &mnt->mnt_mounts, mnt_child) { + if (!first) + seq_putc(sf, ','); + else + first = false; + seq_printf(sf, "%i", child->mnt_id); + } + if (!first) + seq_putc(sf, '\n'); + } else if (strcmp(name, "group") == 0) { + if (IS_MNT_SHARED(mnt)) + seq_printf(sf, "%i\n", mnt->mnt_group_id); + } else if (strcmp(name, "master") == 0) { + if (IS_MNT_SLAVE(mnt)) { + tmp = rcu_dereference(mnt->mnt_master)->mnt_group_id; + seq_printf(sf, "%i\n", tmp); + } + } else if (strcmp(name, "propagate_from") == 0) { + if (IS_MNT_SLAVE(mnt)) { + get_fs_root(current->fs, &root); + tmp = get_dominating_id(mnt, &root); + if (tmp) + seq_printf(sf, "%i\n", tmp); + } + } else { + WARN_ON(1); + err = -EIO; + } +out: + mnt_namespace_unlock_read(); + + return err; +} + +static int mountfs_attr_open(struct inode *inode, struct file *file) +{ + return single_open(file, mountfs_attr_show, inode->i_private); +} + +static const struct file_operations mountfs_attr_fops = { + .open = mountfs_attr_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static struct mountfs_entry *mountfs_node_to_entry(struct rb_node *node) +{ + return rb_entry(node, struct mountfs_entry, node); +} + +static struct rb_node **mountfs_find_node(int id, struct rb_node **parent) +{ + struct rb_node **link = &mountfs_entries.rb_node; + + *parent = NULL; + while (*link) { + struct mountfs_entry *entry = mountfs_node_to_entry(*link); + + *parent = *link; + if (id < entry->id) + link = 
&entry->node.rb_left; + else if (id > entry->id) + link = &entry->node.rb_right; + else + break; + } + return link; +} + +void mountfs_create(struct mount *mnt) +{ + struct mountfs_entry *entry; + struct rb_node **link, *parent; + + entry = kzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) { + WARN(1, "failed to allocate mountfs entry"); + return; + } + kref_init(&entry->kref); + entry->mnt = mnt; + entry->id = mnt->mnt_id; + + spin_lock(&mountfs_lock); + link = mountfs_find_node(entry->id, &parent); + if (!WARN_ON(*link)) { + rb_link_node(&entry->node, parent, link); + rb_insert_color(&entry->node, &mountfs_entries); + mnt->mnt_mountfs_entry = entry; + } else { + kfree(entry); + } + spin_unlock(&mountfs_lock); +} + +void mountfs_remove(struct mount *mnt) +{ + struct mountfs_entry *entry = mnt->mnt_mountfs_entry; + + if (!entry) + return; + spin_lock(&mountfs_lock); + entry->mnt = NULL; + rb_erase(&entry->node, &mountfs_entries); + spin_unlock(&mountfs_lock); + + mountfs_entry_put(entry); + + mnt->mnt_mountfs_entry = NULL; +} + +static struct mountfs_entry *mountfs_get_entry(const char *name) +{ + struct mountfs_entry *entry = NULL; + struct rb_node **link, *dummy; + unsigned long mnt_id; + char buf[32]; + int ret; + + ret = kstrtoul(name, 10, &mnt_id); + if (ret || mnt_id > INT_MAX) + return NULL; + + snprintf(buf, sizeof(buf), "%lu", mnt_id); + if (strcmp(buf, name) != 0) + return NULL; + + spin_lock(&mountfs_lock); + link = mountfs_find_node(mnt_id, &dummy); + if (*link) { + entry = mountfs_node_to_entry(*link); + if (!mountfs_entry_visible(entry)) + entry = NULL; + else + kref_get(&entry->kref); + } + spin_unlock(&mountfs_lock); + + return entry; +} + +static void mountfs_init_inode(struct inode *inode, umode_t mode); + +static struct dentry *mountfs_lookup_entry(struct dentry *dentry, + struct mountfs_entry *entry, + int idx) +{ + struct inode *inode; + + inode = new_inode(dentry->d_sb); + if (!inode) { + mountfs_entry_put(entry); + return ERR_PTR(-ENOMEM); + 
} + inode->i_private = entry; + inode->i_ino = MOUNTFS_INO(entry->id) + idx; + mountfs_init_inode(inode, idx ? S_IFREG | 0444 : S_IFDIR | 0555); + return d_splice_alias(inode, dentry); + +} + +static struct dentry *mountfs_lookup(struct inode *dir, struct dentry *dentry, + unsigned int flags) +{ + struct mountfs_entry *entry = dir->i_private; + int i = 0; + + if (entry) { + for (i = 0; i < ARRAY_SIZE(mountfs_attrs); i++) + if (strcmp(mountfs_attrs[i], dentry->d_name.name) == 0) + break; + if (i == ARRAY_SIZE(mountfs_attrs)) + return ERR_PTR(-ENOMEM); + i++; + kref_get(&entry->kref); + } else { + entry = mountfs_get_entry(dentry->d_name.name); + if (!entry) + return ERR_PTR(-ENOENT); + } + + return mountfs_lookup_entry(dentry, entry, i); +} + +static int mountfs_d_revalidate(struct dentry *dentry, unsigned int flags) +{ + struct mountfs_entry *entry = dentry->d_inode->i_private; + + /* root: valid */ + if (!entry) + return 1; + + /* removed: invalid */ + if (!entry->mnt) + return 0; + + /* attribute or visible in this namespace: valid */ + if (!d_can_lookup(dentry) || mountfs_entry_visible(entry)) + return 1; + + /* invlisible in this namespace: valid but deny entry*/ + return -ENOENT; +} + +static int mountfs_readdir(struct file *file, struct dir_context *ctx) +{ + struct rb_node *node; + struct mountfs_entry *entry = file_inode(file)->i_private; + char name[32]; + const char *s; + unsigned int len, pos, id; + + if (ctx->pos - 2 > INT_MAX || !dir_emit_dots(file, ctx)) + return 0; + + if (entry) { + while (ctx->pos - 2 < ARRAY_SIZE(mountfs_attrs)) { + s = mountfs_attrs[ctx->pos - 2]; + if (!dir_emit(ctx, s, strlen(s), + MOUNTFS_INO(entry->id) + ctx->pos, + DT_REG)) + break; + ctx->pos++; + } + return 0; + } + + pos = ctx->pos - 2; + do { + spin_lock(&mountfs_lock); + mountfs_find_node(pos, &node); + pos = 1U + INT_MAX; + do { + if (!node) { + spin_unlock(&mountfs_lock); + goto out; + } + entry = mountfs_node_to_entry(node); + node = rb_next(node); + } while 
(!mountfs_entry_visible(entry)); + if (node) + pos = mountfs_node_to_entry(node)->id; + id = entry->id; + spin_unlock(&mountfs_lock); + + len = snprintf(name, sizeof(name), "%i", id); + ctx->pos = id + 2; + if (!dir_emit(ctx, name, len, MOUNTFS_INO(id), DT_DIR)) + return 0; + } while (pos <= INT_MAX); +out: + ctx->pos = pos + 2; + return 0; +} + +int mountfs_lookup_internal(struct vfsmount *m, struct path *path) +{ + char name[32]; + struct qstr this = { .name = name }; + struct mount *mnt = real_mount(m); + struct mountfs_entry *entry = mnt->mnt_mountfs_entry; + struct dentry *dentry, *old, *root = mountfs_mnt->mnt_root; + + this.len = snprintf(name, sizeof(name), "%i", mnt->mnt_id); + dentry = d_hash_and_lookup(root, &this); + if (dentry && dentry->d_inode->i_private != entry) { + d_invalidate(dentry); + dput(dentry); + dentry = NULL; + } + if (!dentry) { + dentry = d_alloc(root, &this); + if (!dentry) + return -ENOMEM; + + kref_get(&entry->kref); + old = mountfs_lookup_entry(dentry, entry, 0); + if (old) { + dput(dentry); + if (IS_ERR(old)) + return PTR_ERR(old); + dentry = old; + } + } + + *path = (struct path) { .mnt = mountfs_mnt, .dentry = dentry }; + return 0; +} + +static const struct dentry_operations mountfs_dops = { + .d_revalidate = mountfs_d_revalidate, +}; + +static const struct inode_operations mountfs_iops = { + .lookup = mountfs_lookup, +}; + +static const struct file_operations mountfs_fops = { + .iterate_shared = mountfs_readdir, + .read = generic_read_dir, + .llseek = generic_file_llseek, +}; + +static void mountfs_init_inode(struct inode *inode, umode_t mode) +{ + inode->i_mode = mode; + inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); + if (S_ISREG(mode)) { + inode->i_size = PAGE_SIZE; + inode->i_fop = &mountfs_attr_fops; + } else { + inode->i_op = &mountfs_iops; + inode->i_fop = &mountfs_fops; + } +} + +static void mountfs_evict_inode(struct inode *inode) +{ + struct mountfs_entry *entry = inode->i_private; + + 
clear_inode(inode); + if (entry) + mountfs_entry_put(entry); +} + +static const struct super_operations mountfs_sops = { + .statfs = simple_statfs, + .drop_inode = generic_delete_inode, + .evict_inode = mountfs_evict_inode, +}; + +static int mountfs_fill_super(struct super_block *sb, struct fs_context *fc) +{ + struct inode *root; + + sb->s_iflags |= SB_I_NOEXEC | SB_I_NODEV; + sb->s_blocksize = PAGE_SIZE; + sb->s_blocksize_bits = PAGE_SHIFT; + sb->s_magic = MOUNTFS_SUPER_MAGIC; + sb->s_time_gran = 1; + sb->s_shrink.seeks = 0; + sb->s_op = &mountfs_sops; + sb->s_d_op = &mountfs_dops; + + root = new_inode(sb); + if (!root) + return -ENOMEM; + + root->i_ino = 1; + mountfs_init_inode(root, S_IFDIR | 0444); + + sb->s_root = d_make_root(root); + if (!sb->s_root) + return -ENOMEM; + + return 0; +} + +static int mountfs_get_tree(struct fs_context *fc) +{ + return get_tree_single(fc, mountfs_fill_super); +} + +static const struct fs_context_operations mountfs_context_ops = { + .get_tree = mountfs_get_tree, +}; + +static int mountfs_init_fs_context(struct fs_context *fc) +{ + fc->ops = &mountfs_context_ops; + fc->global = true; + return 0; +} + +static struct file_system_type mountfs_fs_type = { + .name = "mountfs", + .init_fs_context = mountfs_init_fs_context, + .kill_sb = kill_anon_super, +}; + +static int __init mountfs_init(void) +{ + int err; + + err = register_filesystem(&mountfs_fs_type); + if (!err) { + mountfs_mnt = kern_mount(&mountfs_fs_type); + if (IS_ERR(mountfs_mnt)) { + err = PTR_ERR(mountfs_mnt); + unregister_filesystem(&mountfs_fs_type); + } + } + return err; +} +fs_initcall(mountfs_init); diff --git a/fs/namespace.c b/fs/namespace.c index 5427e732c1bf..a05a2885a90e 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -962,6 +962,8 @@ struct vfsmount *vfs_create_mount(struct fs_context *fc) if (fc->sb_flags & SB_KERNMOUNT) mnt->mnt.mnt_flags = MNT_INTERNAL; + else + mountfs_create(mnt); atomic_inc(&fc->root->d_sb->s_active); mnt->mnt.mnt_sb = fc->root->d_sb; 
@@ -1033,7 +1035,7 @@ vfs_submount(const struct dentry *mountpoint, struct file_system_type *type, } EXPORT_SYMBOL_GPL(vfs_submount); -static struct mount *clone_mnt(struct mount *old, struct dentry *root, +static struct mount *clone_mnt_common(struct mount *old, struct dentry *root, int flag) { struct super_block *sb = old->mnt.mnt_sb; @@ -1100,6 +1102,17 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root, return ERR_PTR(err); } +static struct mount *clone_mnt(struct mount *old, struct dentry *root, + int flag) +{ + struct mount *mnt = clone_mnt_common(old, root, flag); + + if (!IS_ERR(mnt)) + mountfs_create(mnt); + + return mnt; +} + static void cleanup_mnt(struct mount *mnt) { struct hlist_node *p; @@ -1112,6 +1125,7 @@ static void cleanup_mnt(struct mount *mnt) * so mnt_get_writers() below is safe. */ WARN_ON(mnt_get_writers(mnt)); + if (unlikely(mnt->mnt_pins.first)) mnt_pin_kill(mnt); hlist_for_each_entry_safe(m, p, &mnt->mnt_stuck_children, mnt_umount) { @@ -1197,6 +1211,8 @@ static void mntput_no_expire(struct mount *mnt) unlock_mount_hash(); shrink_dentry_list(&list); + mountfs_remove(mnt); + if (likely(!(mnt->mnt.mnt_flags & MNT_INTERNAL))) { struct task_struct *task = current; if (likely(!(task->flags & PF_KTHREAD))) { @@ -1263,13 +1279,14 @@ EXPORT_SYMBOL(path_is_mountpoint); struct vfsmount *mnt_clone_internal(const struct path *path) { struct mount *p; - p = clone_mnt(real_mount(path->mnt), path->dentry, CL_PRIVATE); + p = clone_mnt_common(real_mount(path->mnt), path->dentry, CL_PRIVATE); if (IS_ERR(p)) return ERR_CAST(p); p->mnt.mnt_flags |= MNT_INTERNAL; return &p->mnt; } + #ifdef CONFIG_PROC_FS /* iterator; we want it to have access to namespace_sem, thus here... 
*/ static void *m_start(struct seq_file *m, loff_t *pos) @@ -1411,6 +1428,16 @@ static inline void namespace_lock(void) down_write(&namespace_sem); } +void mnt_namespace_lock_read(void) +{ + down_read(&namespace_sem); +} + +void mnt_namespace_unlock_read(void) +{ + up_read(&namespace_sem); +} + enum umount_tree_flags { UMOUNT_SYNC = 1, UMOUNT_PROPAGATE = 2, diff --git a/fs/proc/base.c b/fs/proc/base.c index c7c64272b0fa..0477f8b51182 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -3092,6 +3092,7 @@ static const struct pid_entry tgid_base_stuff[] = { DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), DIR("map_files", S_IRUSR|S_IXUSR, proc_map_files_inode_operations, proc_map_files_operations), DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations), + DIR("fdmount", S_IRUSR|S_IXUSR, proc_fdmount_inode_operations, proc_fdmount_operations), DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations), #ifdef CONFIG_NET DIR("net", S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations), @@ -3497,6 +3498,7 @@ static const struct inode_operations proc_tid_comm_inode_operations = { static const struct pid_entry tid_base_stuff[] = { DIR("fd", S_IRUSR|S_IXUSR, proc_fd_inode_operations, proc_fd_operations), DIR("fdinfo", S_IRUSR|S_IXUSR, proc_fdinfo_inode_operations, proc_fdinfo_operations), + DIR("fdmount", S_IRUSR|S_IXUSR, proc_fdmount_inode_operations, proc_fdmount_operations), DIR("ns", S_IRUSR|S_IXUGO, proc_ns_dir_inode_operations, proc_ns_dir_operations), #ifdef CONFIG_NET DIR("net", S_IRUGO|S_IXUGO, proc_net_inode_operations, proc_net_operations), diff --git a/fs/proc/fd.c b/fs/proc/fd.c index 81882a13212d..94a57e178801 100644 --- a/fs/proc/fd.c +++ b/fs/proc/fd.c @@ -361,3 +361,85 @@ const struct file_operations proc_fdinfo_operations = { .iterate_shared = proc_readfdinfo, .llseek = generic_file_llseek, }; + +static int proc_fdmount_link(struct dentry *dentry, struct path *path) +{ 
+ struct files_struct *files = NULL; + struct task_struct *task; + struct path fd_path; + int ret = -ENOENT; + + task = get_proc_task(d_inode(dentry)); + if (task) { + files = get_files_struct(task); + put_task_struct(task); + } + + if (files) { + unsigned int fd = proc_fd(d_inode(dentry)); + struct file *fd_file; + + spin_lock(&files->file_lock); + fd_file = fcheck_files(files, fd); + if (fd_file) { + fd_path = fd_file->f_path; + path_get(&fd_path); + ret = 0; + } + spin_unlock(&files->file_lock); + put_files_struct(files); + } + if (!ret) { + ret = mountfs_lookup_internal(fd_path.mnt, path); + path_put(&fd_path); + } + + return ret; +} + +static struct dentry *proc_fdmount_instantiate(struct dentry *dentry, + struct task_struct *task, const void *ptr) +{ + const struct fd_data *data = ptr; + struct proc_inode *ei; + struct inode *inode; + + inode = proc_pid_make_inode(dentry->d_sb, task, S_IFLNK | 0400); + if (!inode) + return ERR_PTR(-ENOENT); + + ei = PROC_I(inode); + ei->fd = data->fd; + + inode->i_op = &proc_pid_link_inode_operations; + inode->i_size = 64; + + ei->op.proc_get_link = proc_fdmount_link; + tid_fd_update_inode(task, inode, 0); + + d_set_d_op(dentry, &tid_fd_dentry_operations); + return d_splice_alias(inode, dentry); +} + +static struct dentry * +proc_lookupfdmount(struct inode *dir, struct dentry *dentry, unsigned int flags) +{ + return proc_lookupfd_common(dir, dentry, proc_fdmount_instantiate); +} + +static int proc_readfdmount(struct file *file, struct dir_context *ctx) +{ + return proc_readfd_common(file, ctx, + proc_fdmount_instantiate); +} + +const struct inode_operations proc_fdmount_inode_operations = { + .lookup = proc_lookupfdmount, + .setattr = proc_setattr, +}; + +const struct file_operations proc_fdmount_operations = { + .read = generic_read_dir, + .iterate_shared = proc_readfdmount, + .llseek = generic_file_llseek, +}; diff --git a/fs/proc/fd.h b/fs/proc/fd.h index f371a602bf58..9e087c833e65 100644 --- a/fs/proc/fd.h +++ 
b/fs/proc/fd.h @@ -10,6 +10,9 @@ extern const struct inode_operations proc_fd_inode_operations; extern const struct file_operations proc_fdinfo_operations; extern const struct inode_operations proc_fdinfo_inode_operations; +extern const struct file_operations proc_fdmount_operations; +extern const struct inode_operations proc_fdmount_inode_operations; + extern int proc_fd_permission(struct inode *inode, int mask); static inline unsigned int proc_fd(struct inode *inode) diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c index 273ee82d8aa9..e634faa9160e 100644 --- a/fs/proc_namespace.c +++ b/fs/proc_namespace.c @@ -61,24 +61,6 @@ static int show_sb_opts(struct seq_file *m, struct super_block *sb) return security_sb_show_options(m, sb); } -static void show_mnt_opts(struct seq_file *m, struct vfsmount *mnt) -{ - static const struct proc_fs_info mnt_info[] = { - { MNT_NOSUID, ",nosuid" }, - { MNT_NODEV, ",nodev" }, - { MNT_NOEXEC, ",noexec" }, - { MNT_NOATIME, ",noatime" }, - { MNT_NODIRATIME, ",nodiratime" }, - { MNT_RELATIME, ",relatime" }, - { 0, NULL } - }; - const struct proc_fs_info *fs_infop; - - for (fs_infop = mnt_info; fs_infop->flag; fs_infop++) { - if (mnt->mnt_flags & fs_infop->flag) - seq_puts(m, fs_infop->str); - } -} static inline void mangle(struct seq_file *m, const char *s) { @@ -120,7 +102,7 @@ static int show_vfsmnt(struct seq_file *m, struct vfsmount *mnt) err = show_sb_opts(m, sb); if (err) goto out; - show_mnt_opts(m, mnt); + seq_mnt_opts(m, mnt->mnt_flags); if (sb->s_op->show_options) err = sb->s_op->show_options(m, mnt_path.dentry); seq_puts(m, " 0 0\n"); @@ -153,7 +135,7 @@ static int show_mountinfo(struct seq_file *m, struct vfsmount *mnt) goto out; seq_puts(m, mnt->mnt_flags & MNT_READONLY ? 
" ro" : " rw"); - show_mnt_opts(m, mnt); + seq_mnt_opts(m, mnt->mnt_flags); /* Tagged fields ("foo:X" or "bar") */ if (IS_MNT_SHARED(r)) diff --git a/fs/seq_file.c b/fs/seq_file.c index 1600034a929b..9726baba1732 100644 --- a/fs/seq_file.c +++ b/fs/seq_file.c @@ -15,6 +15,7 @@ #include <linux/cred.h> #include <linux/mm.h> #include <linux/printk.h> +#include <linux/mount.h> #include <linux/string_helpers.h> #include <linux/uaccess.h> @@ -548,6 +549,28 @@ int seq_dentry(struct seq_file *m, struct dentry *dentry, const char *esc) } EXPORT_SYMBOL(seq_dentry); +void seq_mnt_opts(struct seq_file *m, int mnt_flags) +{ + unsigned int i; + static const struct { + int flag; + const char *str; + } mnt_info[] = { + { MNT_NOSUID, ",nosuid" }, + { MNT_NODEV, ",nodev" }, + { MNT_NOEXEC, ",noexec" }, + { MNT_NOATIME, ",noatime" }, + { MNT_NODIRATIME, ",nodiratime" }, + { MNT_RELATIME, ",relatime" }, + { 0, NULL } + }; + + for (i = 0; mnt_info[i].flag; i++) { + if (mnt_flags & mnt_info[i].flag) + seq_puts(m, mnt_info[i].str); + } +} + static void *single_start(struct seq_file *p, loff_t *pos) { return NULL + (*pos == 0); diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index 770c2bf3aa43..9dd7812eb777 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -138,6 +138,7 @@ int seq_file_path(struct seq_file *, struct file *, const char *); int seq_dentry(struct seq_file *, struct dentry *, const char *); int seq_path_root(struct seq_file *m, const struct path *path, const struct path *root, const char *esc); +void seq_mnt_opts(struct seq_file *m, int mnt_flags); int single_open(struct file *, int (*)(struct seq_file *, void *), void *); int single_open_size(struct file *, int (*)(struct seq_file *, void *), void *, size_t); diff --git a/samples/vfs/Makefile b/samples/vfs/Makefile index 19be60ab950e..78deb8483d27 100644 --- a/samples/vfs/Makefile +++ b/samples/vfs/Makefile @@ -4,6 +4,7 @@ hostprogs := \ test-fsinfo \ test-fsmount \ + test-fsinfo-perf \ 
test-mntinfo \ test-statx @@ -12,6 +13,7 @@ always-y := $(hostprogs) HOSTCFLAGS_test-fsinfo.o += -I$(objtree)/usr/include HOSTLDLIBS_test-fsinfo += -static -lm HOSTCFLAGS_test-mntinfo.o += -I$(objtree)/usr/include +HOSTCFLAGS_test-fsinfo-perf.o += -I$(objtree)/usr/include HOSTCFLAGS_test-fsmount.o += -I$(objtree)/usr/include HOSTCFLAGS_test-statx.o += -I$(objtree)/usr/include diff --git a/samples/vfs/test-fsinfo-perf.c b/samples/vfs/test-fsinfo-perf.c new file mode 100644 index 000000000000..fba40737f768 --- /dev/null +++ b/samples/vfs/test-fsinfo-perf.c @@ -0,0 +1,361 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* Test the fsinfo() system call + * + * Copyright (C) 2020 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@xxxxxxxxxx) + */ + +#define _GNU_SOURCE +#define _ATFILE_SOURCE +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <string.h> +#include <unistd.h> +#include <ctype.h> +#include <errno.h> +#include <time.h> +#include <math.h> +#include <fcntl.h> +#include <sys/syscall.h> +#include <sys/stat.h> +#include <sys/mount.h> +#include <sys/time.h> +#include <linux/fsinfo.h> + +#ifndef __NR_fsinfo +#define __NR_fsinfo -1 +#endif + +#define ERR(ret, what) do { if ((long)(ret) == -1) { perror(what); exit(1); } } while(0) +#define OOM(ret) do { if (!(ret)) { perror(NULL); exit(1); } } while(0) + +static int nr_mounts = 3; +static const char *base_path; + +static __attribute__((unused)) +ssize_t fsinfo(int dfd, const char *filename, + struct fsinfo_params *params, size_t params_size, + void *result_buffer, size_t result_buf_size) +{ + return syscall(__NR_fsinfo, dfd, filename, + params, params_size, + result_buffer, result_buf_size); +} + +static void iterate(void (*func)(int i, const char *)) +{ + char name[4096]; + int i; + + for (i = 0; i < nr_mounts; i++) { + sprintf(name, "%s/%d", base_path, i); + func(i, name); + } +} + +static void make_mount(int ix, const char *path) +{ + 
ERR(mkdir(path, 0755), "mkdir"); + ERR(mount("none", path, "tmpfs", 0, NULL), "mount"); + ERR(mount("none", path, NULL, MS_PRIVATE, NULL), "mount"); +} + +static void do_umount(void) +{ + printf("--- umount ---\n"); + if (umount2(base_path, MNT_DETACH) == -1) + perror("umount"); +} + +static unsigned long sum_mnt_id; + +static void get_mntid_by_fsinfo(int ix, const char *path) +{ + struct fsinfo_mount_info r; + struct fsinfo_params params = { + .flags = FSINFO_FLAGS_QUERY_PATH, + .request = FSINFO_ATTR_MOUNT_INFO, + }; + + ERR(fsinfo(AT_FDCWD, path, &params, sizeof(params), &r, sizeof(r)), + "fsinfo"); + //printf("[%u] %u\n", ix, r.mnt_id); + sum_mnt_id += r.mnt_id; +} + +static void get_mntid_by_proc(int ix, const char *path) +{ + unsigned int mnt_id; + ssize_t len; + char procfile[100], buffer[4096], *p, *nl; + int fd, fd2; + + fd = open(path, O_PATH); + ERR(fd, "open/path"); + sprintf(procfile, "/proc/self/fdinfo/%u", fd); + fd2 = open(procfile, O_RDONLY); + ERR(fd2, "open/proc"); + len = read(fd2, buffer, sizeof(buffer) - 1); + ERR(len, "read"); + buffer[len] = 0; + close(fd2); + close(fd); + + p = buffer; + do { + nl = strchr(p, '\n'); + if (nl) + *nl++ = '\0'; + else + nl = NULL; + + if (strncmp(p, "mnt_id:", 7) != 0) + continue; + p += 7; + while (isblank(*p)) + p++; + /* Have to allow for extra numbers being added to the line */ + if (sscanf(p, "%u", &mnt_id) != 1) { + fprintf(stderr, "Bad format %s\n", procfile); + exit(3); + } + break; + + } while ((p = nl)); + + if (!p) { + fprintf(stderr, "Missing field %s\n", procfile); + exit(3); + } + + sum_mnt_id += mnt_id; + //printf("[%u] %u\n", ix, mnt_id); +} + +static void get_mntid_by_fsinfo_2(void) +{ + struct fsinfo_mount_child *children, *c, *end; + struct fsinfo_mount_info r; + struct fsinfo_params params = { + .flags = FSINFO_FLAGS_QUERY_PATH, + .request = FSINFO_ATTR_MOUNT_INFO, + }; + unsigned int base_mnt_id; + size_t s_children, n_children; + char name[32]; + int i; + + /* Convert path to mount ID */ + 
ERR(fsinfo(AT_FDCWD, base_path, &params, sizeof(params), &r, sizeof(r)), + "fsinfo/base"); + base_mnt_id = r.mnt_id; + //printf("[B] %u\n", base_mnt_id); + + /* Get a list of all the children of this mount ID */ + s_children = (nr_mounts + 1) * sizeof(*children); + children = malloc(s_children); + OOM(children); + + params.flags = FSINFO_FLAGS_QUERY_MOUNT; + params.request = FSINFO_ATTR_MOUNT_CHILDREN; + sprintf(name, "%u", base_mnt_id); + s_children = fsinfo(AT_FDCWD, name, &params, sizeof(params), children, s_children); + ERR(s_children, "fsinfo/children"); + + /* Query each child */ + n_children = s_children / sizeof(*c) - 1; // Parent is added at end + c = children; + end = c + n_children; + for (i = 0; c < end; c++, i++) { + //printf("[%u] %u\n", i, c->mnt_id); + params.flags = FSINFO_FLAGS_QUERY_MOUNT; + params.request = FSINFO_ATTR_MOUNT_INFO; + sprintf(name, "%u", c->mnt_id); + ERR(fsinfo(AT_FDCWD, name, &params, sizeof(params), &r, sizeof(r)), + "fsinfo/child"); + sum_mnt_id += r.mnt_id; + } +} + +static void get_mntid_by_mountfs(void) +{ + unsigned int base_mnt_id, mnt_id, x; + ssize_t len, s_children; + char procfile[100], buffer[100], *children, *p, *q, *nl, *comma; + int fd, fd2, mntfd, i; + + /* Start off by reading the mount ID from the base path */ + fd = open(base_path, O_PATH); + ERR(fd, "open/path"); + sprintf(procfile, "/proc/self/fdinfo/%u", fd); + fd2 = open(procfile, O_RDONLY); + ERR(fd2, "open/proc"); + len = read(fd2, buffer, sizeof(buffer) - 1); + ERR(len, "read"); + buffer[len] = 0; + close(fd2); + close(fd); + + p = buffer; + do { + nl = strchr(p, '\n'); + if (nl) + *nl++ = '\0'; + else + nl = NULL; + + if (strncmp(p, "mnt_id:", 7) != 0) + continue; + p += 7; + while (isblank(*p)) + p++; + /* Have to allow for extra numbers being added to the line */ + if (sscanf(p, "%u", &base_mnt_id) != 1) { + fprintf(stderr, "Bad format %s\n", procfile); + exit(3); + } + break; + + } while ((p = nl)); + + if (!p) { + fprintf(stderr, "Missing field %s\n", 
procfile); + exit(3); + } + + if (0) printf("[B] %u\n", base_mnt_id); + + mntfd = open("/mnt", O_PATH); + ERR(fd, "open/mountfs"); + + /* Get a list of all the children of this mount ID */ + s_children = (nr_mounts) * 12; + children = malloc(s_children); + OOM(children); + + sprintf(procfile, "%u/children", base_mnt_id); + fd = openat(mntfd, procfile, O_RDONLY); + ERR(fd, "open/children"); + s_children = read(fd, children, s_children - 1); + ERR(s_children, "read/children"); + close(fd); + if (s_children > 0 && children[s_children - 1] == '\n') + s_children--; + children[s_children] = 0; + + /* Query each child */ + p = children; + if (!*p) + return; + i = 0; + do { + mnt_id = strtoul(p, &comma, 10); + if (*comma) { + if (*comma != ',') { + fprintf(stderr, "Bad format in mountfs-children\n"); + exit(3); + } + comma++; + } + + sprintf(procfile, "%u/id", mnt_id); + fd = openat(mntfd, procfile, O_RDONLY); + ERR(fd, procfile); + len = read(fd, buffer, sizeof(buffer) - 1); + ERR(len, "read/id"); + close(fd); + if (len > 0 && buffer[len - 1] == '\n') + len--; + buffer[len] = 0; + + x = strtoul(buffer, &q, 10); + + if (*q) { + fprintf(stderr, "Bad format in %s '%s'\n", procfile, buffer); + exit(3); + } + + if (0) printf("[%u] %u\n", i++, x); + sum_mnt_id += x; + } while (p = comma, *comma); +} + +static unsigned long duration(struct timeval *before, struct timeval *after) +{ + unsigned long a, b; + + a = after->tv_sec * 1000000 + after->tv_usec; + b = before->tv_sec * 1000000 + before->tv_usec; + return a - b; +} + +int main(int argc, char **argv) +{ + struct timeval f_before, f_after; + struct timeval f2_before, f2_after; + struct timeval p_before, p_after; + struct timeval p2_before, p2_after; + const char *path; + unsigned long f_dur, f2_dur, p_dur, p2_dur; + + if (argc < 2) { + fprintf(stderr, "Format: %s <path> [nr_mounts]\n", argv[0]); + exit(2); + } + + if (argc == 3) + nr_mounts = atoi(argv[2]); + + path = argv[1]; + ERR(mount("none", path, "tmpfs", 0, NULL), 
"mount"); + ERR(mount("none", path, NULL, MS_PRIVATE, NULL), "mount"); + base_path = path; + atexit(do_umount); + + printf("--- make mounts ---\n"); + iterate(make_mount); + + printf("--- test fsinfo by path ---\n"); + sum_mnt_id = 0; + ERR(gettimeofday(&f_before, NULL), "gettimeofday"); + iterate(get_mntid_by_fsinfo); + ERR(gettimeofday(&f_after, NULL), "gettimeofday"); + printf("sum(mnt_id) = %lu\n", sum_mnt_id); + + printf("--- test fsinfo by mnt_id ---\n"); + sum_mnt_id = 0; + ERR(gettimeofday(&f2_before, NULL), "gettimeofday"); + get_mntid_by_fsinfo_2(); + ERR(gettimeofday(&f2_after, NULL), "gettimeofday"); + printf("sum(mnt_id) = %lu\n", sum_mnt_id); + + printf("--- test /proc/fdinfo ---\n"); + sum_mnt_id = 0; + ERR(gettimeofday(&p_before, NULL), "gettimeofday"); + iterate(get_mntid_by_proc); + ERR(gettimeofday(&p_after, NULL), "gettimeofday"); + printf("sum(mnt_id) = %lu\n", sum_mnt_id); + + printf("--- test mountfs ---\n"); + sum_mnt_id = 0; + ERR(gettimeofday(&p2_before, NULL), "gettimeofday"); + get_mntid_by_mountfs(); + ERR(gettimeofday(&p2_after, NULL), "gettimeofday"); + printf("sum(mnt_id) = %lu\n", sum_mnt_id); + + f_dur = duration(&f_before, &f_after); + f2_dur = duration(&f2_before, &f2_after); + p_dur = duration(&p_before, &p_after); + p2_dur = duration(&p2_before, &p2_after); + //printf("fsinfo duration %10luus for %d mounts\n", f_dur, nr_mounts); + //printf("procfd duration %10luus for %d mounts\n", p_dur, nr_mounts); + + printf("For %7d mounts, f=%10luus f2=%10luus p=%10luus p2=%10luus; p=%.1f*f p=%.1f*f2 p=%.1f*p2\n", + nr_mounts, f_dur, f2_dur, p_dur, p2_dur, + (double)p_dur / (double)f_dur, + (double)p_dur / (double)f2_dur, + (double)p_dur / (double)p2_dur); + return 0; +}