This adds a new directory, /proc/namespaces/, which shows all namespaces that can be accessed with the credentials of this /proc's tasks. Every /proc instance is related to a pid_namespace, and that pid_namespace is related to a user_namespace. The entries shown in /proc/namespaces/ are the namespaces whose user_namespace is the same as /proc's user_namespace, or one of its descendants. That is, if /proc has pid_ns->user_ns, then /proc/namespaces shows only those namespaces ns for which in_userns(pid_ns->user_ns, ns->user_ns) is true.

The final result looks like this:

# ls /proc/namespaces/ -l
lrwxrwxrwx 1 root root 0 Jul 29 16:50 'cgroup:[4026531835]' -> 'cgroup:[4026531835]'
lrwxrwxrwx 1 root root 0 Jul 29 16:50 'ipc:[4026531839]' -> 'ipc:[4026531839]'
lrwxrwxrwx 1 root root 0 Jul 29 16:50 'mnt:[4026531840]' -> 'mnt:[4026531840]'
lrwxrwxrwx 1 root root 0 Jul 29 16:50 'mnt:[4026531861]' -> 'mnt:[4026531861]'
lrwxrwxrwx 1 root root 0 Jul 29 16:50 'mnt:[4026532133]' -> 'mnt:[4026532133]'
lrwxrwxrwx 1 root root 0 Jul 29 16:50 'mnt:[4026532134]' -> 'mnt:[4026532134]'
lrwxrwxrwx 1 root root 0 Jul 29 16:50 'mnt:[4026532135]' -> 'mnt:[4026532135]'
lrwxrwxrwx 1 root root 0 Jul 29 16:50 'mnt:[4026532136]' -> 'mnt:[4026532136]'
lrwxrwxrwx 1 root root 0 Jul 29 16:50 'net:[4026531993]' -> 'net:[4026531993]'
lrwxrwxrwx 1 root root 0 Jul 29 16:50 'pid:[4026531836]' -> 'pid:[4026531836]'
lrwxrwxrwx 1 root root 0 Jul 29 16:50 'time:[4026531834]' -> 'time:[4026531834]'
lrwxrwxrwx 1 root root 0 Jul 29 16:50 'user:[4026531837]' -> 'user:[4026531837]'
lrwxrwxrwx 1 root root 0 Jul 29 16:50 'uts:[4026531838]' -> 'uts:[4026531838]'

Every namespace may be opened like an ordinary file in /proc/[pid]/ns.

Signed-off-by: Kirill Tkhai <ktkhai@xxxxxxxxxxxxx>
---
 fs/nsfs.c               |    2
 fs/proc/Makefile        |    1
 fs/proc/internal.h      |   16 ++
 fs/proc/namespaces.c    |  355 +++++++++++++++++++++++++++++++++++++++++++++++
 fs/proc/root.c          |   17 ++-
 include/linux/proc_fs.h |    1
 6 files changed, 386 insertions(+), 6 deletions(-)
 create mode 100644 fs/proc/namespaces.c

diff --git a/fs/nsfs.c b/fs/nsfs.c
index ee4be67d3a0b..61b789d2089c 100644
--- a/fs/nsfs.c
+++ b/fs/nsfs.c
@@ -58,7 +58,7 @@ static void nsfs_evict(struct inode *inode)
 	ns->ops->put(ns);
 }
 
-static int __ns_get_path(struct path *path, struct ns_common *ns)
+int __ns_get_path(struct path *path, struct ns_common *ns)
 {
 	struct vfsmount *mnt = nsfs_mnt;
 	struct dentry *dentry;
diff --git a/fs/proc/Makefile b/fs/proc/Makefile
index dc2d51f42905..34ff671c6d59 100644
--- a/fs/proc/Makefile
+++ b/fs/proc/Makefile
@@ -25,6 +25,7 @@ proc-y	+= util.o
 proc-y	+= version.o
 proc-y	+= softirqs.o
 proc-y	+= task_namespaces.o
+proc-y	+= namespaces.o
 proc-y	+= self.o
 proc-y	+= thread_self.o
 proc-$(CONFIG_PROC_SYSCTL)	+= proc_sysctl.o
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 572757ff97be..d19fe5574799 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -134,10 +134,11 @@ void task_dump_owner(struct task_struct *task, umode_t mode,
 		     kuid_t *ruid, kgid_t *rgid);
 
 unsigned name_to_int(const struct qstr *qstr);
-/*
- * Offset of the first process in the /proc root directory..
- */
-#define FIRST_PROCESS_ENTRY 256
+
+/* Offset of "namespaces" entry in /proc root directory */
+#define NAMESPACES_ENTRY 256
+/* Offset of the first process in the /proc root directory */
+#define FIRST_PROCESS_ENTRY (NAMESPACES_ENTRY + 1)
 
 /* Worst case buffer size needed for holding an integer. */
 #define PROC_NUMBUF 13
@@ -168,6 +169,7 @@ extern void proc_pid_evict_inode(struct proc_inode *);
 extern struct inode *proc_pid_make_inode(struct super_block *, struct task_struct *, umode_t);
 extern void pid_update_inode(struct task_struct *, struct inode *);
 extern int pid_delete_dentry(const struct dentry *);
+extern int proc_emit_namespaces(struct file *, struct dir_context *);
 extern int proc_pid_readdir(struct file *, struct dir_context *);
 struct dentry *proc_pid_lookup(struct dentry *, unsigned int);
 extern loff_t mem_lseek(struct file *, loff_t, int);
@@ -222,6 +224,12 @@ void set_proc_pid_nlink(void);
 extern struct inode *proc_get_inode(struct super_block *, struct proc_dir_entry *);
 extern void proc_entry_rundown(struct proc_dir_entry *);
 
+/*
+ * namespaces.c
+ */
+extern int proc_setup_namespaces(struct super_block *);
+extern void proc_namespaces_init(void);
+
 /*
  * task_namespaces.c
  */
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
new file mode 100644
index 000000000000..ab47e1555619
--- /dev/null
+++ b/fs/proc/namespaces.c
@@ -0,0 +1,355 @@
+#include <linux/pid_namespace.h>
+#include <linux/user_namespace.h>
+#include <linux/namei.h>
+#include "internal.h"
+
+/* Inode number of /proc/namespaces, allocated in proc_namespaces_init() */
+static unsigned namespaces_inum __ro_after_init;
+
+/* Emit the "namespaces" entry; called while listing the /proc root */
+int proc_emit_namespaces(struct file *file, struct dir_context *ctx)
+{
+	struct proc_fs_info *fs_info = proc_sb_info(file_inode(file)->i_sb);
+	struct inode *inode = d_inode(fs_info->proc_namespaces);
+
+	return dir_emit(ctx, "namespaces", 10, inode->i_ino, DT_DIR);
+}
+
+/*
+ * Split a "<type>:[<inum>]" dentry name into its parts.
+ *
+ * Returns 0 on success: @type points at the start of the name, @type_len is
+ * the length of the text before ':' and @inum holds the parsed number.
+ * Returns -ENOENT for any other format or if @inum < PROC_NS_MIN_INO.
+ */
+static int parse_namespace_dentry_name(const struct dentry *dentry,
+		const char **type, unsigned int *type_len, unsigned int *inum)
+{
+	const char *p, *name;
+	int count = 0;	/* stays 0 if sscanf() stops before reaching %n */
+
+	*type = name = dentry->d_name.name;
+	p = strchr(name, ':');
+	if (!p || p == name)
+		return -ENOENT;
+	*type_len = p - name;
+
+	p += 1;
+	if (sscanf(p, "[%u]%n", inum, &count) != 1 || *(p + count) != '\0' ||
+	    *inum < PROC_NS_MIN_INO)
+		return -ENOENT;
+
+	return 0;
+}
+
+/*
+ * Find the namespace named by @dentry.
+ *
+ * Returns NULL if the name cannot be parsed, if ns_get_next() does not
+ * return a namespace with exactly this inum and type name, or if its user
+ * namespace is not in_userns() of @pid_ns->user_ns. A non-NULL result must
+ * be released by the caller with ns->ops->put().
+ */
+static struct ns_common *get_namespace_by_dentry(struct pid_namespace *pid_ns,
+						 const struct dentry *dentry)
+{
+	unsigned int type_len, inum, p_inum;
+	struct user_namespace *user_ns;
+	struct ns_common *ns;
+	const char *type;
+
+	if (parse_namespace_dentry_name(dentry, &type, &type_len, &inum) < 0)
+		return NULL;
+
+	/* Search from inum - 1; the ns->inum check below requires an exact hit */
+	p_inum = inum - 1;
+	ns = ns_get_next(&p_inum);
+	if (!ns)
+		return NULL;
+
+	if (ns->inum != inum || strncmp(type, ns->ops->name, type_len) != 0 ||
+	    ns->ops->name[type_len] != '\0') {
+		ns->ops->put(ns);
+		return NULL;
+	}
+
+	if (ns->ops != &userns_operations)
+		user_ns = ns->ops->owner(ns);
+	else
+		user_ns = container_of(ns, struct user_namespace, ns);
+
+	if (!in_userns(pid_ns->user_ns, user_ns)) {
+		ns->ops->put(ns);
+		return NULL;
+	}
+
+	return ns;
+}
+
+static struct dentry *proc_namespace_instantiate(struct dentry *dentry,
+	struct task_struct *task, const void *ptr);
+
+/* ->lookup(): instantiate a symlink if @dentry names a visible namespace */
+static struct dentry *proc_namespaces_lookup(struct inode *dir, struct dentry *dentry,
+	unsigned int flags)
+{
+	struct pid_namespace *pid_ns = proc_pid_ns(dir->i_sb);
+	struct task_struct *task;
+	struct ns_common *ns;
+
+	ns = get_namespace_by_dentry(pid_ns, dentry);
+	if (!ns)
+		return ERR_PTR(-ENOENT);
+
+	read_lock(&tasklist_lock);
+	task = get_task_struct(pid_ns->child_reaper);
+	read_unlock(&tasklist_lock);
+
+	dentry = proc_namespace_instantiate(dentry, task, ns);
+	put_task_struct(task);
+	ns->ops->put(ns);
+
+	return dentry;
+}
+
+/* Deny MAY_EXEC on symlink inodes; every other access is allowed */
+static int proc_namespaces_permission(struct inode *inode, int mask)
+{
+	if ((mask & MAY_EXEC) && S_ISLNK(inode->i_mode))
+		return -EACCES;
+
+	return 0;
+}
+
+static int proc_namespaces_getattr(const struct path *path, struct kstat *stat,
+				   u32 request_mask, unsigned int query_flags)
+{
+	struct inode *inode = d_inode(path->dentry);
+
+	generic_fillattr(inode, stat);
+	return 0;
+}
+
+static const struct inode_operations proc_namespaces_inode_operations = {
+	.lookup		= proc_namespaces_lookup,
+	.permission	= proc_namespaces_permission,
+	.getattr	= proc_namespaces_getattr,
+};
+
+/* ->readlink(): the link text is the dentry name, "<type>:[<inum>]" */
+static int proc_namespaces_readlink(struct dentry *dentry, char __user *buffer, int buflen)
+{
+	struct inode *dir = dentry->d_parent->d_inode;
+	struct pid_namespace *pid_ns = proc_pid_ns(dir->i_sb);
+	struct ns_common *ns;
+
+	ns = get_namespace_by_dentry(pid_ns, dentry);
+	if (!ns)
+		return -ENOENT;
+	ns->ops->put(ns);
+
+	/* proc_namespaces_readdir() creates dentry names in namespace format */
+	return readlink_copy(buffer, buflen, dentry->d_name.name);
+}
+
+/* Defined in fs/nsfs.c; no header declares it yet */
+int __ns_get_path(struct path *path, struct ns_common *ns);
+
+/*
+ * ->get_link(): jump to the path that __ns_get_path() returns for the
+ * namespace named by @dentry. A NULL @dentry is refused with -ECHILD.
+ */
+static const char *proc_namespaces_getlink(struct dentry *dentry,
+				struct inode *inode, struct delayed_call *done)
+{
+	struct pid_namespace *pid_ns = proc_pid_ns(inode->i_sb);
+	struct ns_common *ns;
+	struct path path;
+	int ret;
+
+	if (!dentry)
+		return ERR_PTR(-ECHILD);
+
+	/* On -EAGAIN look the namespace up again and retry */
+	while (1) {
+		ret = -ENOENT;
+		ns = get_namespace_by_dentry(pid_ns, dentry);
+		if (!ns)
+			goto out;
+
+		ret = __ns_get_path(&path, ns);
+		if (ret == -EAGAIN)
+			continue;
+		if (ret)
+			goto out;
+		break;
+	}
+
+	ret = nd_jump_link(&path);
+out:
+	return ERR_PTR(ret);
+}
+
+static const struct inode_operations proc_namespaces_link_inode_operations = {
+	.readlink	= proc_namespaces_readlink,
+	.get_link	= proc_namespaces_getlink,
+};
+
+/* ->d_delete(): return 1 once the dentry's namespace cannot be found */
+static int namespace_delete_dentry(const struct dentry *dentry)
+{
+	struct inode *dir = dentry->d_parent->d_inode;
+	struct pid_namespace *pid_ns = proc_pid_ns(dir->i_sb);
+	struct ns_common *ns;
+
+	ns = get_namespace_by_dentry(pid_ns, dentry);
+	if (!ns)
+		return 1;
+
+	ns->ops->put(ns);
+	return 0;
+}
+
+static const struct dentry_operations namespaces_dentry_operations = {
+	.d_delete	= namespace_delete_dentry,
+};
+
+/* Owner is uid/gid 0 of the /proc user namespace, else global root */
+static void namespace_update_inode(struct inode *inode)
+{
+	struct user_namespace *user_ns = proc_pid_ns(inode->i_sb)->user_ns;
+
+	inode->i_uid = make_kuid(user_ns, 0);
+	if (!uid_valid(inode->i_uid))
+		inode->i_uid = GLOBAL_ROOT_UID;
+
+	inode->i_gid = make_kgid(user_ns, 0);
+	if (!gid_valid(inode->i_gid))
+		inode->i_gid = GLOBAL_ROOT_GID;
+}
+
+/* Create the symlink inode for namespace @ptr and attach it to @dentry */
+static struct dentry *proc_namespace_instantiate(struct dentry *dentry,
+	struct task_struct *task, const void *ptr)
+{
+	const struct ns_common *ns = ptr;
+	struct inode *inode;
+	struct proc_inode *ei;
+
+	/*
+	 * Create inode with credentials of @task, and add it to @task's
+	 * quick removal list.
+	 */
+	inode = proc_pid_make_inode(dentry->d_sb, task, S_IFLNK | S_IRWXUGO);
+	if (!inode)
+		return ERR_PTR(-ENOENT);
+
+	ei = PROC_I(inode);
+	inode->i_op = &proc_namespaces_link_inode_operations;
+	ei->ns_ops = ns->ops;
+	namespace_update_inode(inode);
+
+	d_set_d_op(dentry, &namespaces_dentry_operations);
+	return d_splice_alias(inode, dentry);
+}
+
+/*
+ * ->iterate_shared(): list the visible namespaces as "<type>:[<inum>]".
+ *
+ * Past "." and "..", ctx->pos is kept at the inum of the last visited
+ * namespace plus 2, which is where the next call restarts ns_get_next().
+ */
+static int proc_namespaces_readdir(struct file *file, struct dir_context *ctx)
+{
+	struct pid_namespace *pid_ns = proc_pid_ns(file_inode(file)->i_sb);
+	struct user_namespace *user_ns;
+	struct task_struct *task;
+	struct ns_common *ns;
+	unsigned int inum;
+
+	read_lock(&tasklist_lock);
+	task = get_task_struct(pid_ns->child_reaper);
+	read_unlock(&tasklist_lock);
+
+	if (!dir_emit_dots(file, ctx))
+		goto out;
+
+	inum = ctx->pos - 2;
+	while ((ns = ns_get_next(&inum)) != NULL) {
+		unsigned int len;
+		char name[32];
+
+		if (ns->ops != &userns_operations)
+			user_ns = ns->ops->owner(ns);
+		else
+			user_ns = container_of(ns, struct user_namespace, ns);
+
+		if (!in_userns(pid_ns->user_ns, user_ns))
+			goto next;
+
+		len = snprintf(name, sizeof(name), "%s:[%u]", ns->ops->name, inum);
+
+		if (!proc_fill_cache(file, ctx, name, len,
+				     proc_namespace_instantiate, task, ns)) {
+			ns->ops->put(ns);
+			break;
+		}
+next:
+		ns->ops->put(ns);
+		ctx->pos = inum + 2;
+	}
+out:
+	put_task_struct(task);
+	return 0;
+}
+
+static const struct file_operations proc_namespaces_file_operations = {
+	.read		= generic_read_dir,
+	.iterate_shared	= proc_namespaces_readdir,
+	.llseek		= generic_file_llseek,
+};
+
+/*
+ * Create the "namespaces" directory under the root of @s and store its
+ * dentry in proc_fs_info. Returns 0 on success or -ENOMEM.
+ */
+int proc_setup_namespaces(struct super_block *s)
+{
+	struct proc_fs_info *fs_info = proc_sb_info(s);
+	struct inode *root_inode = d_inode(s->s_root);
+	struct dentry *namespaces;
+	int ret = -ENOMEM;
+
+	inode_lock(root_inode);
+	namespaces = d_alloc_name(s->s_root, "namespaces");
+	if (namespaces) {
+		struct inode *inode = new_inode_pseudo(s);
+		if (inode) {
+			inode->i_ino = namespaces_inum;
+			inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+			inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO;
+			inode->i_uid = GLOBAL_ROOT_UID;
+			inode->i_gid = GLOBAL_ROOT_GID;
+			inode->i_op = &proc_namespaces_inode_operations;
+			inode->i_fop = &proc_namespaces_file_operations;
+			d_add(namespaces, inode);
+			ret = 0;
+		} else {
+			dput(namespaces);
+		}
+	}
+	inode_unlock(root_inode);
+
+	if (ret)
+		pr_err("proc_setup_namespaces: can't allocate /proc/namespaces\n");
+	else
+		fs_info->proc_namespaces = namespaces;
+
+	return ret;
+}
+
+/* Allocate the inode number shared by all /proc/namespaces directories */
+void __init proc_namespaces_init(void)
+{
+	proc_alloc_inum(&namespaces_inum);
+}
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 5e444d4f9717..e4e4f90fca3d 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -206,6 +206,10 @@ static int proc_fill_super(struct super_block *s, struct fs_context *fc)
 		return -ENOMEM;
 	}
 
+	ret = proc_setup_namespaces(s);
+	if (ret)
+		return ret;
+
 	ret = proc_setup_self(s);
 	if (ret) {
 		return ret;
@@ -272,6 +276,9 @@ static void proc_kill_sb(struct super_block *sb)
 	dput(fs_info->proc_self);
 	dput(fs_info->proc_thread_self);
 
+	if (fs_info->proc_namespaces)
+		dput(fs_info->proc_namespaces);
+
 	kill_anon_super(sb);
 	put_pid_ns(fs_info->pid_ns);
 	kfree(fs_info);
@@ -289,6 +296,7 @@ void __init proc_root_init(void)
 {
 	proc_init_kmemcache();
 	set_proc_pid_nlink();
+	proc_namespaces_init();
 	proc_self_init();
 	proc_thread_self_init();
 	proc_symlink("mounts", NULL, "self/mounts");
@@ -326,8 +334,15 @@ static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentr
 
 static int proc_root_readdir(struct file *file, struct dir_context *ctx)
 {
-	if (ctx->pos < FIRST_PROCESS_ENTRY) {
+	if (ctx->pos < NAMESPACES_ENTRY) {
 		int error = proc_readdir(file, ctx);
+		if (unlikely(error <= 0))
+			return error;
+		ctx->pos = NAMESPACES_ENTRY;
+	}
+
+	if (ctx->pos == NAMESPACES_ENTRY) {
+		int error = proc_emit_namespaces(file, ctx);
 		if (unlikely(error <= 0))
 			return error;
 		ctx->pos = FIRST_PROCESS_ENTRY;
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 97b3f5f06db9..8b0002a6cacf 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -61,6 +61,7 @@ struct proc_fs_info {
 	struct pid_namespace *pid_ns;
 	struct dentry *proc_self;        /* For /proc/self */
 	struct dentry *proc_thread_self; /* For /proc/thread-self */
+	struct dentry *proc_namespaces;  /* For /proc/namespaces */
 	kgid_t pid_gid;
 	enum proc_hidepid hide_pid;
 	enum proc_pidonly pidonly;