For users that hold a reference to a pidfd, procfs might not even be available, nor is it desirable to parse through procfs just for the sake of getting namespace file descriptors for a process. Make it possible to directly retrieve namespace file descriptors from a pidfd. Pidfds can already be used with setns() to change a set of namespaces atomically. Signed-off-by: Christian Brauner <brauner@xxxxxxxxxx> --- fs/pidfs.c | 92 ++++++++++++++++++++++++++++++++++++++++++++++ include/uapi/linux/pidfd.h | 14 +++++++ 2 files changed, 106 insertions(+) diff --git a/fs/pidfs.c b/fs/pidfs.c index dbb9d854d1c5..957284e8b2dd 100644 --- a/fs/pidfs.c +++ b/fs/pidfs.c @@ -11,10 +11,16 @@ #include <linux/proc_fs.h> #include <linux/proc_ns.h> #include <linux/pseudo_fs.h> +#include <linux/ptrace.h> #include <linux/seq_file.h> #include <uapi/linux/pidfd.h> +#include <linux/ipc_namespace.h> +#include <linux/time_namespace.h> +#include <linux/utsname.h> +#include <net/net_namespace.h> #include "internal.h" +#include "mount.h" #ifdef CONFIG_PROC_FS /** @@ -108,11 +114,97 @@ static __poll_t pidfd_poll(struct file *file, struct poll_table_struct *pts) return poll_flags; } +static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + struct task_struct *task __free(put_task) = NULL; + struct nsproxy *nsp __free(put_nsproxy) = NULL; + struct user_namespace *user_ns = NULL; + struct pid_namespace *pid_ns = NULL; + struct pid *pid = pidfd_pid(file); + struct ns_common *ns_common; + + if (arg) + return -EINVAL; + + task = get_pid_task(pid, PIDTYPE_PID); + if (!task) + return -ESRCH; + + scoped_guard(task_lock, task) { + nsp = task->nsproxy; + if (nsp) + get_nsproxy(nsp); + } + if (!nsp) + return -ESRCH; /* just pretend it didn't exist */ + + /* + * We're trying to open a file descriptor to the namespace so perform a + * filesystem cred ptrace check. Also, we mirror nsfs behavior. 
+ */ + if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS)) + return -EACCES; + + switch (cmd) { + case PIDFD_GET_CGROUP_NAMESPACE: + get_cgroup_ns(nsp->cgroup_ns); + ns_common = &nsp->cgroup_ns->ns; + break; + case PIDFD_GET_IPC_NAMESPACE: + get_ipc_ns(nsp->ipc_ns); + ns_common = &nsp->ipc_ns->ns; + break; + case PIDFD_GET_MNT_NAMESPACE: + get_mnt_ns(nsp->mnt_ns); + ns_common = &nsp->mnt_ns->ns; + break; + case PIDFD_GET_NET_NAMESPACE: + ns_common = &nsp->net_ns->ns; + get_net_ns(ns_common); + break; + case PIDFD_GET_PID_NAMESPACE: + rcu_read_lock(); + pid_ns = get_pid_ns(task_active_pid_ns(task)); + rcu_read_unlock(); + ns_common = &pid_ns->ns; + break; + case PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE: + get_pid_ns(nsp->pid_ns_for_children); + ns_common = &nsp->pid_ns_for_children->ns; + break; + case PIDFD_GET_TIME_NAMESPACE: + get_time_ns(nsp->time_ns); + ns_common = &nsp->time_ns->ns; + break; + case PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE: + get_time_ns(nsp->time_ns_for_children); + ns_common = &nsp->time_ns_for_children->ns; + break; + case PIDFD_GET_USER_NAMESPACE: + rcu_read_lock(); + user_ns = get_user_ns(task_cred_xxx(task, user_ns)); + rcu_read_unlock(); + ns_common = &user_ns->ns; + break; + case PIDFD_GET_UTS_NAMESPACE: + get_uts_ns(nsp->uts_ns); + ns_common = &nsp->uts_ns->ns; + break; + default: + return -ENOIOCTLCMD; + } + + /* open_namespace() unconditionally consumes the reference */ + return open_namespace(ns_common); +} + static const struct file_operations pidfs_file_operations = { .poll = pidfd_poll, #ifdef CONFIG_PROC_FS .show_fdinfo = pidfd_show_fdinfo, #endif + .unlocked_ioctl = pidfd_ioctl, + .compat_ioctl = compat_ptr_ioctl, }; struct pid *pidfd_pid(const struct file *file) diff --git a/include/uapi/linux/pidfd.h b/include/uapi/linux/pidfd.h index 72ec000a97cd..565fc0629fff 100644 --- a/include/uapi/linux/pidfd.h +++ b/include/uapi/linux/pidfd.h @@ -5,6 +5,7 @@ #include <linux/types.h> #include <linux/fcntl.h> +#include <linux/ioctl.h> /* 
Flags for pidfd_open(). */ #define PIDFD_NONBLOCK O_NONBLOCK @@ -15,4 +16,17 @@ #define PIDFD_SIGNAL_THREAD_GROUP (1UL << 1) #define PIDFD_SIGNAL_PROCESS_GROUP (1UL << 2) +#define PIDFS_IOCTL_MAGIC 0xFF + +#define PIDFD_GET_CGROUP_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 1) +#define PIDFD_GET_IPC_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 2) +#define PIDFD_GET_MNT_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 3) +#define PIDFD_GET_NET_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 4) +#define PIDFD_GET_PID_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 5) +#define PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 6) +#define PIDFD_GET_TIME_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 7) +#define PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 8) +#define PIDFD_GET_USER_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 9) +#define PIDFD_GET_UTS_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 10) + #endif /* _UAPI_LINUX_PIDFD_H */ -- 2.43.0