On 14/11/2024 15:13, Christian Brauner wrote: > On Thu, Nov 14, 2024 at 02:13:06PM +0100, Erin Shepherd wrote: >> These two concerns combined with the special flag make me wonder if pidfs >> is so much of a special snowflake we should just special case it up front >> and skip all of the shared handle decode logic? > Care to try a patch and see what it looks like? The following is a completely untested sketch on top of the existing patch series. Some notes: - I made heavy use of the cleanup macros. I'm happy to convert things back to goto out_xx style if preferred - writing things this way just made bashing out the code without dropping resources on the floor easier - If you don't implement fh_to_dentry then name_to_handle_at will just return an error unless called with AT_HANDLE_FID. We need to decide what to do about that - The GET_PATH_FD_IS_NORMAL/etc constants don't match (what I see as) usual kernel style but I'm not sure how to conventionally express something like that Otherwise, I'm fairly happy with how it came out in the end. Maybe handle_to_path and do_handle_to_path could be collapsed into each other at this point. diff --git a/fs/fhandle.c b/fs/fhandle.c index 056116e58f43..697246085b69 100644 --- a/fs/fhandle.c +++ b/fs/fhandle.c @@ -11,6 +11,7 @@ #include <linux/personality.h> #include <linux/uaccess.h> #include <linux/compat.h> +#include <linux/pidfs.h> #include "internal.h" #include "mount.h" @@ -130,6 +131,11 @@ SYSCALL_DEFINE5(name_to_handle_at, int, dfd, const char __user *, name, return err; } +enum { + GET_PATH_FD_IS_NORMAL = 0, + GET_PATH_FD_IS_PIDFD = 1, +}; + static int get_path_from_fd(int fd, struct path *root) { if (fd == AT_FDCWD) { @@ -139,15 +145,16 @@ static int get_path_from_fd(int fd, struct path *root) path_get(root); spin_unlock(&fs->lock); } else { - struct fd f = fdget(fd); + CLASS(fd, f)(fd); if (!fd_file(f)) return -EBADF; + if (pidfd_pid(fd_file(f))) + return GET_PATH_FD_IS_PIDFD; *root = fd_file(f)->f_path; path_get(root); - fdput(f); } - return 0; + return GET_PATH_FD_IS_NORMAL; } enum handle_to_path_flags { @@ -287,84 +294,94 @@ static inline bool may_decode_fh(struct handle_to_path_ctx *ctx, return true; } -static int handle_to_path(int mountdirfd, struct file_handle __user *ufh, - struct path *path, unsigned int o_flags) +static int copy_handle_from_user(struct file_handle __user *ufh, struct file_handle **ret) { - int retval = 0; - struct file_handle f_handle; - struct file_handle *handle = NULL; - struct handle_to_path_ctx ctx = {}; - - retval = get_path_from_fd(mountdirfd, &ctx.root); - if (retval) - goto out_err; - - if (!may_decode_fh(&ctx, o_flags)) { - retval = -EPERM; - goto out_path; - } - + struct file_handle f_handle, *handle; if (copy_from_user(&f_handle, ufh, sizeof(struct file_handle))) { - retval = -EFAULT; - goto out_path; + return -EFAULT; } if ((f_handle.handle_bytes > MAX_HANDLE_SZ) || (f_handle.handle_bytes == 0)) { - retval = -EINVAL; - goto out_path; + return -EINVAL; } handle = kmalloc(struct_size(handle, f_handle, f_handle.handle_bytes), GFP_KERNEL); if (!handle) { - retval = -ENOMEM; - goto out_path; + return -ENOMEM; } + /* copy the full handle */ *handle = f_handle; if (copy_from_user(&handle->f_handle, &ufh->f_handle, f_handle.handle_bytes)) { - retval = -EFAULT; - goto out_handle; + kfree(handle); + return -EFAULT; } - retval = do_handle_to_path(handle, path, &ctx); + *ret = handle; + return 0; +} -out_handle: - kfree(handle); -out_path: - path_put(&ctx.root); -out_err: - return retval; +static int handle_to_path(struct path root, struct file_handle __user *ufh, + struct path *path, unsigned int o_flags) +{ + struct file_handle *handle = NULL; + struct handle_to_path_ctx ctx = { + .root = root, + }; + + if (!may_decode_fh(&ctx, o_flags)) { + return -EPERM; + } + + return do_handle_to_path(handle, path, &ctx); } static long do_handle_open(int mountdirfd, struct file_handle __user *ufh, int open_flag) { long retval = 0; - struct path path; - struct file *file; - int fd; + struct file_handle *handle __free(kfree) = NULL; + struct path __free(path_put) root = {}; + struct path __free(path_put) path = {}; + struct file *file = NULL; + int root_type; - retval = handle_to_path(mountdirfd, ufh, &path, open_flag); + root_type = get_path_from_fd(mountdirfd, &root); + if (root_type < 0) + return root_type; + + retval = copy_handle_from_user(ufh, &handle); if (retval) return retval; - fd = get_unused_fd_flags(open_flag); - if (fd < 0) { - path_put(&path); + CLASS(get_unused_fd, fd)(open_flag); + if (fd < 0) return fd; + + switch (root_type) { + case GET_PATH_FD_IS_NORMAL: + retval = handle_to_path(root, handle, &path, open_flag); + if (retval) + return retval; + file = file_open_root(&path, "", open_flag, 0); + if (IS_ERR(file)) { + return PTR_ERR(file); + } + break; + + case GET_PATH_FD_IS_PIDFD: + file = pidfd_open_by_handle( + handle->f_handle, handle->handle_bytes >> 2, handle->handle_type, + open_flag); + if (IS_ERR(file)) + return retval; + break; } - file = file_open_root(&path, "", open_flag, 0); - if (IS_ERR(file)) { - put_unused_fd(fd); - retval = PTR_ERR(file); - } else { - retval = fd; - fd_install(fd, file); - } - path_put(&path); - return retval; + + fd_install(fd, file); + return take_fd(fd); } /** diff --git a/fs/pidfs.c b/fs/pidfs.c index 0684a9b8fe71..65b72dc05380 100644 --- a/fs/pidfs.c +++ b/fs/pidfs.c @@ -453,22 +453,53 @@ static struct file_system_type pidfs_type = { .kill_sb = kill_anon_super, }; -struct file *pidfs_alloc_file(struct pid *pid, unsigned int flags) +static struct file *__pidfs_alloc_file(struct pid *pid, unsigned int flags) { - struct file *pidfd_file; struct path path; int ret; - ret = path_from_stashed(&pid->stashed, pidfs_mnt, get_pid(pid), &path); + ret = path_from_stashed(&pid->stashed, pidfs_mnt, pid, &path); if (ret < 0) return ERR_PTR(ret); pidfd_file = dentry_open(&path, flags, current_cred()); + if (!IS_ERR(pidfd_file)) + pidfd_file->f_flags |= (flags & PIDFD_THREAD); path_put(&path); return pidfd_file; } +struct file *pidfs_alloc_file(struct pid *pid, unsigned int flags) +{ + return __pidfs_alloc_file(get_pid(pid), flags); +} + +struct file *pidfd_open_by_handle(void *gen_fid, int fh_len, int fh_type, + unsigned int flags) +{ + bool thread = flags & PIDFD_THREAD; + struct pidfd_fid *fid = (struct pidfd_fid *)gen_fid; + struct pid *pid; + + if (fh_type != FILEID_INO64_GEN || fh_len < PIDFD_FID_LEN) + return ERR_PTR(-ESTALE); + + scoped_guard(rcu) { + pid = find_pid_ns(fid->pid, &init_pid_ns); + if (!pid || pid->ino != fid->ino || pid_vnr(pid) == 0) + return ERR_PTR(-ESTALE); + if(!pid_has_task(pid, thread ? PIDTYPE_PID : PIDTYPE_TGID)) + return ERR_PTR(-ESTALE); + /* Something better? EINVAL like pidfd_open wouldn't be + very obvious... */ + + pid = get_pid(pid); + } + + return __pidfs_alloc_file(pid, flags); +} + void __init pidfs_init(void) { pidfs_mnt = kern_mount(&pidfs_type); diff --git a/include/linux/pidfs.h b/include/linux/pidfs.h index 75bdf9807802..fba2654ae60f 100644 --- a/include/linux/pidfs.h +++ b/include/linux/pidfs.h @@ -3,6 +3,8 @@ #define _LINUX_PID_FS_H struct file *pidfs_alloc_file(struct pid *pid, unsigned int flags); +struct file *pidfd_open_by_handle(void *gen_fid, int fh_len, int fh_type, + unsigned int flags); void __init pidfs_init(void); #endif /* _LINUX_PID_FS_H */ diff --git a/kernel/fork.c b/kernel/fork.c index 22f43721d031..fc47c76e4ff4 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2015,11 +2015,6 @@ static int __pidfd_prepare(struct pid *pid, unsigned int flags, struct file **re put_unused_fd(pidfd); return PTR_ERR(pidfd_file); } - /* - * anon_inode_getfile() ignores everything outside of the - * O_ACCMODE | O_NONBLOCK mask, set PIDFD_THREAD manually. - */ - pidfd_file->f_flags |= (flags & PIDFD_THREAD); *ret = pidfd_file; return pidfd; }