The problem is that a pathname can contain absolute symlinks, and currently they are resolved relative to the current root. But if we want to open a file in another mount namespace and we have a file descriptor to its root directory, we want the pathname to be resolved in the target mount namespace; for this case we need the new flags O_ATROOT and AT_FDROOT. If O_ATROOT is set for openat() or AT_FDROOT is set for fstatat, linkat, unlinkat, path_init is executed with the LOOKUP_DFD_ROOT flag. v2: fix the value of O_ATROOT so that it does not collide with other constants Signed-off-by: Andrey Vagin <avagin@xxxxxxxxxx> --- fs/exec.c | 4 +++- fs/namei.c | 26 +++++++++++++++++--------- fs/open.c | 6 +++++- fs/stat.c | 4 +++- fs/utimes.c | 4 +++- include/uapi/asm-generic/fcntl.h | 4 ++++ include/uapi/linux/fcntl.h | 1 + 7 files changed, 36 insertions(+), 13 deletions(-) diff --git a/fs/exec.c b/fs/exec.c index 887c1c9..473b709 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -775,12 +775,14 @@ static struct file *do_open_execat(int fd, struct filename *name, int flags) .lookup_flags = LOOKUP_FOLLOW, }; - if ((flags & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) != 0) + if ((flags & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH | AT_FDROOT)) != 0) return ERR_PTR(-EINVAL); if (flags & AT_SYMLINK_NOFOLLOW) open_exec_flags.lookup_flags &= ~LOOKUP_FOLLOW; if (flags & AT_EMPTY_PATH) open_exec_flags.lookup_flags |= LOOKUP_EMPTY; + if (flags & AT_FDROOT) + open_exec_flags.lookup_flags |= LOOKUP_DFD_ROOT; file = do_filp_open(fd, name, &open_exec_flags); if (IS_ERR(file)) diff --git a/fs/namei.c b/fs/namei.c index 5f08b69..9958b60 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2581,7 +2581,8 @@ user_path_parent(int dfd, const char __user *path, unsigned int flags) { /* only LOOKUP_REVAL is allowed in extra flags */ - return filename_parentat(dfd, getname(path), flags & LOOKUP_REVAL, + return filename_parentat(dfd, getname(path), + flags & (LOOKUP_REVAL | LOOKUP_DFD_ROOT), parent, last, type); } @@ -3577,7 +3578,7 @@ 
static struct dentry *filename_create(int dfd, struct filename *name, * Note that only LOOKUP_REVAL and LOOKUP_DIRECTORY matter here. Any * other flags passed in are ignored! */ - lookup_flags &= LOOKUP_REVAL; + lookup_flags &= LOOKUP_REVAL | LOOKUP_DFD_ROOT; name = filename_parentat(dfd, name, lookup_flags, path, &last, &type); if (IS_ERR(name)) @@ -3975,7 +3976,8 @@ EXPORT_SYMBOL(vfs_unlink); * writeout happening, and we don't want to prevent access to the directory * while waiting on the I/O. */ -static long do_unlinkat(int dfd, const char __user *pathname) +static long do_unlinkat(int dfd, const char __user *pathname, + unsigned int lookup_flags) { int error; struct filename *name; @@ -3985,7 +3987,6 @@ static long do_unlinkat(int dfd, const char __user *pathname) int type; struct inode *inode = NULL; struct inode *delegated_inode = NULL; - unsigned int lookup_flags = 0; retry: name = user_path_parent(dfd, pathname, &path, &last, &type, lookup_flags); @@ -4050,18 +4051,23 @@ slashes: SYSCALL_DEFINE3(unlinkat, int, dfd, const char __user *, pathname, int, flag) { - if ((flag & ~AT_REMOVEDIR) != 0) + unsigned int lookup_flags = 0; + + if ((flag & ~(AT_REMOVEDIR | AT_FDROOT)) != 0) return -EINVAL; if (flag & AT_REMOVEDIR) return do_rmdir(dfd, pathname); - return do_unlinkat(dfd, pathname); + if (flag & AT_FDROOT) + lookup_flags |= LOOKUP_DFD_ROOT; + + return do_unlinkat(dfd, pathname, lookup_flags); } SYSCALL_DEFINE1(unlink, const char __user *, pathname) { - return do_unlinkat(AT_FDCWD, pathname); + return do_unlinkat(AT_FDCWD, pathname, 0); } int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname) @@ -4212,7 +4218,7 @@ SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname, int how = 0; int error; - if ((flags & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH)) != 0) + if ((flags & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH | AT_FDROOT)) != 0) return -EINVAL; /* * To use null names we require CAP_DAC_READ_SEARCH @@ -4227,13 +4233,15 @@ 
SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname, if (flags & AT_SYMLINK_FOLLOW) how |= LOOKUP_FOLLOW; + if (flags & AT_FDROOT) + how |= LOOKUP_DFD_ROOT; retry: error = user_path_at(olddfd, oldname, how, &old_path); if (error) return error; new_dentry = user_path_create(newdfd, newname, &new_path, - (how & LOOKUP_REVAL)); + (how & (LOOKUP_REVAL | LOOKUP_DFD_ROOT))); error = PTR_ERR(new_dentry); if (IS_ERR(new_dentry)) goto out; diff --git a/fs/open.c b/fs/open.c index 93ae3cd..e0bc8d0 100644 --- a/fs/open.c +++ b/fs/open.c @@ -613,12 +613,14 @@ SYSCALL_DEFINE5(fchownat, int, dfd, const char __user *, filename, uid_t, user, int error = -EINVAL; int lookup_flags; - if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) != 0) + if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH | AT_FDROOT)) != 0) goto out; lookup_flags = (flag & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW; if (flag & AT_EMPTY_PATH) lookup_flags |= LOOKUP_EMPTY; + if (flag & AT_FDROOT) + lookup_flags |= LOOKUP_DFD_ROOT; retry: error = user_path_at(dfd, filename, lookup_flags, &path); if (error) @@ -941,6 +943,8 @@ static inline int build_open_flags(int flags, umode_t mode, struct open_flags *o lookup_flags |= LOOKUP_DIRECTORY; if (!(flags & O_NOFOLLOW)) lookup_flags |= LOOKUP_FOLLOW; + if (flags & O_ATROOT) + lookup_flags |= LOOKUP_DFD_ROOT; op->lookup_flags = lookup_flags; return 0; } diff --git a/fs/stat.c b/fs/stat.c index bc045c7..d71e7f2 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -95,13 +95,15 @@ int vfs_fstatat(int dfd, const char __user *filename, struct kstat *stat, unsigned int lookup_flags = 0; if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_NO_AUTOMOUNT | - AT_EMPTY_PATH)) != 0) + AT_EMPTY_PATH | AT_FDROOT)) != 0) goto out; if (!(flag & AT_SYMLINK_NOFOLLOW)) lookup_flags |= LOOKUP_FOLLOW; if (flag & AT_EMPTY_PATH) lookup_flags |= LOOKUP_EMPTY; + if (flag & AT_FDROOT) + lookup_flags |= LOOKUP_DFD_ROOT; retry: error = user_path_at(dfd, filename, lookup_flags, &path); if (error) diff --git 
a/fs/utimes.c b/fs/utimes.c index 85c40f4..78a9eb9 100644 --- a/fs/utimes.c +++ b/fs/utimes.c @@ -143,7 +143,7 @@ long do_utimes(int dfd, const char __user *filename, struct timespec *times, goto out; } - if (flags & ~AT_SYMLINK_NOFOLLOW) + if (flags & ~(AT_SYMLINK_NOFOLLOW | AT_FDROOT)) goto out; if (filename == NULL && dfd != AT_FDCWD) { @@ -165,6 +165,8 @@ long do_utimes(int dfd, const char __user *filename, struct timespec *times, if (!(flags & AT_SYMLINK_NOFOLLOW)) lookup_flags |= LOOKUP_FOLLOW; + if (flags & AT_FDROOT) + lookup_flags |= LOOKUP_DFD_ROOT; retry: error = user_path_at(dfd, filename, lookup_flags, &path); if (error) diff --git a/include/uapi/asm-generic/fcntl.h b/include/uapi/asm-generic/fcntl.h index e063eff..0436b1d 100644 --- a/include/uapi/asm-generic/fcntl.h +++ b/include/uapi/asm-generic/fcntl.h @@ -88,6 +88,10 @@ #define __O_TMPFILE 020000000 #endif +#ifndef O_ATROOT +#define O_ATROOT 040000000 /* dfd is a root */ +#endif + /* a horrid kludge trying to make sure that this will fail on old kernels */ #define O_TMPFILE (__O_TMPFILE | O_DIRECTORY) #define O_TMPFILE_MASK (__O_TMPFILE | O_DIRECTORY | O_CREAT) diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h index beed138..4f3b631 100644 --- a/include/uapi/linux/fcntl.h +++ b/include/uapi/linux/fcntl.h @@ -62,6 +62,7 @@ #define AT_SYMLINK_FOLLOW 0x400 /* Follow symbolic links. */ #define AT_NO_AUTOMOUNT 0x800 /* Suppress terminal automount traversal */ #define AT_EMPTY_PATH 0x1000 /* Allow empty relative pathname */ +#define AT_FDROOT 0x2000 /* Resolve a path as if dirfd is root */ #endif /* _UAPI_LINUX_FCNTL_H */ -- 2.5.5 _______________________________________________ Containers mailing list Containers@xxxxxxxxxxxxxxxxxxxxxxxxxx https://lists.linuxfoundation.org/mailman/listinfo/containers