Now that we provide a unique 64-bit mount ID interface in statx, we can now provide a race-free way for name_to_handle_at(2) to provide a file handle and corresponding mount without needing to worry about racing with /proc/mountinfo parsing. While this is not necessary if you are using AT_EMPTY_PATH and don't care about an extra statx(2) call, users that pass full paths into name_to_handle_at(2) need to know which mount the file handle comes from (to make sure they don't try to open_by_handle_at a file handle from a different filesystem) and switching to AT_EMPTY_PATH would require allocating a file for every name_to_handle_at(2) call, turning err = name_to_handle_at(-EBADF, "/foo/bar/baz", &handle, &mntid, AT_HANDLE_MNT_ID_UNIQUE); into int fd = openat(-EBADF, "/foo/bar/baz", O_PATH | O_CLOEXEC); err1 = name_to_handle_at(fd, "", &handle, &unused_mntid, AT_EMPTY_PATH); err2 = statx(fd, "", AT_EMPTY_PATH, STATX_MNT_ID_UNIQUE, &statxbuf); mntid = statxbuf.stx_mnt_id; close(fd); Unlike AT_HANDLE_FID, as per Amir's suggestion, AT_HANDLE_MNT_ID_UNIQUE uses a new AT_* bit from the historically-unused 0xFF range (which we now define as being the "per-syscall" range for AT_* bits). Signed-off-by: Aleksa Sarai <cyphar@xxxxxxxxxx> --- Changes in v2: - Fixed a few minor compiler warnings and a buggy copy_to_user() check. - Rename AT_HANDLE_UNIQUE_MOUNT_ID -> AT_HANDLE_MNT_ID_UNIQUE to match statx. - Switched to using an AT_* bit from 0xFF and defining that range as being "per-syscall" for future usage. - Sync tools/ copy of <linux/fcntl.h> to include changes. - v1: <https://lore.kernel.org/r/20240520-exportfs-u64-mount-id-v1-1-f55fd9215b8e@xxxxxxxxxx> --- fs/fhandle.c | 29 ++++++++++++++++++++++------- include/linux/syscalls.h | 2 +- include/uapi/linux/fcntl.h | 28 +++++++++++++++++++++------- tools/include/uapi/linux/fcntl.h | 28 +++++++++++++++++++++------- 4 files changed, 65 insertions(+), 22 deletions(-) diff --git a/fs/fhandle.c b/fs/fhandle.c index 8a7f86c2139a..c6e90161b900 100644 --- a/fs/fhandle.c +++ b/fs/fhandle.c @@ -16,7 +16,8 @@ static long do_sys_name_to_handle(const struct path *path, struct file_handle __user *ufh, - int __user *mnt_id, int fh_flags) + void __user *mnt_id, bool unique_mntid, + int fh_flags) { long retval; struct file_handle f_handle; @@ -69,9 +70,19 @@ static long do_sys_name_to_handle(const struct path *path, } else retval = 0; /* copy the mount id */ - if (put_user(real_mount(path->mnt)->mnt_id, mnt_id) || - copy_to_user(ufh, handle, - struct_size(handle, f_handle, handle_bytes))) + if (unique_mntid) { + if (put_user(real_mount(path->mnt)->mnt_id_unique, + (u64 __user *) mnt_id)) + retval = -EFAULT; + } else { + if (put_user(real_mount(path->mnt)->mnt_id, + (int __user *) mnt_id)) + retval = -EFAULT; + } + /* copy the handle */ + if (retval != -EFAULT && + copy_to_user(ufh, handle, + struct_size(handle, f_handle, handle_bytes))) retval = -EFAULT; kfree(handle); return retval; @@ -83,6 +94,7 @@ static long do_sys_name_to_handle(const struct path *path, * @name: name that should be converted to handle. * @handle: resulting file handle * @mnt_id: mount id of the file system containing the file + * (u64 if AT_HANDLE_MNT_ID_UNIQUE, otherwise int) * @flag: flag value to indicate whether to follow symlink or not * and whether a decodable file handle is required. * @@ -92,7 +104,7 @@ static long do_sys_name_to_handle(const struct path *path, * value required. */ SYSCALL_DEFINE5(name_to_handle_at, int, dfd, const char __user *, name, - struct file_handle __user *, handle, int __user *, mnt_id, + struct file_handle __user *, handle, void __user *, mnt_id, int, flag) { struct path path; @@ -100,7 +112,8 @@ SYSCALL_DEFINE5(name_to_handle_at, int, dfd, const char __user *, name, int fh_flags; int err; - if (flag & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH | AT_HANDLE_FID)) + if (flag & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH | AT_HANDLE_FID | + AT_HANDLE_MNT_ID_UNIQUE)) return -EINVAL; lookup_flags = (flag & AT_SYMLINK_FOLLOW) ? LOOKUP_FOLLOW : 0; @@ -109,7 +122,9 @@ SYSCALL_DEFINE5(name_to_handle_at, int, dfd, const char __user *, name, lookup_flags |= LOOKUP_EMPTY; err = user_path_at(dfd, name, lookup_flags, &path); if (!err) { - err = do_sys_name_to_handle(&path, handle, mnt_id, fh_flags); + err = do_sys_name_to_handle(&path, handle, mnt_id, + flag & AT_HANDLE_MNT_ID_UNIQUE, + fh_flags); path_put(&path); } return err; diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index e619ac10cd23..99c5a783a01e 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -863,7 +863,7 @@ asmlinkage long sys_fanotify_mark(int fanotify_fd, unsigned int flags, const char __user *pathname); asmlinkage long sys_name_to_handle_at(int dfd, const char __user *name, struct file_handle __user *handle, - int __user *mnt_id, int flag); + void __user *mnt_id, int flag); asmlinkage long sys_open_by_handle_at(int mountdirfd, struct file_handle __user *handle, int flags); diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h index c0bcc185fa48..9ed9d65842c1 100644 --- a/include/uapi/linux/fcntl.h +++ b/include/uapi/linux/fcntl.h @@ -87,22 +87,24 @@ #define DN_ATTRIB 0x00000020 /* File changed attibutes */ #define DN_MULTISHOT 0x80000000 /* Don't remove notifier */ +#define AT_FDCWD -100 /* Special value used to indicate + openat should use the current + working directory. */ +#define AT_SYMLINK_NOFOLLOW 0x100 /* Do not follow symbolic links. */ + /* - * The constants AT_REMOVEDIR and AT_EACCESS have the same value. AT_EACCESS is - * meaningful only to faccessat, while AT_REMOVEDIR is meaningful only to + * The constants AT_REMOVEDIR and AT_EACCESS have the same value. AT_EACCESS + * is meaningful only to faccessat, while AT_REMOVEDIR is meaningful only to * unlinkat. The two functions do completely different things and therefore, * the flags can be allowed to overlap. For example, passing AT_REMOVEDIR to * faccessat would be undefined behavior and thus treating it equivalent to * AT_EACCESS is valid undefined behavior. */ -#define AT_FDCWD -100 /* Special value used to indicate - openat should use the current - working directory. */ -#define AT_SYMLINK_NOFOLLOW 0x100 /* Do not follow symbolic links. */ #define AT_EACCESS 0x200 /* Test access permitted for effective IDs, not real IDs. */ #define AT_REMOVEDIR 0x200 /* Remove directory instead of unlinking file. */ + #define AT_SYMLINK_FOLLOW 0x400 /* Follow symbolic links. */ #define AT_NO_AUTOMOUNT 0x800 /* Suppress terminal automount traversal */ #define AT_EMPTY_PATH 0x1000 /* Allow empty relative pathname */ @@ -114,10 +116,22 @@ #define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */ -/* Flags for name_to_handle_at(2). We reuse AT_ flag space to save bits... */ +/* + * All new purely-syscall-specific AT_* flags should consider using bits from + * 0xFF, but the bits used by RENAME_* (0x7) should be avoided in case users + * decide to pass AT_* flags to renameat2() by accident. These flag bits are + * free for re-use by other syscall's syscall-specific flags without worry. + */ + +/* + * Flags for name_to_handle_at(2). To save AT_ flag space we re-use the + * AT_EACCESS/AT_REMOVEDIR bit for AT_HANDLE_FID. + */ #define AT_HANDLE_FID AT_REMOVEDIR /* file handle is needed to compare object identity and may not be usable to open_by_handle_at(2) */ +#define AT_HANDLE_MNT_ID_UNIQUE 0x80 /* return the u64 unique mount id */ + #if defined(__KERNEL__) #define AT_GETATTR_NOSEC 0x80000000 #endif diff --git a/tools/include/uapi/linux/fcntl.h b/tools/include/uapi/linux/fcntl.h index 282e90aeb163..f671312ca481 100644 --- a/tools/include/uapi/linux/fcntl.h +++ b/tools/include/uapi/linux/fcntl.h @@ -85,22 +85,24 @@ #define DN_ATTRIB 0x00000020 /* File changed attibutes */ #define DN_MULTISHOT 0x80000000 /* Don't remove notifier */ +#define AT_FDCWD -100 /* Special value used to indicate + openat should use the current + working directory. */ +#define AT_SYMLINK_NOFOLLOW 0x100 /* Do not follow symbolic links. */ + /* - * The constants AT_REMOVEDIR and AT_EACCESS have the same value. AT_EACCESS is - * meaningful only to faccessat, while AT_REMOVEDIR is meaningful only to + * The constants AT_REMOVEDIR and AT_EACCESS have the same value. AT_EACCESS + * is meaningful only to faccessat, while AT_REMOVEDIR is meaningful only to * unlinkat. The two functions do completely different things and therefore, * the flags can be allowed to overlap. For example, passing AT_REMOVEDIR to * faccessat would be undefined behavior and thus treating it equivalent to * AT_EACCESS is valid undefined behavior. */ -#define AT_FDCWD -100 /* Special value used to indicate - openat should use the current - working directory. */ -#define AT_SYMLINK_NOFOLLOW 0x100 /* Do not follow symbolic links. */ #define AT_EACCESS 0x200 /* Test access permitted for effective IDs, not real IDs. */ #define AT_REMOVEDIR 0x200 /* Remove directory instead of unlinking file. */ + #define AT_SYMLINK_FOLLOW 0x400 /* Follow symbolic links. */ #define AT_NO_AUTOMOUNT 0x800 /* Suppress terminal automount traversal */ #define AT_EMPTY_PATH 0x1000 /* Allow empty relative pathname */ @@ -112,10 +114,22 @@ #define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */ -/* Flags for name_to_handle_at(2). We reuse AT_ flag space to save bits... */ +/* + * All new purely-syscall-specific AT_* flags should consider using bits from + * 0xFF, but the bits used by RENAME_* (0x7) should be avoided in case users + * decide to pass AT_* flags to renameat2() by accident. These flag bits are + * free for re-use by other syscall's syscall-specific flags without worry. + */ + +/* + * Flags for name_to_handle_at(2). To save AT_ flag space we re-use the + * AT_EACCESS/AT_REMOVEDIR bit for AT_HANDLE_FID. + */ #define AT_HANDLE_FID AT_REMOVEDIR /* file handle is needed to compare object identity and may not be usable to open_by_handle_at(2) */ +#define AT_HANDLE_MNT_ID_UNIQUE 0x80 /* returned mount id is the u64 unique mount id */ + #if defined(__KERNEL__) #define AT_GETATTR_NOSEC 0x80000000 #endif --- base-commit: 584bbf439d0fa83d728ec49f3a38c581bdc828b4 change-id: 20240515-exportfs-u64-mount-id-9ebb5c58b53c Best regards, -- Aleksa Sarai <cyphar@xxxxxxxxxx>