[PATCH RFC v2] fhandle: expose u64 mount id to name_to_handle_at(2)

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Now that we provide a unique 64-bit mount ID interface in statx, we can
now provide a race-free way for name_to_handle_at(2) to provide a file
handle and corresponding mount without needing to worry about racing
with /proc/mountinfo parsing.

While this is not necessary if you are using AT_EMPTY_PATH and don't
care about an extra statx(2) call, users that pass full paths into
name_to_handle_at(2) need to know which mount the file handle comes from
(to make sure they don't try to open_by_handle_at a file handle from a
different filesystem) and switching to AT_EMPTY_PATH would require
allocating a file for every name_to_handle_at(2) call, turning

  err = name_to_handle_at(-EBADF, "/foo/bar/baz", &handle, &mntid,
                          AT_HANDLE_MNT_ID_UNIQUE);

into

  int fd = openat(-EBADF, "/foo/bar/baz", O_PATH | O_CLOEXEC);
  err1 = name_to_handle_at(fd, "", &handle, &unused_mntid, AT_EMPTY_PATH);
  err2 = statx(fd, "", AT_EMPTY_PATH, STATX_MNT_ID_UNIQUE, &statxbuf);
  mntid = statxbuf.stx_mnt_id;
  close(fd);

Unlike AT_HANDLE_FID, as per Amir's suggestion, AT_HANDLE_MNT_ID_UNIQUE
uses a new AT_* bit from the historically-unused 0xFF range (which we
now define as being the "per-syscall" range for AT_* bits).

Signed-off-by: Aleksa Sarai <cyphar@xxxxxxxxxx>
---
Changes in v2:
- Fixed a few minor compiler warnings and a buggy copy_to_user() check.
- Rename AT_HANDLE_UNIQUE_MOUNT_ID -> AT_HANDLE_MNT_ID_UNIQUE to match statx.
- Switched to using an AT_* bit from 0xFF and defining that range as
  being "per-syscall" for future usage.
- Sync tools/ copy of <linux/fcntl.h> to include changes.
- v1: <https://lore.kernel.org/r/20240520-exportfs-u64-mount-id-v1-1-f55fd9215b8e@xxxxxxxxxx>
---
 fs/fhandle.c                     | 29 ++++++++++++++++++++++-------
 include/linux/syscalls.h         |  2 +-
 include/uapi/linux/fcntl.h       | 28 +++++++++++++++++++++-------
 tools/include/uapi/linux/fcntl.h | 28 +++++++++++++++++++++-------
 4 files changed, 65 insertions(+), 22 deletions(-)

diff --git a/fs/fhandle.c b/fs/fhandle.c
index 8a7f86c2139a..c6e90161b900 100644
--- a/fs/fhandle.c
+++ b/fs/fhandle.c
@@ -16,7 +16,8 @@
 
 static long do_sys_name_to_handle(const struct path *path,
 				  struct file_handle __user *ufh,
-				  int __user *mnt_id, int fh_flags)
+				  void __user *mnt_id, bool unique_mntid,
+				  int fh_flags)
 {
 	long retval;
 	struct file_handle f_handle;
@@ -69,9 +70,19 @@ static long do_sys_name_to_handle(const struct path *path,
 	} else
 		retval = 0;
 	/* copy the mount id */
-	if (put_user(real_mount(path->mnt)->mnt_id, mnt_id) ||
-	    copy_to_user(ufh, handle,
-			 struct_size(handle, f_handle, handle_bytes)))
+	if (unique_mntid) {
+		if (put_user(real_mount(path->mnt)->mnt_id_unique,
+			     (u64 __user *) mnt_id))
+			retval = -EFAULT;
+	} else {
+		if (put_user(real_mount(path->mnt)->mnt_id,
+			     (int __user *) mnt_id))
+			retval = -EFAULT;
+	}
+	/* copy the handle */
+	if (retval != -EFAULT &&
+		copy_to_user(ufh, handle,
+			     struct_size(handle, f_handle, handle_bytes)))
 		retval = -EFAULT;
 	kfree(handle);
 	return retval;
@@ -83,6 +94,7 @@ static long do_sys_name_to_handle(const struct path *path,
  * @name: name that should be converted to handle.
  * @handle: resulting file handle
  * @mnt_id: mount id of the file system containing the file
+ *          (u64 if AT_HANDLE_MNT_ID_UNIQUE, otherwise int)
  * @flag: flag value to indicate whether to follow symlink or not
  *        and whether a decodable file handle is required.
  *
@@ -92,7 +104,7 @@ static long do_sys_name_to_handle(const struct path *path,
  * value required.
  */
 SYSCALL_DEFINE5(name_to_handle_at, int, dfd, const char __user *, name,
-		struct file_handle __user *, handle, int __user *, mnt_id,
+		struct file_handle __user *, handle, void __user *, mnt_id,
 		int, flag)
 {
 	struct path path;
@@ -100,7 +112,8 @@ SYSCALL_DEFINE5(name_to_handle_at, int, dfd, const char __user *, name,
 	int fh_flags;
 	int err;
 
-	if (flag & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH | AT_HANDLE_FID))
+	if (flag & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH | AT_HANDLE_FID |
+		     AT_HANDLE_MNT_ID_UNIQUE))
 		return -EINVAL;
 
 	lookup_flags = (flag & AT_SYMLINK_FOLLOW) ? LOOKUP_FOLLOW : 0;
@@ -109,7 +122,9 @@ SYSCALL_DEFINE5(name_to_handle_at, int, dfd, const char __user *, name,
 		lookup_flags |= LOOKUP_EMPTY;
 	err = user_path_at(dfd, name, lookup_flags, &path);
 	if (!err) {
-		err = do_sys_name_to_handle(&path, handle, mnt_id, fh_flags);
+		err = do_sys_name_to_handle(&path, handle, mnt_id,
+					    flag & AT_HANDLE_MNT_ID_UNIQUE,
+					    fh_flags);
 		path_put(&path);
 	}
 	return err;
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index e619ac10cd23..99c5a783a01e 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -863,7 +863,7 @@ asmlinkage long sys_fanotify_mark(int fanotify_fd, unsigned int flags,
 				  const char  __user *pathname);
 asmlinkage long sys_name_to_handle_at(int dfd, const char __user *name,
 				      struct file_handle __user *handle,
-				      int __user *mnt_id, int flag);
+				      void __user *mnt_id, int flag);
 asmlinkage long sys_open_by_handle_at(int mountdirfd,
 				      struct file_handle __user *handle,
 				      int flags);
diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h
index c0bcc185fa48..9ed9d65842c1 100644
--- a/include/uapi/linux/fcntl.h
+++ b/include/uapi/linux/fcntl.h
@@ -87,22 +87,24 @@
 #define DN_ATTRIB	0x00000020	/* File changed attibutes */
 #define DN_MULTISHOT	0x80000000	/* Don't remove notifier */
 
+#define AT_FDCWD		-100    /* Special value used to indicate
+                                           openat should use the current
+                                           working directory. */
+#define AT_SYMLINK_NOFOLLOW	0x100   /* Do not follow symbolic links.  */
+
 /*
- * The constants AT_REMOVEDIR and AT_EACCESS have the same value.  AT_EACCESS is
- * meaningful only to faccessat, while AT_REMOVEDIR is meaningful only to
+ * The constants AT_REMOVEDIR and AT_EACCESS have the same value.  AT_EACCESS
+ * is meaningful only to faccessat, while AT_REMOVEDIR is meaningful only to
  * unlinkat.  The two functions do completely different things and therefore,
  * the flags can be allowed to overlap.  For example, passing AT_REMOVEDIR to
  * faccessat would be undefined behavior and thus treating it equivalent to
  * AT_EACCESS is valid undefined behavior.
  */
-#define AT_FDCWD		-100    /* Special value used to indicate
-                                           openat should use the current
-                                           working directory. */
-#define AT_SYMLINK_NOFOLLOW	0x100   /* Do not follow symbolic links.  */
 #define AT_EACCESS		0x200	/* Test access permitted for
                                            effective IDs, not real IDs.  */
 #define AT_REMOVEDIR		0x200   /* Remove directory instead of
                                            unlinking file.  */
+
 #define AT_SYMLINK_FOLLOW	0x400   /* Follow symbolic links.  */
 #define AT_NO_AUTOMOUNT		0x800	/* Suppress terminal automount traversal */
 #define AT_EMPTY_PATH		0x1000	/* Allow empty relative pathname */
@@ -114,10 +116,22 @@
 
 #define AT_RECURSIVE		0x8000	/* Apply to the entire subtree */
 
-/* Flags for name_to_handle_at(2). We reuse AT_ flag space to save bits... */
+/*
+ * All new purely-syscall-specific AT_* flags should consider using bits from
+ * 0xFF, but the bits used by RENAME_* (0x7) should be avoided in case users
+ * decide to pass AT_* flags to renameat2() by accident. These flag bits are
+ * free for re-use by other syscall's syscall-specific flags without worry.
+ */
+
+/*
+ * Flags for name_to_handle_at(2). To save AT_ flag space we re-use the
+ * AT_EACCESS/AT_REMOVEDIR bit for AT_HANDLE_FID.
+ */
 #define AT_HANDLE_FID		AT_REMOVEDIR	/* file handle is needed to
 					compare object identity and may not
 					be usable to open_by_handle_at(2) */
+#define AT_HANDLE_MNT_ID_UNIQUE	0x80	/* return the u64 unique mount id */
+
 #if defined(__KERNEL__)
 #define AT_GETATTR_NOSEC	0x80000000
 #endif
diff --git a/tools/include/uapi/linux/fcntl.h b/tools/include/uapi/linux/fcntl.h
index 282e90aeb163..f671312ca481 100644
--- a/tools/include/uapi/linux/fcntl.h
+++ b/tools/include/uapi/linux/fcntl.h
@@ -85,22 +85,24 @@
 #define DN_ATTRIB	0x00000020	/* File changed attibutes */
 #define DN_MULTISHOT	0x80000000	/* Don't remove notifier */
 
+#define AT_FDCWD		-100    /* Special value used to indicate
+                                           openat should use the current
+                                           working directory. */
+#define AT_SYMLINK_NOFOLLOW	0x100   /* Do not follow symbolic links.  */
+
 /*
- * The constants AT_REMOVEDIR and AT_EACCESS have the same value.  AT_EACCESS is
- * meaningful only to faccessat, while AT_REMOVEDIR is meaningful only to
+ * The constants AT_REMOVEDIR and AT_EACCESS have the same value.  AT_EACCESS
+ * is meaningful only to faccessat, while AT_REMOVEDIR is meaningful only to
  * unlinkat.  The two functions do completely different things and therefore,
  * the flags can be allowed to overlap.  For example, passing AT_REMOVEDIR to
  * faccessat would be undefined behavior and thus treating it equivalent to
  * AT_EACCESS is valid undefined behavior.
  */
-#define AT_FDCWD		-100    /* Special value used to indicate
-                                           openat should use the current
-                                           working directory. */
-#define AT_SYMLINK_NOFOLLOW	0x100   /* Do not follow symbolic links.  */
 #define AT_EACCESS		0x200	/* Test access permitted for
                                            effective IDs, not real IDs.  */
 #define AT_REMOVEDIR		0x200   /* Remove directory instead of
                                            unlinking file.  */
+
 #define AT_SYMLINK_FOLLOW	0x400   /* Follow symbolic links.  */
 #define AT_NO_AUTOMOUNT		0x800	/* Suppress terminal automount traversal */
 #define AT_EMPTY_PATH		0x1000	/* Allow empty relative pathname */
@@ -112,10 +114,22 @@
 
 #define AT_RECURSIVE		0x8000	/* Apply to the entire subtree */
 
-/* Flags for name_to_handle_at(2). We reuse AT_ flag space to save bits... */
+/*
+ * All new purely-syscall-specific AT_* flags should consider using bits from
+ * 0xFF, but the bits used by RENAME_* (0x7) should be avoided in case users
+ * decide to pass AT_* flags to renameat2() by accident. These flag bits are
+ * free for re-use by other syscall's syscall-specific flags without worry.
+ */
+
+/*
+ * Flags for name_to_handle_at(2). To save AT_ flag space we re-use the
+ * AT_EACCESS/AT_REMOVEDIR bit for AT_HANDLE_FID.
+ */
 #define AT_HANDLE_FID		AT_REMOVEDIR	/* file handle is needed to
 					compare object identity and may not
 					be usable to open_by_handle_at(2) */
+#define AT_HANDLE_MNT_ID_UNIQUE	0x80 /* returned mount id is the u64 unique mount id */
+
 #if defined(__KERNEL__)
 #define AT_GETATTR_NOSEC	0x80000000
 #endif

---
base-commit: 584bbf439d0fa83d728ec49f3a38c581bdc828b4
change-id: 20240515-exportfs-u64-mount-id-9ebb5c58b53c

Best regards,
-- 
Aleksa Sarai <cyphar@xxxxxxxxxx>





[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [NTFS 3]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [NTFS 3]     [Samba]     [Device Mapper]     [CEPH Development]

  Powered by Linux