Re: [RFC PATCH 2/3] add statmnt(2) syscall

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, 19 Sept 2023 at 11:07, Christian Brauner <brauner@xxxxxxxxxx> wrote:
>
> On Tue, Sep 19, 2023 at 10:02:17AM +0200, Miklos Szeredi wrote:

> > This is where the thread started.  Knowing the size of the buffer is
> > no good, since the needed buffer could change between calls.
>
> The same problem would exist for the single buffer. Realistically, users
> will most often simply use a fixed size PATH_MAX buffer that will cover
> most cases and fallback to allocating a larger buffer in case things go
> awry.

Exactly.  A large buffer will work in 99.99% of the cases. Good
quality implementations will deal with the 0.01% as well, but
optimizing that case is nonsense.

> Really, the main things I care about are 64 bit alignment of the whole
> struct, typed __u64 pointers

Okay.

>  with __u32 size for mnt_root and mnt_point

Unnecessary if the strings are nul terminated.

> and that we please spell out "mount" and not use "mnt": so statmount
> because the new mount api uses "mount" (move_mount(), mount_setattr(),
> fsmount(), MOUNT_ATTR_*) almost everywhere.

Okay.

Incremental below.

Also pushed to:

  git://git.kernel.org/pub/scm/linux/kernel/git/mszeredi/vfs.git#statmount

Thanks,
Miklos


diff --git a/arch/x86/entry/syscalls/syscall_64.tbl
b/arch/x86/entry/syscalls/syscall_64.tbl
index 0d9a47b0ce9b..a1b3ce7d22cc 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -375,8 +375,8 @@
 451    common  cachestat               sys_cachestat
 452    common  fchmodat2               sys_fchmodat2
 453    64      map_shadow_stack        sys_map_shadow_stack
-454    common  statmnt                 sys_statmnt
-455    common  listmnt                 sys_listmnt
+454    common  statmount               sys_statmount
+455    common  listmount               sys_listmount

 #
 # Due to a historical design error, certain syscalls are numbered differently
diff --git a/fs/namespace.c b/fs/namespace.c
index 5362b1ffb26f..803003052bfb 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -68,9 +68,8 @@ static u64 event;
 static DEFINE_IDA(mnt_id_ida);
 static DEFINE_IDA(mnt_group_ida);

-/* Don't allow confusion with mount ID allocated wit IDA */
-#define OLD_MNT_ID_MAX UINT_MAX
-static atomic64_t mnt_id_ctr = ATOMIC64_INIT(OLD_MNT_ID_MAX);
+/* Don't allow confusion with old 32bit mount ID */
+static atomic64_t mnt_id_ctr = ATOMIC64_INIT(1ULL << 32);

 static struct hlist_head *mount_hashtable __read_mostly;
 static struct hlist_head *mountpoint_hashtable __read_mostly;
@@ -4679,14 +4678,6 @@ SYSCALL_DEFINE5(mount_setattr, int, dfd, const
char __user *, path,
        return err;
 }

-static bool mnt_id_match(struct mount *mnt, u64 id)
-{
-       if (id <= OLD_MNT_ID_MAX)
-               return id == mnt->mnt_id;
-       else
-               return id == mnt->mnt_id_unique;
-}
-
 struct vfsmount *lookup_mnt_in_ns(u64 id, struct mnt_namespace *ns)
 {
        struct mount *mnt;
@@ -4694,7 +4685,7 @@ struct vfsmount *lookup_mnt_in_ns(u64 id, struct
mnt_namespace *ns)

        lock_ns_list(ns);
        list_for_each_entry(mnt, &ns->list, mnt_list) {
-               if (!mnt_is_cursor(mnt) && mnt_id_match(mnt, id)) {
+               if (!mnt_is_cursor(mnt) && id == mnt->mnt_id_unique) {
                        res = &mnt->mnt;
                        break;
                }
@@ -4747,7 +4738,7 @@ static int stmt_string_seq(struct stmt_state *s,
stmt_func_t func)
 }

 static void stmt_string(struct stmt_state *s, u64 mask, stmt_func_t func,
-                      stmt_str_t *str)
+                      u64 *str)
 {
        int ret = s->pos >= s->bufsize ? -EOVERFLOW : 0;
        struct statmnt *sm = &s->sm;
@@ -4767,8 +4758,7 @@ static void stmt_string(struct stmt_state *s,
u64 mask, stmt_func_t func,
                if (copy_to_user(s->buf + s->pos, seq->buf, seq->count)) {
                        s->err = -EFAULT;
                } else {
-                       str->off = s->pos;
-                       str->len = seq->count - 1;
+                       *str = (unsigned long) (s->buf + s->pos);
                        s->pos += seq->count;
                }
        }
@@ -4899,39 +4889,10 @@ static int stmt_fs_type(struct stmt_state *s)
        struct super_block *sb = s->mnt->mnt_sb;

        seq_puts(seq, sb->s_type->name);
-       if (sb->s_subtype) {
-               seq_putc(seq, '.');
-               seq_puts(seq, sb->s_subtype);
-       }
-       return 0;
-}
-
-static int stmt_sb_opts(struct stmt_state *s)
-{
-       struct seq_file *seq = &s->seq;
-       struct super_block *sb = s->mnt->mnt_sb;
-       char *p, *end, *next, *u = seq->buf;
-       int err;
-
-       if (!sb->s_op->show_options)
-               return 0;
-
-       err = sb->s_op->show_options(seq, s->mnt->mnt_root);
-       if (err || seq_has_overflowed(seq) || !seq->count)
-               return err;
-
-       end = seq->buf + seq->count;
-       *end = '\0';
-       for (p = seq->buf + 1; p < end; p = next + 1) {
-               next = strchrnul(p, ',');
-               *next = '\0';
-               u += string_unescape(p, u, 0, UNESCAPE_OCTAL) + 1;
-       }
-       seq->count = u - 1 - seq->buf;
        return 0;
 }

-static int do_statmnt(struct stmt_state *s)
+static int do_statmount(struct stmt_state *s)
 {
        struct statmnt *sm = &s->sm;
        struct mount *m = real_mount(s->mnt);
@@ -4946,7 +4907,6 @@ static int do_statmnt(struct stmt_state *s)
        stmt_string(s, STMT_MNT_ROOT, stmt_mnt_root, &sm->mnt_root);
        stmt_string(s, STMT_MOUNTPOINT, stmt_mountpoint, &sm->mountpoint);
        stmt_string(s, STMT_FS_TYPE, stmt_fs_type, &sm->fs_type);
-       stmt_string(s, STMT_SB_OPTS, stmt_sb_opts, &sm->sb_opts);

        if (s->err)
                return s->err;
@@ -4957,7 +4917,7 @@ static int do_statmnt(struct stmt_state *s)
        return 0;
 }

-SYSCALL_DEFINE5(statmnt, u64, mnt_id,
+SYSCALL_DEFINE5(statmount, u64, mnt_id,
                u64, mask, struct statmnt __user *, buf,
                size_t, bufsize, unsigned int, flags)
 {
@@ -4980,7 +4940,7 @@ SYSCALL_DEFINE5(statmnt, u64, mnt_id,
                };

                get_fs_root(current->fs, &s.root);
-               err = do_statmnt(&s);
+               err = do_statmount(&s);
                path_put(&s.root);
        }
        up_read(&namespace_sem);
@@ -4988,19 +4948,25 @@ SYSCALL_DEFINE5(statmnt, u64, mnt_id,
        return err;
 }

-static long do_listmnt(struct vfsmount *mnt, u64 __user *buf, size_t bufsize,
-                     const struct path *root)
+static long do_listmount(struct vfsmount *mnt, u64 __user *buf, size_t bufsize,
+                        const struct path *root, unsigned int flags)
 {
        struct mount *r, *m = real_mount(mnt);
        struct path rootmnt = { .mnt = root->mnt, .dentry =
root->mnt->mnt_root };
        long ctr = 0;
+       bool reachable_only = true;

-       if (!capable(CAP_SYS_ADMIN) &&
-           !is_path_reachable(m, mnt->mnt_root, &rootmnt))
-               return -EPERM;
+       if (flags & LISTMOUNT_UNREACHABLE) {
+               if (!capable(CAP_SYS_ADMIN))
+                       return -EPERM;
+               reachable_only = false;
+       }
+
+       if (reachable_only && !is_path_reachable(m, mnt->mnt_root, &rootmnt))
+               return capable(CAP_SYS_ADMIN) ? 0 : -EPERM;

        list_for_each_entry(r, &m->mnt_mounts, mnt_child) {
-               if (!capable(CAP_SYS_ADMIN) &&
+               if (reachable_only &&
                    !is_path_reachable(r, r->mnt.mnt_root, root))
                        continue;

@@ -5015,14 +4981,14 @@ static long do_listmnt(struct vfsmount *mnt,
u64 __user *buf, size_t bufsize,
        return ctr;
 }

-SYSCALL_DEFINE4(listmnt, u64, mnt_id, u64 __user *, buf, size_t, bufsize,
+SYSCALL_DEFINE4(listmount, u64, mnt_id, u64 __user *, buf, size_t, bufsize,
                unsigned int, flags)
 {
        struct vfsmount *mnt;
        struct path root;
        long err;

-       if (flags)
+       if (flags & ~LISTMOUNT_UNREACHABLE)
                return -EINVAL;

        down_read(&namespace_sem);
@@ -5030,7 +4996,7 @@ SYSCALL_DEFINE4(listmnt, u64, mnt_id, u64 __user
*, buf, size_t, bufsize,
        err = -ENOENT;
        if (mnt) {
                get_fs_root(current->fs, &root);
-               err = do_listmnt(mnt, buf, bufsize, &root);
+               err = do_listmount(mnt, buf, bufsize, &root, flags);
                path_put(&root);
        }
        up_read(&namespace_sem);
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 5d776cdb6f18..a35fb7b2c842 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -74,6 +74,7 @@ struct landlock_ruleset_attr;
 enum landlock_rule_type;
 struct cachestat_range;
 struct cachestat;
+struct statmnt;

 #include <linux/types.h>
 #include <linux/aio_abi.h>
@@ -408,11 +409,11 @@ asmlinkage long sys_statfs64(const char __user
*path, size_t sz,
 asmlinkage long sys_fstatfs(unsigned int fd, struct statfs __user *buf);
 asmlinkage long sys_fstatfs64(unsigned int fd, size_t sz,
                                struct statfs64 __user *buf);
-asmlinkage long sys_statmnt(u64 mnt_id, u64 mask,
-                           struct statmnt __user *buf, size_t bufsize,
-                           unsigned int flags);
-asmlinkage long sys_listmnt(u64 mnt_id, u64 __user *buf, size_t bufsize,
-                           unsigned int flags);
+asmlinkage long sys_statmount(u64 mnt_id, u64 mask,
+                             struct statmnt __user *buf, size_t bufsize,
+                             unsigned int flags);
+asmlinkage long sys_listmount(u64 mnt_id, u64 __user *buf, size_t bufsize,
+                             unsigned int flags);
 asmlinkage long sys_truncate(const char __user *path, long length);
 asmlinkage long sys_ftruncate(unsigned int fd, unsigned long length);
 #if BITS_PER_LONG == 32
diff --git a/include/uapi/asm-generic/unistd.h
b/include/uapi/asm-generic/unistd.h
index a2b41370f603..8df6a747e21a 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -823,11 +823,11 @@ __SYSCALL(__NR_cachestat, sys_cachestat)
 #define __NR_fchmodat2 452
 __SYSCALL(__NR_fchmodat2, sys_fchmodat2)

-#define __NR_statmnt   454
-__SYSCALL(__NR_statmnt, sys_statmnt)
+#define __NR_statmount   454
+__SYSCALL(__NR_statmount, sys_statmount)

-#define __NR_listmnt   455
-__SYSCALL(__NR_listmnt, sys_listmnt)
+#define __NR_listmount   455
+__SYSCALL(__NR_listmount, sys_listmount)

 #undef __NR_syscalls
 #define __NR_syscalls 456
diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h
index 4ec7308a9259..d98b41024507 100644
--- a/include/uapi/linux/mount.h
+++ b/include/uapi/linux/mount.h
@@ -138,11 +138,6 @@ struct mount_attr {
 /* List of all mount_attr versions. */
 #define MOUNT_ATTR_SIZE_VER0   32 /* sizeof first published struct */

-struct stmt_str {
-       __u32 off;
-       __u32 len;
-};
-
 struct statmnt {
        __u64 mask;             /* What results were written [uncond] */
        __u32 sb_dev_major;     /* Device ID */
@@ -159,11 +154,10 @@ struct statmnt {
        __u64 mnt_peer_group;   /* ID of shared peer group */
        __u64 mnt_master;       /* Mount receives propagation from this ID */
        __u64 propagate_from;   /* Propagation from in current namespace */
-       __u64 __spare[20];
-       struct stmt_str mnt_root;       /* Root of mount relative to
root of fs */
-       struct stmt_str mountpoint;     /* Mountpoint relative to root
of process */
-       struct stmt_str fs_type;        /* Filesystem type[.subtype] */
-       struct stmt_str sb_opts;        /* Super block string options
(nul delimted) */
+       __u64 mnt_root;         /* [str] Root of mount relative to root of fs */
+       __u64 mountpoint;       /* [str] Mountpoint relative to root
of process */
+       __u64 fs_type;          /* [srt] Filesystem type */
+       __u64 __spare[49];
 };

 #define STMT_SB_BASIC          0x00000001U     /* Want/got sb_... */
@@ -172,6 +166,8 @@ struct statmnt {
 #define STMT_MNT_ROOT          0x00000008U     /* Want/got mnt_root  */
 #define STMT_MOUNTPOINT                0x00000010U     /* Want/got
mountpoint */
 #define STMT_FS_TYPE           0x00000020U     /* Want/got fs_type */
-#define STMT_SB_OPTS           0x00000040U     /* Want/got sb_opts */
+
+/* listmount(2) flags */
+#define LISTMOUNT_UNREACHABLE  0x01    /* List unreachable mounts too */

 #endif /* _UAPI_LINUX_MOUNT_H */



[Index of Archives]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux