On Wed, Sep 13, 2023 at 6:22 PM Miklos Szeredi <mszeredi@xxxxxxxxxx> wrote: > > Add a way to query the children of a particular mount. This is a more > flexible way to iterate the mount tree than having to parse the complete > /proc/self/mountinfo. > > Look up the mount by the old (32bit) or new (64bit) mount ID. If a mount > needs to be queried based on path, then statx(2) can be used to first query > the mount ID belonging to the path. > > Return an array of new (64bit) mount IDs. Without privileges only mounts > are listed which are reachable from the task's root. > > Signed-off-by: Miklos Szeredi <mszeredi@xxxxxxxxxx> > --- > arch/x86/entry/syscalls/syscall_64.tbl | 1 + > fs/namespace.c | 51 ++++++++++++++++++++++++++ > include/linux/syscalls.h | 2 + > include/uapi/asm-generic/unistd.h | 5 ++- > 4 files changed, 58 insertions(+), 1 deletion(-) > > diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl > index 6d807c30cd16..0d9a47b0ce9b 100644 > --- a/arch/x86/entry/syscalls/syscall_64.tbl > +++ b/arch/x86/entry/syscalls/syscall_64.tbl > @@ -376,6 +376,7 @@ > 452 common fchmodat2 sys_fchmodat2 > 453 64 map_shadow_stack sys_map_shadow_stack > 454 common statmnt sys_statmnt > +455 common listmnt sys_listmnt > > # > # Due to a historical design error, certain syscalls are numbered differently > diff --git a/fs/namespace.c b/fs/namespace.c > index 088a52043bba..5362b1ffb26f 100644 > --- a/fs/namespace.c > +++ b/fs/namespace.c > @@ -4988,6 +4988,57 @@ SYSCALL_DEFINE5(statmnt, u64, mnt_id, > return err; > } > > +static long do_listmnt(struct vfsmount *mnt, u64 __user *buf, size_t bufsize, > + const struct path *root) > +{ > + struct mount *r, *m = real_mount(mnt); > + struct path rootmnt = { .mnt = root->mnt, .dentry = root->mnt->mnt_root }; > + long ctr = 0; > + > + if (!capable(CAP_SYS_ADMIN) && > + !is_path_reachable(m, mnt->mnt_root, &rootmnt)) > + return -EPERM; > + > + list_for_each_entry(r, &m->mnt_mounts, mnt_child) { > + if 
(!capable(CAP_SYS_ADMIN) && > + !is_path_reachable(r, r->mnt.mnt_root, root)) > + continue; > + > + if (ctr >= bufsize) > + return -EOVERFLOW; > + if (put_user(r->mnt_id_unique, buf + ctr)) > + return -EFAULT; > + ctr++; > + if (ctr < 0) > + return -ERANGE; I think it'd be good for userspace to be able to query the required bufsize with a NULL buf, listxattr(2) style, rather than having to guess and re-guess on EOVERFLOW. Thanks, Amir. > + } > + return ctr; > +} > + > +SYSCALL_DEFINE4(listmnt, u64, mnt_id, u64 __user *, buf, size_t, bufsize, > + unsigned int, flags) > +{ > + struct vfsmount *mnt; > + struct path root; > + long err; > + > + if (flags) > + return -EINVAL; > + > + down_read(&namespace_sem); > + mnt = lookup_mnt_in_ns(mnt_id, current->nsproxy->mnt_ns); > + err = -ENOENT; > + if (mnt) { > + get_fs_root(current->fs, &root); > + err = do_listmnt(mnt, buf, bufsize, &root); > + path_put(&root); > + } > + up_read(&namespace_sem); > + > + return err; > +} > + > + > static void __init init_mount_tree(void) > { > struct vfsmount *mnt; > diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h > index 1099bd307fa7..5d776cdb6f18 100644 > --- a/include/linux/syscalls.h > +++ b/include/linux/syscalls.h > @@ -411,6 +411,8 @@ asmlinkage long sys_fstatfs64(unsigned int fd, size_t sz, > asmlinkage long sys_statmnt(u64 mnt_id, u64 mask, > struct statmnt __user *buf, size_t bufsize, > unsigned int flags); > +asmlinkage long sys_listmnt(u64 mnt_id, u64 __user *buf, size_t bufsize, > + unsigned int flags); > asmlinkage long sys_truncate(const char __user *path, long length); > asmlinkage long sys_ftruncate(unsigned int fd, unsigned long length); > #if BITS_PER_LONG == 32 > diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h > index 640997231ff6..a2b41370f603 100644 > --- a/include/uapi/asm-generic/unistd.h > +++ b/include/uapi/asm-generic/unistd.h > @@ -826,8 +826,11 @@ __SYSCALL(__NR_fchmodat2, sys_fchmodat2) > #define __NR_statmnt 454 > 
__SYSCALL(__NR_statmnt, sys_statmnt) > > +#define __NR_listmnt 455 > +__SYSCALL(__NR_listmnt, sys_listmnt) > + > #undef __NR_syscalls > -#define __NR_syscalls 455 > +#define __NR_syscalls 456 > > /* > * 32 bit systems traditionally used different > -- > 2.41.0 >