On Tue, Apr 12, 2022 at 07:33:43PM +0800, Yang Xu wrote: > Since mknodat can create file, we should also check whether strip S_ISGID. > Also add new helper caps_down_fsetid to drop CAP_FSETID because strip S_ISGID > depend on this cap and keep other cap(ie CAP_MKNOD) because create character > device needs it when using mknod. > > Only test mknodat with character device in setgid_create function and the another > two functions test mknodat with whiteout device. > > Since kernel commit a3c751a50 ("vfs: allow unprivileged whiteout creation") in > v5.8-rc1, we can create whiteout device in userns test. Since kernel 5.12, mount_setattr > and MOUNT_ATTR_IDMAP was supported, we don't need to detect kernel whether allow > unprivileged whiteout creation. Using fs_allow_idmap as a proxy is safe. > > Tested-by: Christian Brauner (Microsoft)<brauner@xxxxxxxxxx> > Reviewed-by: Christian Brauner (Microsoft)<brauner@xxxxxxxxxx> > Signed-off-by: Yang Xu <xuyang2018.jy@xxxxxxxxxxx> > --- > src/idmapped-mounts/idmapped-mounts.c | 219 +++++++++++++++++++++++++- > 1 file changed, 213 insertions(+), 6 deletions(-) > > diff --git a/src/idmapped-mounts/idmapped-mounts.c b/src/idmapped-mounts/idmapped-mounts.c > index 8e6405c5..617f56e0 100644 > --- a/src/idmapped-mounts/idmapped-mounts.c > +++ b/src/idmapped-mounts/idmapped-mounts.c > @@ -241,6 +241,34 @@ static inline bool caps_supported(void) > return ret; > } > > +static int caps_down_fsetid(void) > +{ > + bool fret = false; > +#ifdef HAVE_SYS_CAPABILITY_H > + cap_t caps = NULL; > + cap_value_t cap = CAP_FSETID; > + int ret = -1; > + > + caps = cap_get_proc(); > + if (!caps) > + goto out; > + > + ret = cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap, 0); > + if (ret) > + goto out; > + > + ret = cap_set_proc(caps); > + if (ret) > + goto out; > + > + fret = true; > + > +out: > + cap_free(caps); > +#endif > + return fret; > +} > + > /* caps_down - lower all effective caps */ > static int caps_down(void) > { > @@ -7805,8 +7833,8 @@ out_unmap: > #endif /* HAVE_LIBURING_H */ > > /* The following tests are concerned with setgid inheritance. These can be > - * filesystem type specific. For xfs, if a new file or directory is created > - * within a setgid directory and irix_sgid_inhiert is set then inherit the > + * filesystem type specific. For xfs, if a new file or directory or node is > + * created within a setgid directory and irix_sgid_inhiert is set then inherit the > * setgid bit if the caller is in the group of the directory. > */ > static int setgid_create(void) > @@ -7863,18 +7891,44 @@ static int setgid_create(void) > if (!is_setgid(t_dir1_fd, DIR1, 0)) > die("failure: is_setgid"); > > + /* create a special file via mknodat() vfs_create */ > + if (mknodat(t_dir1_fd, FILE2, S_IFREG | S_ISGID | S_IXGRP, 0)) > + die("failure: mknodat"); > + > + if (!is_setgid(t_dir1_fd, FILE2, 0)) > + die("failure: is_setgid"); > + > + /* create a character device via mknodat() vfs_mknod */ > + if (mknodat(t_dir1_fd, CHRDEV1, S_IFCHR | S_ISGID | S_IXGRP, makedev(5, 1))) > + die("failure: mknodat"); > + > + if (!is_setgid(t_dir1_fd, CHRDEV1, 0)) > + die("failure: is_setgid"); > + > if (!expected_uid_gid(t_dir1_fd, FILE1, 0, 0, 0)) > die("failure: check ownership"); > > if (!expected_uid_gid(t_dir1_fd, DIR1, 0, 0, 0)) > die("failure: check ownership"); > > + if (!expected_uid_gid(t_dir1_fd, FILE2, 0, 0, 0)) > + die("failure: check ownership"); > + > + if (!expected_uid_gid(t_dir1_fd, CHRDEV1, 0, 0, 0)) > + die("failure: check ownership"); > + > if (unlinkat(t_dir1_fd, FILE1, 0)) > die("failure: delete"); > > if (unlinkat(t_dir1_fd, DIR1, AT_REMOVEDIR)) > die("failure: delete"); > > + if (unlinkat(t_dir1_fd, FILE2, 0)) > + die("failure: delete"); > + > + if (unlinkat(t_dir1_fd, CHRDEV1, 0)) > + die("failure: delete"); > + > exit(EXIT_SUCCESS); > } > if (wait_for_pid(pid)) > @@ -7889,8 +7943,8 @@ static int setgid_create(void) > if (!switch_ids(0, 10000)) > die("failure: switch_ids"); > > - if (!caps_down()) > - die("failure: caps_down"); > + if (!caps_down_fsetid()) > + die("failure: caps_down_fsetid"); > > /* create regular file via open() */ > file1_fd = openat(t_dir1_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, S_IXGRP | S_ISGID); > @@ -7917,6 +7971,19 @@ static int setgid_create(void) > die("failure: is_setgid"); > } Please see my comment on the earlier thread: https://lore.kernel.org/linux-fsdevel/20220407134009.s4shhomfxjz5cf5r@wittgenstein The same issue still exists afaict, i.e. you're not handling the irix case.