on 2022/4/7 21:40, Christian Brauner wrote: > On Thu, Apr 07, 2022 at 08:09:31PM +0800, Yang Xu wrote: >> Since mknodat can create file, we should also check whether strip S_ISGID. >> Also add new helper caps_down_fsetid to drop CAP_FSETID because strip S_ISGID >> depond on this cap and keep other cap(ie CAP_MKNOD) because create character >> device needs it when using mknod. >> >> Only test mknodat with character device in setgid_create function because the another >> two functions test mknodat with whiteout device. >> >> Since kernel commit a3c751a50 ("vfs: allow unprivileged whiteout creation") in >> v5.8-rc1, we can create whiteout device in userns test. Since kernel 5.12, mount_setattr >> and MOUNT_ATTR_IDMAP was supported, we don't need to detect kernel whether allow >> unprivileged whiteout creation. Using fs_allow_idmap as a proxy is safe. >> >> Tested-by: Christian Brauner (Microsoft)<brauner@xxxxxxxxxx> >> Reviewed-by: Christian Brauner (Microsoft)<brauner@xxxxxxxxxx> >> Signed-off-by: Yang Xu<xuyang2018.jy@xxxxxxxxxxx> >> --- >> src/idmapped-mounts/idmapped-mounts.c | 190 +++++++++++++++++++++++++- >> 1 file changed, 183 insertions(+), 7 deletions(-) >> >> diff --git a/src/idmapped-mounts/idmapped-mounts.c b/src/idmapped-mounts/idmapped-mounts.c >> index dff6820f..e8a856de 100644 >> --- a/src/idmapped-mounts/idmapped-mounts.c >> +++ b/src/idmapped-mounts/idmapped-mounts.c >> @@ -241,6 +241,34 @@ static inline bool caps_supported(void) >> return ret; >> } >> >> +static int caps_down_fsetid(void) >> +{ >> + bool fret = false; >> +#ifdef HAVE_SYS_CAPABILITY_H >> + cap_t caps = NULL; >> + cap_value_t cap = CAP_FSETID; >> + int ret = -1; >> + >> + caps = cap_get_proc(); >> + if (!caps) >> + goto out; >> + >> + ret = cap_set_flag(caps, CAP_EFFECTIVE, 1,&cap, 0); >> + if (ret) >> + goto out; >> + >> + ret = cap_set_proc(caps); >> + if (ret) >> + goto out; >> + >> + fret = true; >> + >> +out: >> + cap_free(caps); >> +#endif >> + return fret; >> +} >> + >> /* caps_down - lower all effective caps */ >> static int caps_down(void) >> { >> @@ -7805,8 +7833,8 @@ out_unmap: >> #endif /* HAVE_LIBURING_H */ >> >> /* The following tests are concerned with setgid inheritance. These can be >> - * filesystem type specific. For xfs, if a new file or directory is created >> - * within a setgid directory and irix_sgid_inhiert is set then inherit the >> + * filesystem type specific. For xfs, if a new file or directory or node is >> + * created within a setgid directory and irix_sgid_inhiert is set then inherit the >> * setgid bit if the caller is in the group of the directory. >> */ >> static int setgid_create(void) >> @@ -7863,15 +7891,41 @@ static int setgid_create(void) >> if (!is_setgid(t_dir1_fd, DIR1, 0)) >> die("failure: is_setgid"); >> >> + /* create a special file via mknodat() vfs_create */ >> + if (mknodat(t_dir1_fd, DIR1 "/" FILE1, S_IFREG | S_ISGID | 0755, 0)) >> + die("failure: mknodat"); > > Can you please replace 0755 with the corresponding flags everywhere? > I personally find them easier to parse but it's also what we've been > using mostly everywhere in the testsuite. OK, will do. > >> + >> + if (!is_setgid(t_dir1_fd, DIR1 "/" FILE1, 0)) >> + die("failure: is_setgid"); >> + >> + /* create a character device via mknodat() vfs_mknod */ >> + if (mknodat(t_dir1_fd, CHRDEV1, S_IFCHR | S_ISGID | 0755, makedev(5, 1))) >> + die("failure: mknodat"); >> + >> + if (!is_setgid(t_dir1_fd, CHRDEV1, 0)) >> + die("failure: is_setgid"); >> + >> if (!expected_uid_gid(t_dir1_fd, FILE1, 0, 0, 0)) >> die("failure: check ownership"); >> >> + if (!expected_uid_gid(t_dir1_fd, DIR1 "/" FILE1, 0, 0, 0)) >> + die("failure: check ownership"); >> + >> + if (!expected_uid_gid(t_dir1_fd, CHRDEV1, 0, 0, 0)) >> + die("failure: check ownership"); >> + >> if (!expected_uid_gid(t_dir1_fd, DIR1, 0, 0, 0)) >> die("failure: check ownership"); >> >> if (unlinkat(t_dir1_fd, FILE1, 0)) >> die("failure: delete"); >> >> + if (unlinkat(t_dir1_fd, DIR1 "/" FILE1, 0)) >> + die("failure: delete"); >> + >> + if (unlinkat(t_dir1_fd, CHRDEV1, 0)) >> + die("failure: delete"); >> + >> if (unlinkat(t_dir1_fd, DIR1, AT_REMOVEDIR)) >> die("failure: delete"); >> >> @@ -7889,8 +7943,8 @@ static int setgid_create(void) >> if (!switch_ids(0, 10000)) >> die("failure: switch_ids"); >> >> - if (!caps_down()) >> - die("failure: caps_down"); >> + if (!caps_down_fsetid()) >> + die("failure: caps_down_fsetid"); >> >> /* create regular file via open() */ >> file1_fd = openat(t_dir1_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, S_IXGRP | S_ISGID); >> @@ -7917,6 +7971,19 @@ static int setgid_create(void) >> die("failure: is_setgid"); >> } >> >> + /* create a special file via mknodat() vfs_create */ >> + if (mknodat(t_dir1_fd, DIR1 "/" FILE1, S_IFREG | S_ISGID | 0755, 0)) >> + die("failure: mknodat"); >> + >> + if (is_setgid(t_dir1_fd, DIR1 "/" FILE1, 0)) >> + die("failure: is_setgid"); >> + >> + /* create a character device via mknodat() vfs_mknod */ >> + if (mknodat(t_dir1_fd, CHRDEV1, S_IFCHR | S_ISGID | 0755, makedev(5, 1))) >> + die("failure: mknodat"); >> + >> + if (is_setgid(t_dir1_fd, CHRDEV1, 0)) >> + die("failure: is_setgid"); > > Right above this section you can see the following: > > if (xfs_irix_sgid_inherit_enabled()) { > /* We're not in_group_p(). */ > if (is_setgid(t_dir1_fd, DIR1, 0)) > die("failure: is_setgid"); > } else { > > which tests xfs specific behavior. If this is turned on then > t_dir1_fd/DIR1 won't bet a setgid directory. > > Consequently the test: > > /* create a special file via mknodat() vfs_create */ > if (mknodat(t_dir1_fd, DIR1 "/" FILE1, S_IFREG | S_ISGID | 0755, 0)) > die("failure: mknodat"); > > if (is_setgid(t_dir1_fd, DIR1 "/" FILE1, 0)) > die("failure: is_setgid"); > > will fail because the branch in the kernel that strips the setgid bit > won't be hit. > > So afiact, you need to ensure that the setgid bit is raised in the if > (xfs_irix_sgid_inherit_enabled()) branch after having verified that the > directory hasn't inherited the setgid bit with the legacy irix behavior. > > If you don't do that then the test will be a false negative for xfs with > the sysctl turned on. It's super rare but it'll be annoying if we have > to track this down later. Good catch, will fix this. Best Regards Yang Xu