Since mknodat can create file, we should also check whether strip S_ISGID. Also add new helper caps_down_fsetid to drop CAP_FSETID because strip S_ISGID depend on this cap and keep other cap(ie CAP_MKNOD) because create character device needs it when using mknod. Only test mknodat with character device in setgid_create function and the another two functions test mknodat with whiteout device. Since kernel commit a3c751a50 ("vfs: allow unprivileged whiteout creation") in v5.8-rc1, we can create whiteout device in userns test. Since kernel 5.12, mount_setattr and MOUNT_ATTR_IDMAP was supported, we don't need to detect kernel whether allow unprivileged whiteout creation. Using fs_allow_idmap as a proxy is safe. Tested-by: Christian Brauner (Microsoft)<brauner@xxxxxxxxxx> Reviewed-by: Christian Brauner (Microsoft)<brauner@xxxxxxxxxx> Signed-off-by: Yang Xu <xuyang2018.jy@xxxxxxxxxxx> --- src/idmapped-mounts/idmapped-mounts.c | 222 +++++++++++++++++++++++++- 1 file changed, 215 insertions(+), 7 deletions(-) diff --git a/src/idmapped-mounts/idmapped-mounts.c b/src/idmapped-mounts/idmapped-mounts.c index 2e94bf71..592a15d5 100644 --- a/src/idmapped-mounts/idmapped-mounts.c +++ b/src/idmapped-mounts/idmapped-mounts.c @@ -243,6 +243,35 @@ static inline bool caps_supported(void) return ret; } +/* caps_down_fsetid - lower CAP_FSETID effective cap */ +static int caps_down_fsetid(void) +{ + bool fret = false; +#ifdef HAVE_SYS_CAPABILITY_H + cap_t caps = NULL; + cap_value_t cap = CAP_FSETID; + int ret = -1; + + caps = cap_get_proc(); + if (!caps) + goto out; + + ret = cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap, 0); + if (ret) + goto out; + + ret = cap_set_proc(caps); + if (ret) + goto out; + + fret = true; + +out: + cap_free(caps); +#endif + return fret; +} + /* caps_down - lower all effective caps */ static int caps_down(void) { @@ -7807,9 +7836,9 @@ out_unmap: #endif /* HAVE_LIBURING_H */ /* The following tests are concerned with setgid inheritance. These can be - * filesystem type specific. For xfs, if a new file or directory is created - * within a setgid directory and irix_sgid_inhiert is set then inherit the - * setgid bit if the caller is in the group of the directory. + * filesystem type specific. For xfs, if a new file or directory or node is + * created within a setgid directory and irix_sgid_inhiert is set then inherit + * the setgid bit if the caller is in the group of the directory. */ static int setgid_create(void) { @@ -7865,18 +7894,44 @@ static int setgid_create(void) if (!is_setgid(t_dir1_fd, DIR1, 0)) die("failure: is_setgid"); + /* create a special file via mknodat() vfs_create */ + if (mknodat(t_dir1_fd, FILE2, S_IFREG | S_ISGID | S_IXGRP, 0)) + die("failure: mknodat"); + + if (!is_setgid(t_dir1_fd, FILE2, 0)) + die("failure: is_setgid"); + + /* create a character device via mknodat() vfs_mknod */ + if (mknodat(t_dir1_fd, CHRDEV1, S_IFCHR | S_ISGID | S_IXGRP, makedev(5, 1))) + die("failure: mknodat"); + + if (!is_setgid(t_dir1_fd, CHRDEV1, 0)) + die("failure: is_setgid"); + if (!expected_uid_gid(t_dir1_fd, FILE1, 0, 0, 0)) die("failure: check ownership"); if (!expected_uid_gid(t_dir1_fd, DIR1, 0, 0, 0)) die("failure: check ownership"); + if (!expected_uid_gid(t_dir1_fd, FILE2, 0, 0, 0)) + die("failure: check ownership"); + + if (!expected_uid_gid(t_dir1_fd, CHRDEV1, 0, 0, 0)) + die("failure: check ownership"); + if (unlinkat(t_dir1_fd, FILE1, 0)) die("failure: delete"); if (unlinkat(t_dir1_fd, DIR1, AT_REMOVEDIR)) die("failure: delete"); + if (unlinkat(t_dir1_fd, FILE2, 0)) + die("failure: delete"); + + if (unlinkat(t_dir1_fd, CHRDEV1, 0)) + die("failure: delete"); + exit(EXIT_SUCCESS); } if (wait_for_pid(pid)) @@ -7891,8 +7946,8 @@ static int setgid_create(void) if (!switch_ids(0, 10000)) die("failure: switch_ids"); - if (!caps_down()) - die("failure: caps_down"); + if (!caps_down_fsetid()) + die("failure: caps_down_fsetid"); /* create regular file via open() */ file1_fd = openat(t_dir1_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, S_IXGRP | S_ISGID); @@ -7919,6 +7974,19 @@ static int setgid_create(void) die("failure: is_setgid"); } + /* create a special file via mknodat() vfs_create */ + if (mknodat(t_dir1_fd, FILE2, S_IFREG | S_ISGID | S_IXGRP, 0)) + die("failure: mknodat"); + + if (is_setgid(t_dir1_fd, FILE2, 0)) + die("failure: is_setgid"); + + /* create a character device via mknodat() vfs_mknod */ + if (mknodat(t_dir1_fd, CHRDEV1, S_IFCHR | S_ISGID | S_IXGRP, makedev(5, 1))) + die("failure: mknodat"); + + if (is_setgid(t_dir1_fd, CHRDEV1, 0)) + die("failure: is_setgid"); /* * In setgid directories newly created files always inherit the * gid from the parent directory. Verify that the file is owned @@ -7935,6 +8003,24 @@ static int setgid_create(void) if (!expected_uid_gid(t_dir1_fd, DIR1, 0, 0, 0)) die("failure: check ownership"); + if (!expected_uid_gid(t_dir1_fd, FILE2, 0, 0, 0)) + die("failure: check ownership"); + + if (!expected_uid_gid(t_dir1_fd, CHRDEV1, 0, 0, 0)) + die("failure: check ownership"); + + if (unlinkat(t_dir1_fd, FILE1, 0)) + die("failure: delete"); + + if (unlinkat(t_dir1_fd, DIR1, AT_REMOVEDIR)) + die("failure: delete"); + + if (unlinkat(t_dir1_fd, FILE2, 0)) + die("failure: delete"); + + if (unlinkat(t_dir1_fd, CHRDEV1, 0)) + die("failure: delete"); + exit(EXIT_SUCCESS); } if (wait_for_pid(pid)) @@ -8037,6 +8123,20 @@ static int setgid_create_idmapped(void) die("failure: is_setgid"); } + /* create a special file via mknodat() vfs_create */ + if (mknodat(open_tree_fd, FILE2, S_IFREG | S_ISGID | S_IXGRP, 0)) + die("failure: mknodat"); + + if (is_setgid(open_tree_fd, FILE2, 0)) + die("failure: is_setgid"); + + /* create a whiteout device via mknodat() vfs_mknod */ + if (mknodat(open_tree_fd, CHRDEV1, S_IFCHR | S_ISGID | S_IXGRP, 0)) + die("failure: mknodat"); + + if (is_setgid(open_tree_fd, CHRDEV1, 0)) + die("failure: is_setgid"); + /* * In setgid directories newly created files always inherit the * gid from the parent directory. Verify that the file is owned @@ -8053,6 +8153,24 @@ static int setgid_create_idmapped(void) if (!expected_uid_gid(open_tree_fd, DIR1, 0, 10000, 10000)) die("failure: check ownership"); + if (!expected_uid_gid(open_tree_fd, FILE2, 0, 10000, 10000)) + die("failure: check ownership"); + + if (!expected_uid_gid(open_tree_fd, CHRDEV1, 0, 10000, 10000)) + die("failure: check ownership"); + + if (unlinkat(open_tree_fd, FILE1, 0)) + die("failure: delete"); + + if (unlinkat(open_tree_fd, DIR1, AT_REMOVEDIR)) + die("failure: delete"); + + if (unlinkat(open_tree_fd, FILE2, 0)) + die("failure: delete"); + + if (unlinkat(open_tree_fd, CHRDEV1, 0)) + die("failure: delete"); + exit(EXIT_SUCCESS); } if (wait_for_pid(pid)) @@ -8151,18 +8269,44 @@ static int setgid_create_idmapped_in_userns(void) if (!is_setgid(open_tree_fd, DIR1, 0)) die("failure: is_setgid"); + /* create a special file via mknodat() vfs_create */ + if (mknodat(open_tree_fd, FILE2, S_IFREG | S_ISGID | S_IXGRP, 0)) + die("failure: mknodat"); + + if (!is_setgid(open_tree_fd, FILE2, 0)) + die("failure: is_setgid"); + + /* create a whiteout device via mknodat() vfs_mknod */ + if (mknodat(open_tree_fd, CHRDEV1, S_IFCHR | S_ISGID | S_IXGRP, 0)) + die("failure: mknodat"); + + if (!is_setgid(open_tree_fd, CHRDEV1, 0)) + die("failure: is_setgid"); + if (!expected_uid_gid(open_tree_fd, FILE1, 0, 0, 0)) die("failure: check ownership"); if (!expected_uid_gid(open_tree_fd, DIR1, 0, 0, 0)) die("failure: check ownership"); + if (!expected_uid_gid(open_tree_fd, FILE2, 0, 0, 0)) + die("failure: check ownership"); + + if (!expected_uid_gid(open_tree_fd, CHRDEV1, 0, 0, 0)) + die("failure: check ownership"); + if (unlinkat(open_tree_fd, FILE1, 0)) die("failure: delete"); if (unlinkat(open_tree_fd, DIR1, AT_REMOVEDIR)) die("failure: delete"); + if (unlinkat(open_tree_fd, FILE2, 0)) + die("failure: delete"); + + if (unlinkat(open_tree_fd, CHRDEV1, 0)) + die("failure: delete"); + exit(EXIT_SUCCESS); } if (wait_for_pid(pid)) @@ -8192,9 +8336,12 @@ static int setgid_create_idmapped_in_userns(void) exit(EXIT_SUCCESS); } - if (!switch_userns(attr.userns_fd, 0, 0, true)) + if (!switch_userns(attr.userns_fd, 0, 0, false)) die("failure: switch_userns"); + if (!caps_down_fsetid()) + die("failure: caps_down_fsetid"); + /* create regular file via open() */ file1_fd = openat(open_tree_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, S_IXGRP | S_ISGID); if (file1_fd < 0) @@ -8220,6 +8367,20 @@ static int setgid_create_idmapped_in_userns(void) die("failure: is_setgid"); } + /* create a special file via mknodat() vfs_create */ + if (mknodat(open_tree_fd, FILE2, S_IFREG | S_ISGID | S_IXGRP, 0)) + die("failure: mknodat"); + + if (is_setgid(open_tree_fd, FILE2, 0)) + die("failure: is_setgid"); + + /* create a whiteout device via mknodat() vfs_mknod */ + if (mknodat(open_tree_fd, CHRDEV1, S_IFCHR | S_ISGID | S_IXGRP, 0)) + die("failure: mknodat"); + + if (is_setgid(open_tree_fd, CHRDEV1, 0)) + die("failure: is_setgid"); + /* * In setgid directories newly created files always inherit the * gid from the parent directory. Verify that the file is owned @@ -8236,12 +8397,24 @@ static int setgid_create_idmapped_in_userns(void) if (!expected_uid_gid(open_tree_fd, DIR1, 0, 0, 1000)) die("failure: check ownership"); + if (!expected_uid_gid(open_tree_fd, FILE2, 0, 0, 1000)) + die("failure: check ownership"); + + if (!expected_uid_gid(open_tree_fd, CHRDEV1, 0, 0, 1000)) + die("failure: check ownership"); + if (unlinkat(open_tree_fd, FILE1, 0)) die("failure: delete"); if (unlinkat(open_tree_fd, DIR1, AT_REMOVEDIR)) die("failure: delete"); + if (unlinkat(open_tree_fd, FILE2, 0)) + die("failure: delete"); + + if (unlinkat(open_tree_fd, CHRDEV1, 0)) + die("failure: delete"); + exit(EXIT_SUCCESS); } if (wait_for_pid(pid)) @@ -8268,9 +8441,12 @@ static int setgid_create_idmapped_in_userns(void) exit(EXIT_SUCCESS); } - if (!switch_userns(attr.userns_fd, 0, 1000, true)) + if (!switch_userns(attr.userns_fd, 0, 1000, false)) die("failure: switch_userns"); + if (!caps_down_fsetid()) + die("failure: caps_down_fsetid"); + /* create regular file via open() */ file1_fd = openat(open_tree_fd, FILE1, O_CREAT | O_EXCL | O_CLOEXEC, S_IXGRP | S_ISGID); if (file1_fd < 0) @@ -8297,12 +8473,44 @@ static int setgid_create_idmapped_in_userns(void) die("failure: is_setgid"); } + /* create a special file via mknodat() vfs_create */ + if (mknodat(open_tree_fd, FILE2, S_IFREG | S_ISGID | S_IXGRP, 0)) + die("failure: mknodat"); + + if (is_setgid(open_tree_fd, FILE2, 0)) + die("failure: is_setgid"); + + /* create a whiteout device via mknodat() vfs_mknod */ + if (mknodat(open_tree_fd, CHRDEV1, S_IFCHR | S_ISGID | S_IXGRP, 0)) + die("failure: mknodat"); + + if (is_setgid(open_tree_fd, CHRDEV1, 0)) + die("failure: is_setgid"); + if (!expected_uid_gid(open_tree_fd, FILE1, 0, 0, 0)) die("failure: check ownership"); if (!expected_uid_gid(open_tree_fd, DIR1, 0, 0, 0)) die("failure: check ownership"); + if (!expected_uid_gid(open_tree_fd, FILE2, 0, 0, 0)) + die("failure: check ownership"); + + if (!expected_uid_gid(open_tree_fd, CHRDEV1, 0, 0, 0)) + die("failure: check ownership"); + + if (unlinkat(open_tree_fd, FILE1, 0)) + die("failure: delete"); + + if (unlinkat(open_tree_fd, DIR1, AT_REMOVEDIR)) + die("failure: delete"); + + if (unlinkat(open_tree_fd, FILE2, 0)) + die("failure: delete"); + + if (unlinkat(open_tree_fd, CHRDEV1, 0)) + die("failure: delete"); + exit(EXIT_SUCCESS); } if (wait_for_pid(pid)) -- 2.27.0