Currently, the VFS only passes the mode argument to the filesystem, which then uses inode_init_owner() to strip S_ISGID. Some filesystems (e.g. ext4/btrfs) call inode_init_owner() first and then do the POSIX ACL setup, but xfs uses the opposite order. This affects whether S_ISGID is cleared, especially when the umask includes S_IXGRP. The VFS has all the info it needs - it doesn't need the filesystems to do everything correctly with the mode and to ensure that they order things like POSIX ACL setup functions correctly with respect to inode_init_owner() in order to strip the SGID bit. Just strip the SGID bit at the VFS, and then the filesystems can't get it wrong. Also, inode_sgid_strip() should be called before the IS_POSIXACL() check, because the umask applied after that check may change the mode, whereas clearing S_ISGID is unrelated to the SB_POSIXACL flag. Suggested-by: Dave Chinner <david@xxxxxxxxxxxxx> Signed-off-by: Yang Xu <xuyang2018.jy@xxxxxxxxxxx> --- fs/inode.c | 4 ---- fs/namei.c | 7 +++++-- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/fs/inode.c b/fs/inode.c index 1f964e7f9698..a2dd71c2437e 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -2246,10 +2246,6 @@ void inode_init_owner(struct user_namespace *mnt_userns, struct inode *inode, /* Directories are special, and always inherit S_ISGID */ if (S_ISDIR(mode)) mode |= S_ISGID; - else if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP) && - !in_group_p(i_gid_into_mnt(mnt_userns, dir)) && - !capable_wrt_inode_uidgid(mnt_userns, dir, CAP_FSETID)) - mode &= ~S_ISGID; } else inode_fsgid_set(inode, mnt_userns); inode->i_mode = mode; diff --git a/fs/namei.c b/fs/namei.c index 3f1829b3ab5b..e68a99e0ac96 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3287,6 +3287,7 @@ static struct dentry *lookup_open(struct nameidata *nd, struct file *file, if (open_flag & O_CREAT) { if (open_flag & O_EXCL) open_flag &= ~O_TRUNC; + inode_sgid_strip(mnt_userns, dir->d_inode, &mode); if (!IS_POSIXACL(dir->d_inode)) mode &= ~current_umask(); if (likely(got_write)) @@ -3521,6 +3522,8 @@ struct dentry *vfs_tmpfile(struct 
user_namespace *mnt_userns, child = d_alloc(dentry, &slash_name); if (unlikely(!child)) goto out_err; + inode_sgid_strip(mnt_userns, dir, &mode); + error = dir->i_op->tmpfile(mnt_userns, dir, child, mode); if (error) goto out_err; @@ -3849,14 +3852,14 @@ static int do_mknodat(int dfd, struct filename *name, umode_t mode, error = PTR_ERR(dentry); if (IS_ERR(dentry)) goto out1; - + mnt_userns = mnt_user_ns(path.mnt); + inode_sgid_strip(mnt_userns, path.dentry->d_inode, &mode); if (!IS_POSIXACL(path.dentry->d_inode)) mode &= ~current_umask(); error = security_path_mknod(&path, dentry, mode, dev); if (error) goto out2; - mnt_userns = mnt_user_ns(path.mnt); switch (mode & S_IFMT) { case 0: case S_IFREG: error = vfs_create(mnt_userns, path.dentry->d_inode, -- 2.27.0