On Fri, Jun 14, 2024 at 04:15:47PM GMT, Darrick J. Wong wrote: > On Fri, Jan 13, 2023 at 12:49:31PM +0100, Christian Brauner wrote: > > Convert to struct mnt_idmap. > > > > Last cycle we merged the necessary infrastructure in > > 256c8aed2b42 ("fs: introduce dedicated idmap type for mounts"). > > This is just the conversion to struct mnt_idmap. > > > > Currently we still pass around the plain namespace that was attached to a > > mount. This is in general pretty convenient but it makes it easy to > > conflate namespaces that are relevant on the filesystem with namespaces > > that are relevent on the mount level. Especially for non-vfs developers > > without detailed knowledge in this area this can be a potential source for > > bugs. > > > > Once the conversion to struct mnt_idmap is done all helpers down to the > > really low-level helpers will take a struct mnt_idmap argument instead of > > two namespace arguments. This way it becomes impossible to conflate the two > > eliminating the possibility of any bugs. All of the vfs and all filesystems > > only operate on struct mnt_idmap. > > > > Signed-off-by: Christian Brauner (Microsoft) <brauner@xxxxxxxxxx> > > --- > > fs/ext4/ialloc.c | 3 +-- > > fs/inode.c | 6 ++---- > > fs/xfs/xfs_inode.c | 13 +++++-------- > > fs/xfs/xfs_symlink.c | 5 ++--- > > include/linux/fs.h | 21 ++++++++++----------- > > include/linux/mnt_idmapping.h | 18 ++++++++++-------- > > 6 files changed, 30 insertions(+), 36 deletions(-) > > > > diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c > > index 1024b0c02431..157663031f8c 100644 > > --- a/fs/ext4/ialloc.c > > +++ b/fs/ext4/ialloc.c > > @@ -943,7 +943,6 @@ struct inode *__ext4_new_inode(struct mnt_idmap *idmap, > > ext4_group_t flex_group; > > struct ext4_group_info *grp = NULL; > > bool encrypt = false; > > - struct user_namespace *mnt_userns = mnt_idmap_owner(idmap); > > > > /* Cannot create files in a deleted directory */ > > if (!dir || !dir->i_nlink) > > @@ -973,7 +972,7 @@ struct inode *__ext4_new_inode(struct mnt_idmap *idmap, > > i_gid_write(inode, owner[1]); > > } else if (test_opt(sb, GRPID)) { > > inode->i_mode = mode; > > - inode_fsuid_set(inode, mnt_userns); > > + inode_fsuid_set(inode, idmap); > > inode->i_gid = dir->i_gid; > > } else > > inode_init_owner(idmap, inode, dir, mode); > > diff --git a/fs/inode.c b/fs/inode.c > > index 1aec92141fab..1b05e0e4b5c8 100644 > > --- a/fs/inode.c > > +++ b/fs/inode.c > > @@ -2293,9 +2293,7 @@ EXPORT_SYMBOL(init_special_inode); > > void inode_init_owner(struct mnt_idmap *idmap, struct inode *inode, > > const struct inode *dir, umode_t mode) > > { > > - struct user_namespace *mnt_userns = mnt_idmap_owner(idmap); > > - > > - inode_fsuid_set(inode, mnt_userns); > > + inode_fsuid_set(inode, idmap); > > if (dir && dir->i_mode & S_ISGID) { > > inode->i_gid = dir->i_gid; > > > > @@ -2303,7 +2301,7 @@ void inode_init_owner(struct mnt_idmap *idmap, struct inode *inode, > > if (S_ISDIR(mode)) > > mode |= S_ISGID; > > } else > > - inode_fsgid_set(inode, mnt_userns); > > + inode_fsgid_set(inode, idmap); > > inode->i_mode = mode; > > } > > EXPORT_SYMBOL(inode_init_owner); > > diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c > > index 59fb064e2df3..7f1d715faab5 100644 > > --- a/fs/xfs/xfs_inode.c > > +++ b/fs/xfs/xfs_inode.c > > @@ -788,7 +788,6 @@ xfs_init_new_inode( > > bool init_xattrs, > > struct xfs_inode **ipp) > > { > > - struct user_namespace *mnt_userns = mnt_idmap_owner(idmap); > > struct inode *dir = pip ? VFS_I(pip) : NULL; > > struct xfs_mount *mp = tp->t_mountp; > > struct xfs_inode *ip; > > @@ -824,7 +823,7 @@ xfs_init_new_inode( > > ip->i_projid = prid; > > > > if (dir && !(dir->i_mode & S_ISGID) && xfs_has_grpid(mp)) { > > - inode_fsuid_set(inode, mnt_userns); > > + inode_fsuid_set(inode, idmap); > > inode->i_gid = dir->i_gid; > > inode->i_mode = mode; > > } else { > > @@ -955,7 +954,6 @@ xfs_create( > > bool init_xattrs, > > xfs_inode_t **ipp) > > { > > - struct user_namespace *mnt_userns = mnt_idmap_owner(idmap); > > int is_dir = S_ISDIR(mode); > > struct xfs_mount *mp = dp->i_mount; > > struct xfs_inode *ip = NULL; > > @@ -980,8 +978,8 @@ xfs_create( > > /* > > * Make sure that we have allocated dquot(s) on disk. > > */ > > - error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(mnt_userns, &init_user_ns), > > - mapped_fsgid(mnt_userns, &init_user_ns), prid, > > + error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(idmap, &init_user_ns), > > + mapped_fsgid(idmap, &init_user_ns), prid, > > XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, > > &udqp, &gdqp, &pdqp); > > if (error) > > @@ -1109,7 +1107,6 @@ xfs_create_tmpfile( > > umode_t mode, > > struct xfs_inode **ipp) > > { > > - struct user_namespace *mnt_userns = mnt_idmap_owner(idmap); > > struct xfs_mount *mp = dp->i_mount; > > struct xfs_inode *ip = NULL; > > struct xfs_trans *tp = NULL; > > @@ -1130,8 +1127,8 @@ xfs_create_tmpfile( > > /* > > * Make sure that we have allocated dquot(s) on disk. > > */ > > - error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(mnt_userns, &init_user_ns), > > - mapped_fsgid(mnt_userns, &init_user_ns), prid, > > + error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(idmap, &init_user_ns), > > + mapped_fsgid(idmap, &init_user_ns), prid, > > XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, > > &udqp, &gdqp, &pdqp); > > if (error) > > diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c > > index 24cf0a16bf35..85e433df6a3f 100644 > > --- a/fs/xfs/xfs_symlink.c > > +++ b/fs/xfs/xfs_symlink.c > > @@ -151,7 +151,6 @@ xfs_symlink( > > umode_t mode, > > struct xfs_inode **ipp) > > { > > - struct user_namespace *mnt_userns = mnt_idmap_owner(idmap); > > struct xfs_mount *mp = dp->i_mount; > > struct xfs_trans *tp = NULL; > > struct xfs_inode *ip = NULL; > > @@ -194,8 +193,8 @@ xfs_symlink( > > /* > > * Make sure that we have allocated dquot(s) on disk. > > */ > > - error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(mnt_userns, &init_user_ns), > > - mapped_fsgid(mnt_userns, &init_user_ns), prid, > > + error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(idmap, &init_user_ns), > > + mapped_fsgid(idmap, &init_user_ns), prid, > > I know this is pretty late to be asking questions about this patch, but > after reviewing this code, I am curious about something -- That's perfectly fine! > > Here we try to allocate dquots prior to creating a file. The uid used > here is: > > mapped_fsuid(mnt_userns, &init_user_ns) > > But later in inode_fsuid_set, we set the new inode's actual uid to: > > mapped_fsuid(mnt_userns, i_user_ns(inode)); > > What happens if i_user_ns(inode) != &init_user_ns? Can that change the Excellent observation and question. > return value of mapped_fsuid with the result that quota accounting > charges the wrong uid if the user namespaces are different? > Unfortunately I'm not familiar enough with how these things work to know > if that's even a sensible question. So a few preliminary remarks: (1) i_user_ns() retries the user namespace of the superblock. For example: mount a tmpfs filesystem in the initial mount namespace owned by the initial user namespace: mount -t tmpfs tmpfs /opt In this case init_user_ns == i_user_ns(inode) But if we create a new mount namespace owned by an unprivilged user namespace: unshare --mount --user --map-root and mount tmpfs in there: mount -t tmpfs tmpfs /mnt then init_user_ns != i_user_ns(inode) (2) Filesystems must be explicitly marked as being mountable inside of a user namespace aka by an unprivileged user. That only includes pseudo filesystems (and I really want a VFS assertion that it stays that way). Since xfs isn't in (2) the scenario described in (1) cannot happen. For all non-userns mountable filesystems like xfs I generally chose to pass init_user_ns explicitly everywhere it's required. But general helpers like i_user_ns() need to work for both and that's why it always uses i_user_ns(). You could technically put a WARN_ON_ONCE() in inode_fsuid_set() that basically does: @@ -1492,6 +1492,7 @@ static inline void i_gid_update(struct mnt_idmap *idmap, static inline void inode_fsuid_set(struct inode *inode, struct mnt_idmap *idmap) { + WARN_ON_ONCE(i_user_ns(inode) != &init_user_ns && inode->i_sb->s_type->fs_flags & FS_USERNS_MOUNT); inode->i_uid = mapped_fsuid(idmap, i_user_ns(inode)); } But I didn't think it was worth it and if we wanted to use it then I'd rather at an inode flag so we don't have to do this double deref thing. > > In my development tree I have a few assertions sprinked to see if we > ever detect a discrepancy in the uid that is set. fstests doesn't > trigger it, nor does my usual workflow, so I really don't know. > > --D > > > XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT, > > &udqp, &gdqp, &pdqp); > > if (error) > > diff --git a/include/linux/fs.h b/include/linux/fs.h > > index 173c5274a63a..54a95ed68322 100644 > > --- a/include/linux/fs.h > > +++ b/include/linux/fs.h > > @@ -1744,29 +1744,29 @@ static inline void i_gid_update(struct mnt_idmap *idmap, > > /** > > * inode_fsuid_set - initialize inode's i_uid field with callers fsuid > > * @inode: inode to initialize > > - * @mnt_userns: user namespace of the mount the inode was found from > > + * @idmap: idmap of the mount the inode was found from > > * > > * Initialize the i_uid field of @inode. If the inode was found/created via > > - * an idmapped mount map the caller's fsuid according to @mnt_users. > > + * an idmapped mount map the caller's fsuid according to @idmap. > > */ > > static inline void inode_fsuid_set(struct inode *inode, > > - struct user_namespace *mnt_userns) > > + struct mnt_idmap *idmap) > > { > > - inode->i_uid = mapped_fsuid(mnt_userns, i_user_ns(inode)); > > + inode->i_uid = mapped_fsuid(idmap, i_user_ns(inode)); > > } > > > > /** > > * inode_fsgid_set - initialize inode's i_gid field with callers fsgid > > * @inode: inode to initialize > > - * @mnt_userns: user namespace of the mount the inode was found from > > + * @idmap: idmap of the mount the inode was found from > > * > > * Initialize the i_gid field of @inode. If the inode was found/created via > > - * an idmapped mount map the caller's fsgid according to @mnt_users. > > + * an idmapped mount map the caller's fsgid according to @idmap. > > */ > > static inline void inode_fsgid_set(struct inode *inode, > > - struct user_namespace *mnt_userns) > > + struct mnt_idmap *idmap) > > { > > - inode->i_gid = mapped_fsgid(mnt_userns, i_user_ns(inode)); > > + inode->i_gid = mapped_fsgid(idmap, i_user_ns(inode)); > > } > > > > /** > > @@ -1784,14 +1784,13 @@ static inline bool fsuidgid_has_mapping(struct super_block *sb, > > struct mnt_idmap *idmap) > > { > > struct user_namespace *fs_userns = sb->s_user_ns; > > - struct user_namespace *mnt_userns = mnt_idmap_owner(idmap); > > kuid_t kuid; > > kgid_t kgid; > > > > - kuid = mapped_fsuid(mnt_userns, fs_userns); > > + kuid = mapped_fsuid(idmap, fs_userns); > > if (!uid_valid(kuid)) > > return false; > > - kgid = mapped_fsgid(mnt_userns, fs_userns); > > + kgid = mapped_fsgid(idmap, fs_userns); > > if (!gid_valid(kgid)) > > return false; > > return kuid_has_mapping(fs_userns, kuid) && > > diff --git a/include/linux/mnt_idmapping.h b/include/linux/mnt_idmapping.h > > index 0ccca33a7a6d..d63e7c84a389 100644 > > --- a/include/linux/mnt_idmapping.h > > +++ b/include/linux/mnt_idmapping.h > > @@ -375,8 +375,8 @@ static inline kgid_t vfsgid_into_kgid(vfsgid_t vfsgid) > > } > > > > /** > > - * mapped_fsuid - return caller's fsuid mapped up into a mnt_userns > > - * @mnt_userns: the mount's idmapping > > + * mapped_fsuid - return caller's fsuid mapped according to an idmapping > > + * @idmap: the mount's idmapping > > * @fs_userns: the filesystem's idmapping > > * > > * Use this helper to initialize a new vfs or filesystem object based on > > @@ -385,18 +385,19 @@ static inline kgid_t vfsgid_into_kgid(vfsgid_t vfsgid) > > * O_CREAT. Other examples include the allocation of quotas for a specific > > * user. > > * > > - * Return: the caller's current fsuid mapped up according to @mnt_userns. > > + * Return: the caller's current fsuid mapped up according to @idmap. > > */ > > -static inline kuid_t mapped_fsuid(struct user_namespace *mnt_userns, > > +static inline kuid_t mapped_fsuid(struct mnt_idmap *idmap, > > struct user_namespace *fs_userns) > > { > > + struct user_namespace *mnt_userns = mnt_idmap_owner(idmap); > > return from_vfsuid(mnt_userns, fs_userns, > > VFSUIDT_INIT(current_fsuid())); > > } > > > > /** > > - * mapped_fsgid - return caller's fsgid mapped up into a mnt_userns > > - * @mnt_userns: the mount's idmapping > > + * mapped_fsgid - return caller's fsgid mapped according to an idmapping > > + * @idmap: the mount's idmapping > > * @fs_userns: the filesystem's idmapping > > * > > * Use this helper to initialize a new vfs or filesystem object based on > > @@ -405,11 +406,12 @@ static inline kuid_t mapped_fsuid(struct user_namespace *mnt_userns, > > * O_CREAT. Other examples include the allocation of quotas for a specific > > * user. > > * > > - * Return: the caller's current fsgid mapped up according to @mnt_userns. > > + * Return: the caller's current fsgid mapped up according to @idmap. > > */ > > -static inline kgid_t mapped_fsgid(struct user_namespace *mnt_userns, > > +static inline kgid_t mapped_fsgid(struct mnt_idmap *idmap, > > struct user_namespace *fs_userns) > > { > > + struct user_namespace *mnt_userns = mnt_idmap_owner(idmap); > > return from_vfsgid(mnt_userns, fs_userns, > > VFSGIDT_INIT(current_fsgid())); > > } > > > > -- > > 2.34.1 > >