Re: [PATCH 23/25] fs: port fs{g,u}id helpers to mnt_idmap

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Mon, Jun 17, 2024 at 09:41:21AM +0200, Christian Brauner wrote:
> On Fri, Jun 14, 2024 at 04:15:47PM GMT, Darrick J. Wong wrote:
> > On Fri, Jan 13, 2023 at 12:49:31PM +0100, Christian Brauner wrote:
> > > Convert to struct mnt_idmap.
> > > 
> > > Last cycle we merged the necessary infrastructure in
> > > 256c8aed2b42 ("fs: introduce dedicated idmap type for mounts").
> > > This is just the conversion to struct mnt_idmap.
> > > 
> > > Currently we still pass around the plain namespace that was attached to a
> > > mount. This is in general pretty convenient but it makes it easy to
> > > conflate namespaces that are relevant on the filesystem with namespaces
> > > that are relevent on the mount level. Especially for non-vfs developers
> > > without detailed knowledge in this area this can be a potential source for
> > > bugs.
> > > 
> > > Once the conversion to struct mnt_idmap is done all helpers down to the
> > > really low-level helpers will take a struct mnt_idmap argument instead of
> > > two namespace arguments. This way it becomes impossible to conflate the two
> > > eliminating the possibility of any bugs. All of the vfs and all filesystems
> > > only operate on struct mnt_idmap.
> > > 
> > > Signed-off-by: Christian Brauner (Microsoft) <brauner@xxxxxxxxxx>
> > > ---
> > >  fs/ext4/ialloc.c              |  3 +--
> > >  fs/inode.c                    |  6 ++----
> > >  fs/xfs/xfs_inode.c            | 13 +++++--------
> > >  fs/xfs/xfs_symlink.c          |  5 ++---
> > >  include/linux/fs.h            | 21 ++++++++++-----------
> > >  include/linux/mnt_idmapping.h | 18 ++++++++++--------
> > >  6 files changed, 30 insertions(+), 36 deletions(-)
> > > 
> > > diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
> > > index 1024b0c02431..157663031f8c 100644
> > > --- a/fs/ext4/ialloc.c
> > > +++ b/fs/ext4/ialloc.c
> > > @@ -943,7 +943,6 @@ struct inode *__ext4_new_inode(struct mnt_idmap *idmap,
> > >  	ext4_group_t flex_group;
> > >  	struct ext4_group_info *grp = NULL;
> > >  	bool encrypt = false;
> > > -	struct user_namespace *mnt_userns = mnt_idmap_owner(idmap);
> > >  
> > >  	/* Cannot create files in a deleted directory */
> > >  	if (!dir || !dir->i_nlink)
> > > @@ -973,7 +972,7 @@ struct inode *__ext4_new_inode(struct mnt_idmap *idmap,
> > >  		i_gid_write(inode, owner[1]);
> > >  	} else if (test_opt(sb, GRPID)) {
> > >  		inode->i_mode = mode;
> > > -		inode_fsuid_set(inode, mnt_userns);
> > > +		inode_fsuid_set(inode, idmap);
> > >  		inode->i_gid = dir->i_gid;
> > >  	} else
> > >  		inode_init_owner(idmap, inode, dir, mode);
> > > diff --git a/fs/inode.c b/fs/inode.c
> > > index 1aec92141fab..1b05e0e4b5c8 100644
> > > --- a/fs/inode.c
> > > +++ b/fs/inode.c
> > > @@ -2293,9 +2293,7 @@ EXPORT_SYMBOL(init_special_inode);
> > >  void inode_init_owner(struct mnt_idmap *idmap, struct inode *inode,
> > >  		      const struct inode *dir, umode_t mode)
> > >  {
> > > -	struct user_namespace *mnt_userns = mnt_idmap_owner(idmap);
> > > -
> > > -	inode_fsuid_set(inode, mnt_userns);
> > > +	inode_fsuid_set(inode, idmap);
> > >  	if (dir && dir->i_mode & S_ISGID) {
> > >  		inode->i_gid = dir->i_gid;
> > >  
> > > @@ -2303,7 +2301,7 @@ void inode_init_owner(struct mnt_idmap *idmap, struct inode *inode,
> > >  		if (S_ISDIR(mode))
> > >  			mode |= S_ISGID;
> > >  	} else
> > > -		inode_fsgid_set(inode, mnt_userns);
> > > +		inode_fsgid_set(inode, idmap);
> > >  	inode->i_mode = mode;
> > >  }
> > >  EXPORT_SYMBOL(inode_init_owner);
> > > diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
> > > index 59fb064e2df3..7f1d715faab5 100644
> > > --- a/fs/xfs/xfs_inode.c
> > > +++ b/fs/xfs/xfs_inode.c
> > > @@ -788,7 +788,6 @@ xfs_init_new_inode(
> > >  	bool			init_xattrs,
> > >  	struct xfs_inode	**ipp)
> > >  {
> > > -	struct user_namespace	*mnt_userns = mnt_idmap_owner(idmap);
> > >  	struct inode		*dir = pip ? VFS_I(pip) : NULL;
> > >  	struct xfs_mount	*mp = tp->t_mountp;
> > >  	struct xfs_inode	*ip;
> > > @@ -824,7 +823,7 @@ xfs_init_new_inode(
> > >  	ip->i_projid = prid;
> > >  
> > >  	if (dir && !(dir->i_mode & S_ISGID) && xfs_has_grpid(mp)) {
> > > -		inode_fsuid_set(inode, mnt_userns);
> > > +		inode_fsuid_set(inode, idmap);
> > >  		inode->i_gid = dir->i_gid;
> > >  		inode->i_mode = mode;
> > >  	} else {
> > > @@ -955,7 +954,6 @@ xfs_create(
> > >  	bool			init_xattrs,
> > >  	xfs_inode_t		**ipp)
> > >  {
> > > -	struct user_namespace	*mnt_userns = mnt_idmap_owner(idmap);
> > >  	int			is_dir = S_ISDIR(mode);
> > >  	struct xfs_mount	*mp = dp->i_mount;
> > >  	struct xfs_inode	*ip = NULL;
> > > @@ -980,8 +978,8 @@ xfs_create(
> > >  	/*
> > >  	 * Make sure that we have allocated dquot(s) on disk.
> > >  	 */
> > > -	error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(mnt_userns, &init_user_ns),
> > > -			mapped_fsgid(mnt_userns, &init_user_ns), prid,
> > > +	error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(idmap, &init_user_ns),
> > > +			mapped_fsgid(idmap, &init_user_ns), prid,
> > >  			XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT,
> > >  			&udqp, &gdqp, &pdqp);
> > >  	if (error)
> > > @@ -1109,7 +1107,6 @@ xfs_create_tmpfile(
> > >  	umode_t			mode,
> > >  	struct xfs_inode	**ipp)
> > >  {
> > > -	struct user_namespace	*mnt_userns = mnt_idmap_owner(idmap);
> > >  	struct xfs_mount	*mp = dp->i_mount;
> > >  	struct xfs_inode	*ip = NULL;
> > >  	struct xfs_trans	*tp = NULL;
> > > @@ -1130,8 +1127,8 @@ xfs_create_tmpfile(
> > >  	/*
> > >  	 * Make sure that we have allocated dquot(s) on disk.
> > >  	 */
> > > -	error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(mnt_userns, &init_user_ns),
> > > -			mapped_fsgid(mnt_userns, &init_user_ns), prid,
> > > +	error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(idmap, &init_user_ns),
> > > +			mapped_fsgid(idmap, &init_user_ns), prid,
> > >  			XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT,
> > >  			&udqp, &gdqp, &pdqp);
> > >  	if (error)
> > > diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
> > > index 24cf0a16bf35..85e433df6a3f 100644
> > > --- a/fs/xfs/xfs_symlink.c
> > > +++ b/fs/xfs/xfs_symlink.c
> > > @@ -151,7 +151,6 @@ xfs_symlink(
> > >  	umode_t			mode,
> > >  	struct xfs_inode	**ipp)
> > >  {
> > > -	struct user_namespace	*mnt_userns = mnt_idmap_owner(idmap);
> > >  	struct xfs_mount	*mp = dp->i_mount;
> > >  	struct xfs_trans	*tp = NULL;
> > >  	struct xfs_inode	*ip = NULL;
> > > @@ -194,8 +193,8 @@ xfs_symlink(
> > >  	/*
> > >  	 * Make sure that we have allocated dquot(s) on disk.
> > >  	 */
> > > -	error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(mnt_userns, &init_user_ns),
> > > -			mapped_fsgid(mnt_userns, &init_user_ns), prid,
> > > +	error = xfs_qm_vop_dqalloc(dp, mapped_fsuid(idmap, &init_user_ns),
> > > +			mapped_fsgid(idmap, &init_user_ns), prid,
> > 
> > I know this is pretty late to be asking questions about this patch, but
> > after reviewing this code, I am curious about something --
> 
> That's perfectly fine!
> 
> > 
> > Here we try to allocate dquots prior to creating a file.  The uid used
> > here is:
> > 
> > 	mapped_fsuid(mnt_userns, &init_user_ns)
> > 
> > But later in inode_fsuid_set, we set the new inode's actual uid to:
> > 
> > 	mapped_fsuid(mnt_userns, i_user_ns(inode));
> > 
> > What happens if i_user_ns(inode) != &init_user_ns?  Can that change the
> 
> Excellent observation and question.
> 
> > return value of mapped_fsuid with the result that quota accounting
> > charges the wrong uid if the user namespaces are different?
> > Unfortunately I'm not familiar enough with how these things work to know
> > if that's even a sensible question.
> 
> So a few preliminary remarks:
> 
> (1) i_user_ns() retries the user namespace of the superblock. For
>     example:
> 
>     mount a tmpfs filesystem in the initial mount namespace owned by
>     the initial user namespace:
> 
>             mount -t tmpfs tmpfs /opt
> 
>     In this case
> 
>             init_user_ns == i_user_ns(inode)
> 
> 
>     But if we create a new mount namespace owned by an unprivilged user
>     namespace:
> 
>             unshare --mount --user --map-root
> 
>     and mount tmpfs in there:
> 
>             mount -t tmpfs tmpfs /mnt
> 
>     then
> 
>             init_user_ns != i_user_ns(inode)
> 
> (2) Filesystems must be explicitly marked as being mountable inside of a
>     user namespace aka by an unprivileged user. That only includes
>     pseudo filesystems (and I really want a VFS assertion that it
>     stays that way).
> 
> Since xfs isn't in (2) the scenario described in (1) cannot happen.

Ah, ok.  Thanks for the explanation.  I probably should update xfs to
keep these things consistent, but afaict it doesn't make any difference
with the current codebase.

> For all non-userns mountable filesystems like xfs I generally chose to
> pass init_user_ns explicitly everywhere it's required. But general
> helpers like i_user_ns() need to work for both and that's why it always
> uses i_user_ns().
> 
> You could technically put a WARN_ON_ONCE() in inode_fsuid_set() that
> basically does:
> 
> @@ -1492,6 +1492,7 @@ static inline void i_gid_update(struct mnt_idmap *idmap,
>  static inline void inode_fsuid_set(struct inode *inode,
>                                    struct mnt_idmap *idmap)
>  {
> +       WARN_ON_ONCE(i_user_ns(inode) != &init_user_ns && inode->i_sb->s_type->fs_flags & FS_USERNS_MOUNT);
>         inode->i_uid = mapped_fsuid(idmap, i_user_ns(inode));
>  }
> 
> But I didn't think it was worth it and if we wanted to use it then I'd rather
> at an inode flag so we don't have to do this double deref thing.

Upon further investigation, it turns out that xfs_qm_vop_create_dqattach
already checks the inode ids vs. the dquot ids:

	if (udqp && XFS_IS_UQUOTA_ON(mp)) {
		ASSERT(ip->i_udquot == NULL);
		ASSERT(i_uid_read(VFS_I(ip)) == udqp->q_id);

		ip->i_udquot = xfs_qm_dqhold(udqp);
	}

If there's ever a discrepancy we'll catch this on CONFIG_XFS_DEBUG=y
systems, provided that the people doing QA also turn on debug mode.

Thanks for the insight!

--D

> > 
> > In my development tree I have a few assertions sprinked to see if we
> > ever detect a discrepancy in the uid that is set.  fstests doesn't
> > trigger it, nor does my usual workflow, so I really don't know.
> > 
> > --D
> > 
> > >  			XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT,
> > >  			&udqp, &gdqp, &pdqp);
> > >  	if (error)
> > > diff --git a/include/linux/fs.h b/include/linux/fs.h
> > > index 173c5274a63a..54a95ed68322 100644
> > > --- a/include/linux/fs.h
> > > +++ b/include/linux/fs.h
> > > @@ -1744,29 +1744,29 @@ static inline void i_gid_update(struct mnt_idmap *idmap,
> > >  /**
> > >   * inode_fsuid_set - initialize inode's i_uid field with callers fsuid
> > >   * @inode: inode to initialize
> > > - * @mnt_userns: user namespace of the mount the inode was found from
> > > + * @idmap: idmap of the mount the inode was found from
> > >   *
> > >   * Initialize the i_uid field of @inode. If the inode was found/created via
> > > - * an idmapped mount map the caller's fsuid according to @mnt_users.
> > > + * an idmapped mount map the caller's fsuid according to @idmap.
> > >   */
> > >  static inline void inode_fsuid_set(struct inode *inode,
> > > -				   struct user_namespace *mnt_userns)
> > > +				   struct mnt_idmap *idmap)
> > >  {
> > > -	inode->i_uid = mapped_fsuid(mnt_userns, i_user_ns(inode));
> > > +	inode->i_uid = mapped_fsuid(idmap, i_user_ns(inode));
> > >  }
> > >  
> > >  /**
> > >   * inode_fsgid_set - initialize inode's i_gid field with callers fsgid
> > >   * @inode: inode to initialize
> > > - * @mnt_userns: user namespace of the mount the inode was found from
> > > + * @idmap: idmap of the mount the inode was found from
> > >   *
> > >   * Initialize the i_gid field of @inode. If the inode was found/created via
> > > - * an idmapped mount map the caller's fsgid according to @mnt_users.
> > > + * an idmapped mount map the caller's fsgid according to @idmap.
> > >   */
> > >  static inline void inode_fsgid_set(struct inode *inode,
> > > -				   struct user_namespace *mnt_userns)
> > > +				   struct mnt_idmap *idmap)
> > >  {
> > > -	inode->i_gid = mapped_fsgid(mnt_userns, i_user_ns(inode));
> > > +	inode->i_gid = mapped_fsgid(idmap, i_user_ns(inode));
> > >  }
> > >  
> > >  /**
> > > @@ -1784,14 +1784,13 @@ static inline bool fsuidgid_has_mapping(struct super_block *sb,
> > >  					struct mnt_idmap *idmap)
> > >  {
> > >  	struct user_namespace *fs_userns = sb->s_user_ns;
> > > -	struct user_namespace *mnt_userns = mnt_idmap_owner(idmap);
> > >  	kuid_t kuid;
> > >  	kgid_t kgid;
> > >  
> > > -	kuid = mapped_fsuid(mnt_userns, fs_userns);
> > > +	kuid = mapped_fsuid(idmap, fs_userns);
> > >  	if (!uid_valid(kuid))
> > >  		return false;
> > > -	kgid = mapped_fsgid(mnt_userns, fs_userns);
> > > +	kgid = mapped_fsgid(idmap, fs_userns);
> > >  	if (!gid_valid(kgid))
> > >  		return false;
> > >  	return kuid_has_mapping(fs_userns, kuid) &&
> > > diff --git a/include/linux/mnt_idmapping.h b/include/linux/mnt_idmapping.h
> > > index 0ccca33a7a6d..d63e7c84a389 100644
> > > --- a/include/linux/mnt_idmapping.h
> > > +++ b/include/linux/mnt_idmapping.h
> > > @@ -375,8 +375,8 @@ static inline kgid_t vfsgid_into_kgid(vfsgid_t vfsgid)
> > >  }
> > >  
> > >  /**
> > > - * mapped_fsuid - return caller's fsuid mapped up into a mnt_userns
> > > - * @mnt_userns: the mount's idmapping
> > > + * mapped_fsuid - return caller's fsuid mapped according to an idmapping
> > > + * @idmap: the mount's idmapping
> > >   * @fs_userns: the filesystem's idmapping
> > >   *
> > >   * Use this helper to initialize a new vfs or filesystem object based on
> > > @@ -385,18 +385,19 @@ static inline kgid_t vfsgid_into_kgid(vfsgid_t vfsgid)
> > >   * O_CREAT. Other examples include the allocation of quotas for a specific
> > >   * user.
> > >   *
> > > - * Return: the caller's current fsuid mapped up according to @mnt_userns.
> > > + * Return: the caller's current fsuid mapped up according to @idmap.
> > >   */
> > > -static inline kuid_t mapped_fsuid(struct user_namespace *mnt_userns,
> > > +static inline kuid_t mapped_fsuid(struct mnt_idmap *idmap,
> > >  				  struct user_namespace *fs_userns)
> > >  {
> > > +	struct user_namespace *mnt_userns = mnt_idmap_owner(idmap);
> > >  	return from_vfsuid(mnt_userns, fs_userns,
> > >  			   VFSUIDT_INIT(current_fsuid()));
> > >  }
> > >  
> > >  /**
> > > - * mapped_fsgid - return caller's fsgid mapped up into a mnt_userns
> > > - * @mnt_userns: the mount's idmapping
> > > + * mapped_fsgid - return caller's fsgid mapped according to an idmapping
> > > + * @idmap: the mount's idmapping
> > >   * @fs_userns: the filesystem's idmapping
> > >   *
> > >   * Use this helper to initialize a new vfs or filesystem object based on
> > > @@ -405,11 +406,12 @@ static inline kuid_t mapped_fsuid(struct user_namespace *mnt_userns,
> > >   * O_CREAT. Other examples include the allocation of quotas for a specific
> > >   * user.
> > >   *
> > > - * Return: the caller's current fsgid mapped up according to @mnt_userns.
> > > + * Return: the caller's current fsgid mapped up according to @idmap.
> > >   */
> > > -static inline kgid_t mapped_fsgid(struct user_namespace *mnt_userns,
> > > +static inline kgid_t mapped_fsgid(struct mnt_idmap *idmap,
> > >  				  struct user_namespace *fs_userns)
> > >  {
> > > +	struct user_namespace *mnt_userns = mnt_idmap_owner(idmap);
> > >  	return from_vfsgid(mnt_userns, fs_userns,
> > >  			   VFSGIDT_INIT(current_fsgid()));
> > >  }
> > > 
> > > -- 
> > > 2.34.1
> > > 




[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [NTFS 3]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [NTFS 3]     [Samba]     [Device Mapper]     [CEPH Development]

  Powered by Linux