Re: [PATCH 2/4] statmount: allow to retrieve idmappings

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, 2025-01-30 at 00:19 +0100, Christian Brauner wrote:
> This adds the STATMOUNT_MNT_UIDMAP and STATMOUNT_MNT_GIDMAP options.
> It allows the retrieval of idmappings via statmount().
> 
> Currently it isn't possible to figure out what idmappings are applied to
> an idmapped mount. This information is often crucial. Before statmount()
> the only realistic options for an interface like this would have been to
> add it to /proc/<pid>/fdinfo/<nr> or to expose it in
> /proc/<pid>/mountinfo. Both solutions would have been pretty ugly and
> would've shown information that is of strong interest to some
> applications but not all. statmount() is perfect for this.
> 
> The idmappings applied to an idmapped mount are shown relative to the
> caller's user namespace. This is the most useful solution that doesn't
> risk leaking information or confusing the caller.
> 
> For example, an idmapped mount might have been created with the
> following idmappings:
> 
>     mount --bind -o X-mount.idmap="0:10000:1000 2000:2000:1 3000:3000:1" /srv /opt
> 
> Listing the idmappings through statmount() in the same context shows:
> 
>     mnt_id:        2147485088
>     mnt_parent_id: 2147484816
>     fs_type:       btrfs
>     mnt_root:      /srv
>     mnt_point:     /opt
>     mnt_opts:      ssd,discard=async,space_cache=v2,subvolid=5,subvol=/
>     mnt_uidmap[0]: 0 10000 1000
>     mnt_uidmap[1]: 2000 2000 1
>     mnt_uidmap[2]: 3000 3000 1
>     mnt_gidmap[0]: 0 10000 1000
>     mnt_gidmap[1]: 2000 2000 1
>     mnt_gidmap[2]: 3000 3000 1
> 
> But the idmappings might not always be resolvable in the caller's user
> namespace. For example:
> 
>     unshare --user --map-root
> 
> In this case statmount() will indicate the failure to resolve the idmappings
> in the caller's user namespace by listing 4294967295 aka (uid_t) -1 as
> the target of the mapping while still showing the source and range of
> the mapping:
> 
>     mnt_id:        2147485087
>     mnt_parent_id: 2147484016
>     fs_type:       btrfs
>     mnt_root:      /srv
>     mnt_point:     /opt
>     mnt_opts:      ssd,discard=async,space_cache=v2,subvolid=5,subvol=/
>     mnt_uidmap[0]: 0 4294967295 1000
>     mnt_uidmap[1]: 2000 4294967295 1
>     mnt_uidmap[2]: 3000 4294967295 1
>     mnt_gidmap[0]: 0 4294967295 1000
>     mnt_gidmap[1]: 2000 4294967295 1
>     mnt_gidmap[2]: 3000 4294967295 1
> 
> Note that statmount() requires that the whole range must be resolvable
> in the caller's user namespace. If a subrange fails to map it will still
> list the map as not resolvable. This is a practical compromise to avoid
> having to find which subranges are resolvable and which aren't.
> 
> Idmappings are listed as a string array with each mapping separated by
> zero bytes. This allows retrieving the idmappings and immediately using
> them for writing to e.g., /proc/<pid>/{g,u}id_map and it also allows for
> simple iteration like:
> 
>     if (stmnt->mask & STATMOUNT_MNT_UIDMAP) {
>             const char *idmap = stmnt->str + stmnt->mnt_uidmap;
> 
>             for (size_t idx = 0; idx < stmnt->mnt_uidmap_num; idx++) {
>                     printf("mnt_uidmap[%lu]: %s\n", idx, idmap);
>                     idmap += strlen(idmap) + 1;
>             }
>     }
> 
> Signed-off-by: Christian Brauner <brauner@xxxxxxxxxx>
> ---
>  fs/internal.h              |  1 +
>  fs/mnt_idmapping.c         | 49 ++++++++++++++++++++++++++++++++++++++++++++++
>  fs/namespace.c             | 43 +++++++++++++++++++++++++++++++++++++++-
>  include/uapi/linux/mount.h |  8 +++++++-
>  4 files changed, 99 insertions(+), 2 deletions(-)
> 
> diff --git a/fs/internal.h b/fs/internal.h
> index e7f02ae1e098..db6094d5cb0b 100644
> --- a/fs/internal.h
> +++ b/fs/internal.h
> @@ -338,3 +338,4 @@ static inline bool path_mounted(const struct path *path)
>  	return path->mnt->mnt_root == path->dentry;
>  }
>  void file_f_owner_release(struct file *file);
> +int statmount_mnt_idmap(struct mnt_idmap *idmap, struct seq_file *seq, bool uid_map);
> diff --git a/fs/mnt_idmapping.c b/fs/mnt_idmapping.c
> index 7b1df8cc2821..4aca8e3ba97e 100644
> --- a/fs/mnt_idmapping.c
> +++ b/fs/mnt_idmapping.c
> @@ -6,6 +6,7 @@
>  #include <linux/mnt_idmapping.h>
>  #include <linux/slab.h>
>  #include <linux/user_namespace.h>
> +#include <linux/seq_file.h>
>  
>  #include "internal.h"
>  
> @@ -334,3 +335,51 @@ void mnt_idmap_put(struct mnt_idmap *idmap)
>  		free_mnt_idmap(idmap);
>  }
>  EXPORT_SYMBOL_GPL(mnt_idmap_put);
> +
> +int statmount_mnt_idmap(struct mnt_idmap *idmap, struct seq_file *seq, bool uid_map)
> +{
> +	struct uid_gid_map *map, *map_up;
> +
> +	if (idmap == &nop_mnt_idmap || idmap == &invalid_mnt_idmap)
> +		return 0;
> +
> +	/*
> +	 * Idmappings are shown relative to the caller's idmapping.
> +	 * This is both the most intuitive and most useful solution.
> +	 */
> +	if (uid_map) {
> +		map = &idmap->uid_map;
> +		map_up = &current_user_ns()->uid_map;
> +	} else {
> +		map = &idmap->gid_map;
> +		map_up = &current_user_ns()->gid_map;
> +	}
> +
> +	for (u32 idx = 0; idx < map->nr_extents; idx++) {
> +		uid_t lower;
> +		struct uid_gid_extent *extent;
> +
> +		if (map->nr_extents <= UID_GID_MAP_MAX_BASE_EXTENTS)
> +			extent = &map->extent[idx];
> +		else
> +			extent = &map->forward[idx];
> +
> +		/*
> +		 * Verify that the whole range of the mapping can be
> +		 * resolved in the caller's idmapping. If it cannot be
> +		 * resolved 1/4294967295 will be shown as the target of

nit: I think you mean '-1/4294967295'.

> +		 * the mapping. The source and range are shown as a hint
> +		 * to the caller.
> +		 */
> +		lower = map_id_range_up(map_up, extent->lower_first, extent->count);
> +		if (lower == (uid_t) -1)
> +			seq_printf(seq, "%u %u %u", extent->first, -1, extent->count);
> +		else
> +			seq_printf(seq, "%u %u %u", extent->first, lower, extent->count);

Again, I think a different syntax for an unresolvable range would be
better. Another idea -- if you separate the fields by ':', you could
just leave out the middle field when it can't be resolved -- e.g.
"1000::1000".

> +		seq->count++; /* mappings are separated by \0 */
> +		if (seq_has_overflowed(seq))
> +			return -EAGAIN;
> +	}
> +
> +	return map->nr_extents;
> +}
> diff --git a/fs/namespace.c b/fs/namespace.c
> index 4013fbac354a..535e4829061f 100644
> --- a/fs/namespace.c
> +++ b/fs/namespace.c
> @@ -4915,6 +4915,7 @@ struct kstatmount {
>  	struct statmount __user *buf;
>  	size_t bufsize;
>  	struct vfsmount *mnt;
> +	struct mnt_idmap *idmap;
>  	u64 mask;
>  	struct path root;
>  	struct statmount sm;
> @@ -5185,6 +5186,30 @@ static int statmount_opt_sec_array(struct kstatmount *s, struct seq_file *seq)
>  	return 0;
>  }
>  
> +static inline int statmount_mnt_uidmap(struct kstatmount *s, struct seq_file *seq)
> +{
> +	int ret;
> +
> +	ret = statmount_mnt_idmap(s->idmap, seq, true);
> +	if (ret < 0)
> +		return ret;
> +
> +	s->sm.mnt_uidmap_num = ret;
> +	return 0;
> +}
> +
> +static inline int statmount_mnt_gidmap(struct kstatmount *s, struct seq_file *seq)
> +{
> +	int ret;
> +
> +	ret = statmount_mnt_idmap(s->idmap, seq, false);
> +	if (ret < 0)
> +		return ret;
> +
> +	s->sm.mnt_gidmap_num = ret;
> +	return 0;
> +}
> +
>  static int statmount_string(struct kstatmount *s, u64 flag)
>  {
>  	int ret = 0;
> @@ -5226,6 +5251,14 @@ static int statmount_string(struct kstatmount *s, u64 flag)
>  		sm->sb_source = start;
>  		ret = statmount_sb_source(s, seq);
>  		break;
> +	case STATMOUNT_MNT_UIDMAP:
> +		sm->mnt_uidmap = start;
> +		ret = statmount_mnt_uidmap(s, seq);
> +		break;
> +	case STATMOUNT_MNT_GIDMAP:
> +		sm->mnt_gidmap = start;
> +		ret = statmount_mnt_gidmap(s, seq);
> +		break;
>  	default:
>  		WARN_ON_ONCE(true);
>  		return -EINVAL;
> @@ -5350,6 +5383,7 @@ static int do_statmount(struct kstatmount *s, u64 mnt_id, u64 mnt_ns_id,
>  		return err;
>  
>  	s->root = root;
> +	s->idmap = mnt_idmap(s->mnt);
>  	if (s->mask & STATMOUNT_SB_BASIC)
>  		statmount_sb_basic(s);
>  
> @@ -5383,6 +5417,12 @@ static int do_statmount(struct kstatmount *s, u64 mnt_id, u64 mnt_ns_id,
>  	if (!err && s->mask & STATMOUNT_SB_SOURCE)
>  		err = statmount_string(s, STATMOUNT_SB_SOURCE);
>  
> +	if (!err && s->mask & STATMOUNT_MNT_UIDMAP)
> +		err = statmount_string(s, STATMOUNT_MNT_UIDMAP);
> +
> +	if (!err && s->mask & STATMOUNT_MNT_GIDMAP)
> +		err = statmount_string(s, STATMOUNT_MNT_GIDMAP);
> +
>  	if (!err && s->mask & STATMOUNT_MNT_NS_ID)
>  		statmount_mnt_ns_id(s, ns);
>  
> @@ -5406,7 +5446,8 @@ static inline bool retry_statmount(const long ret, size_t *seq_size)
>  #define STATMOUNT_STRING_REQ (STATMOUNT_MNT_ROOT | STATMOUNT_MNT_POINT | \
>  			      STATMOUNT_FS_TYPE | STATMOUNT_MNT_OPTS | \
>  			      STATMOUNT_FS_SUBTYPE | STATMOUNT_SB_SOURCE | \
> -			      STATMOUNT_OPT_ARRAY | STATMOUNT_OPT_SEC_ARRAY)
> +			      STATMOUNT_OPT_ARRAY | STATMOUNT_OPT_SEC_ARRAY | \
> +			      STATMOUNT_MNT_UIDMAP | STATMOUNT_MNT_GIDMAP)
>  
>  static int prepare_kstatmount(struct kstatmount *ks, struct mnt_id_req *kreq,
>  			      struct statmount __user *buf, size_t bufsize,
> diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h
> index c07008816aca..0be6ac4c1624 100644
> --- a/include/uapi/linux/mount.h
> +++ b/include/uapi/linux/mount.h
> @@ -179,7 +179,11 @@ struct statmount {
>  	__u32 opt_array;	/* [str] Array of nul terminated fs options */
>  	__u32 opt_sec_num;	/* Number of security options */
>  	__u32 opt_sec_array;	/* [str] Array of nul terminated security options */
> -	__u64 __spare2[46];
> +	__u32 mnt_uidmap_num;	/* Number of uid mappings */
> +	__u32 mnt_uidmap;	/* [str] Array of uid mappings (as seen from callers namespace) */
> +	__u32 mnt_gidmap_num;	/* Number of gid mappings */
> +	__u32 mnt_gidmap;	/* [str] Array of gid mappings (as seen from callers namespace) */
> +	__u64 __spare2[44];
>  	char str[];		/* Variable size part containing strings */
>  };
>  
> @@ -217,6 +221,8 @@ struct mnt_id_req {
>  #define STATMOUNT_SB_SOURCE		0x00000200U	/* Want/got sb_source */
>  #define STATMOUNT_OPT_ARRAY		0x00000400U	/* Want/got opt_... */
>  #define STATMOUNT_OPT_SEC_ARRAY		0x00000800U	/* Want/got opt_sec... */
> +#define STATMOUNT_MNT_UIDMAP		0x00001000U	/* Want/got uidmap... */
> +#define STATMOUNT_MNT_GIDMAP		0x00002000U	/* Want/got gidmap... */
>  
>  /*
>   * Special @mnt_id values that can be passed to listmount
> 

-- 
Jeff Layton <jlayton@xxxxxxxxxx>





[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [NTFS 3]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [NTFS 3]     [Samba]     [Device Mapper]     [CEPH Development]

  Powered by Linux