On Thu, 2025-01-30 at 00:19 +0100, Christian Brauner wrote: > This adds the STATMOUNT_MNT_UIDMAP and STATMOUNT_MNT_GIDMAP options. > It allows the retrieval of idmappings via statmount(). > > Currently it isn't possible to figure out what idmappings are applied to > an idmapped mount. This information is often crucial. Before statmount() > the only realistic options for an interface like this would have been to > add it to /proc/<pid>/fdinfo/<nr> or to expose it in > /proc/<pid>/mountinfo. Both solutions would have been pretty ugly and > would've shown information that is of strong interest to some > applications but not all. statmount() is perfect for this. > > The idmappings applied to an idmapped mount are shown relative to the > caller's user namespace. This is the most useful solution that doesn't > risk leaking information or confusing the caller. > > For example, an idmapped mount might have been created with the > following idmappings: > > mount --bind -o X-mount.idmap="0:10000:1000 2000:2000:1 3000:3000:1" /srv /opt > > Listing the idmappings through statmount() in the same context shows: > > mnt_id: 2147485088 > mnt_parent_id: 2147484816 > fs_type: btrfs > mnt_root: /srv > mnt_point: /opt > mnt_opts: ssd,discard=async,space_cache=v2,subvolid=5,subvol=/ > mnt_uidmap[0]: 0 10000 1000 > mnt_uidmap[1]: 2000 2000 1 > mnt_uidmap[2]: 3000 3000 1 > mnt_gidmap[0]: 0 10000 1000 > mnt_gidmap[1]: 2000 2000 1 > mnt_gidmap[2]: 3000 3000 1 > > But the idmappings might not always be resolvable in the caller's user > namespace. 
For example: > > unshare --user --map-root > > In this case statmount() will indicate the failure to resolve the idmappings > in the caller's user namespace by listing 4294967295 aka (uid_t) -1 as > the target of the mapping while still showing the source and range of > the mapping: > > mnt_id: 2147485087 > mnt_parent_id: 2147484016 > fs_type: btrfs > mnt_root: /srv > mnt_point: /opt > mnt_opts: ssd,discard=async,space_cache=v2,subvolid=5,subvol=/ > mnt_uidmap[0]: 0 4294967295 1000 > mnt_uidmap[1]: 2000 4294967295 1 > mnt_uidmap[2]: 3000 4294967295 1 > mnt_gidmap[0]: 0 4294967295 1000 > mnt_gidmap[1]: 2000 4294967295 1 > mnt_gidmap[2]: 3000 4294967295 1 > > Note that statmount() requires that the whole range must be resolvable > in the caller's user namespace. If a subrange fails to map it will still > list the map as not resolvable. This is a practical compromise to avoid > having to find which subranges are resolvable and which aren't. > > Idmappings are listed as a string array with each mapping separated by > zero bytes. 
This allows retrieving the idmappings and immediately using > them for writing to e.g., /proc/<pid>/{g,u}id_map and it also allows for > simple iteration like: > > if (stmnt->mask & STATMOUNT_MNT_UIDMAP) { > const char *idmap = stmnt->str + stmnt->mnt_uidmap; > > for (size_t idx = 0; idx < stmnt->mnt_uidmap_num; idx++) { > printf("mnt_uidmap[%zu]: %s\n", idx, idmap); > idmap += strlen(idmap) + 1; > } > } > > Signed-off-by: Christian Brauner <brauner@xxxxxxxxxx> > --- > fs/internal.h | 1 + > fs/mnt_idmapping.c | 49 ++++++++++++++++++++++++++++++++++++++++++++++ > fs/namespace.c | 43 +++++++++++++++++++++++++++++++++++++++- > include/uapi/linux/mount.h | 8 +++++++- > 4 files changed, 99 insertions(+), 2 deletions(-) > > diff --git a/fs/internal.h b/fs/internal.h > index e7f02ae1e098..db6094d5cb0b 100644 > --- a/fs/internal.h > +++ b/fs/internal.h > @@ -338,3 +338,4 @@ static inline bool path_mounted(const struct path *path) > return path->mnt->mnt_root == path->dentry; > } > void file_f_owner_release(struct file *file); > +int statmount_mnt_idmap(struct mnt_idmap *idmap, struct seq_file *seq, bool uid_map); > diff --git a/fs/mnt_idmapping.c b/fs/mnt_idmapping.c > index 7b1df8cc2821..4aca8e3ba97e 100644 > --- a/fs/mnt_idmapping.c > +++ b/fs/mnt_idmapping.c > @@ -6,6 +6,7 @@ > #include <linux/mnt_idmapping.h> > #include <linux/slab.h> > #include <linux/user_namespace.h> > +#include <linux/seq_file.h> > > #include "internal.h" > > @@ -334,3 +335,51 @@ void mnt_idmap_put(struct mnt_idmap *idmap) > free_mnt_idmap(idmap); > } > EXPORT_SYMBOL_GPL(mnt_idmap_put); > + > +int statmount_mnt_idmap(struct mnt_idmap *idmap, struct seq_file *seq, bool uid_map) > +{ > + struct uid_gid_map *map, *map_up; > + > + if (idmap == &nop_mnt_idmap || idmap == &invalid_mnt_idmap) > + return 0; > + > + /* > + * Idmappings are shown relative to the caller's idmapping. > + * This is both the most intuitive and most useful solution. 
> + */ > + if (uid_map) { > + map = &idmap->uid_map; > + map_up = &current_user_ns()->uid_map; > + } else { > + map = &idmap->gid_map; > + map_up = &current_user_ns()->gid_map; > + } > + > + for (u32 idx = 0; idx < map->nr_extents; idx++) { > + uid_t lower; > + struct uid_gid_extent *extent; > + > + if (map->nr_extents <= UID_GID_MAP_MAX_BASE_EXTENTS) > + extent = &map->extent[idx]; > + else > + extent = &map->forward[idx]; > + > + /* > + * Verify that the whole range of the mapping can be > + * resolved in the caller's idmapping. If it cannot be > + * resolved 1/4294967295 will be shown as the target of nit: I think you mean '-1/4294967295'. > + * the mapping. The source and range are shown as a hint > + * to the caller. > + */ > + lower = map_id_range_up(map_up, extent->lower_first, extent->count); > + if (lower == (uid_t) -1) > + seq_printf(seq, "%u %u %u", extent->first, -1, extent->count); > + else > + seq_printf(seq, "%u %u %u", extent->first, lower, extent->count); Again, I think a different syntax for an unresolvable range would be better. Another idea -- if you separate the fields by ':', you could just leave out the middle field when it can't be resolved -- e.g. "1000::1000". 
> + seq->count++; /* mappings are separated by \0 */ > + if (seq_has_overflowed(seq)) > + return -EAGAIN; > + } > + > + return map->nr_extents; > +} > diff --git a/fs/namespace.c b/fs/namespace.c > index 4013fbac354a..535e4829061f 100644 > --- a/fs/namespace.c > +++ b/fs/namespace.c > @@ -4915,6 +4915,7 @@ struct kstatmount { > struct statmount __user *buf; > size_t bufsize; > struct vfsmount *mnt; > + struct mnt_idmap *idmap; > u64 mask; > struct path root; > struct statmount sm; > @@ -5185,6 +5186,30 @@ static int statmount_opt_sec_array(struct kstatmount *s, struct seq_file *seq) > return 0; > } > > +static inline int statmount_mnt_uidmap(struct kstatmount *s, struct seq_file *seq) > +{ > + int ret; > + > + ret = statmount_mnt_idmap(s->idmap, seq, true); > + if (ret < 0) > + return ret; > + > + s->sm.mnt_uidmap_num = ret; > + return 0; > +} > + > +static inline int statmount_mnt_gidmap(struct kstatmount *s, struct seq_file *seq) > +{ > + int ret; > + > + ret = statmount_mnt_idmap(s->idmap, seq, false); > + if (ret < 0) > + return ret; > + > + s->sm.mnt_gidmap_num = ret; > + return 0; > +} > + > static int statmount_string(struct kstatmount *s, u64 flag) > { > int ret = 0; > @@ -5226,6 +5251,14 @@ static int statmount_string(struct kstatmount *s, u64 flag) > sm->sb_source = start; > ret = statmount_sb_source(s, seq); > break; > + case STATMOUNT_MNT_UIDMAP: > + sm->mnt_uidmap = start; > + ret = statmount_mnt_uidmap(s, seq); > + break; > + case STATMOUNT_MNT_GIDMAP: > + sm->mnt_gidmap = start; > + ret = statmount_mnt_gidmap(s, seq); > + break; > default: > WARN_ON_ONCE(true); > return -EINVAL; > @@ -5350,6 +5383,7 @@ static int do_statmount(struct kstatmount *s, u64 mnt_id, u64 mnt_ns_id, > return err; > > s->root = root; > + s->idmap = mnt_idmap(s->mnt); > if (s->mask & STATMOUNT_SB_BASIC) > statmount_sb_basic(s); > > @@ -5383,6 +5417,12 @@ static int do_statmount(struct kstatmount *s, u64 mnt_id, u64 mnt_ns_id, > if (!err && s->mask & STATMOUNT_SB_SOURCE) > err = 
statmount_string(s, STATMOUNT_SB_SOURCE); > > + if (!err && s->mask & STATMOUNT_MNT_UIDMAP) > + err = statmount_string(s, STATMOUNT_MNT_UIDMAP); > + > + if (!err && s->mask & STATMOUNT_MNT_GIDMAP) > + err = statmount_string(s, STATMOUNT_MNT_GIDMAP); > + > if (!err && s->mask & STATMOUNT_MNT_NS_ID) > statmount_mnt_ns_id(s, ns); > > @@ -5406,7 +5446,8 @@ static inline bool retry_statmount(const long ret, size_t *seq_size) > #define STATMOUNT_STRING_REQ (STATMOUNT_MNT_ROOT | STATMOUNT_MNT_POINT | \ > STATMOUNT_FS_TYPE | STATMOUNT_MNT_OPTS | \ > STATMOUNT_FS_SUBTYPE | STATMOUNT_SB_SOURCE | \ > - STATMOUNT_OPT_ARRAY | STATMOUNT_OPT_SEC_ARRAY) > + STATMOUNT_OPT_ARRAY | STATMOUNT_OPT_SEC_ARRAY | \ > + STATMOUNT_MNT_UIDMAP | STATMOUNT_MNT_GIDMAP) > > static int prepare_kstatmount(struct kstatmount *ks, struct mnt_id_req *kreq, > struct statmount __user *buf, size_t bufsize, > diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h > index c07008816aca..0be6ac4c1624 100644 > --- a/include/uapi/linux/mount.h > +++ b/include/uapi/linux/mount.h > @@ -179,7 +179,11 @@ struct statmount { > __u32 opt_array; /* [str] Array of nul terminated fs options */ > __u32 opt_sec_num; /* Number of security options */ > __u32 opt_sec_array; /* [str] Array of nul terminated security options */ > - __u64 __spare2[46]; > + __u32 mnt_uidmap_num; /* Number of uid mappings */ > + __u32 mnt_uidmap; /* [str] Array of uid mappings (as seen from callers namespace) */ > + __u32 mnt_gidmap_num; /* Number of gid mappings */ > + __u32 mnt_gidmap; /* [str] Array of gid mappings (as seen from callers namespace) */ > + __u64 __spare2[44]; > char str[]; /* Variable size part containing strings */ > }; > > @@ -217,6 +221,8 @@ struct mnt_id_req { > #define STATMOUNT_SB_SOURCE 0x00000200U /* Want/got sb_source */ > #define STATMOUNT_OPT_ARRAY 0x00000400U /* Want/got opt_... */ > #define STATMOUNT_OPT_SEC_ARRAY 0x00000800U /* Want/got opt_sec... 
*/ > +#define STATMOUNT_MNT_UIDMAP 0x00001000U /* Want/got uidmap... */ > +#define STATMOUNT_MNT_GIDMAP 0x00002000U /* Want/got gidmap... */ > > /* > * Special @mnt_id values that can be passed to listmount > -- Jeff Layton <jlayton@xxxxxxxxxx>