Re: [PATCH 3/4] mnt_idmapping: decouple from namespaces

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, Nov 22, 2023 at 01:44:39PM +0100, Christian Brauner wrote:
> There's no reason we need to couple mnt idmapping to namespaces in the
> way we currently do. Copy the idmapping when an idmapped mount is
> created and don't take any reference on the namespace at all.
> 
> We also can't easily refcount struct uid_gid_map because it needs to
> stay the size of a cacheline otherwise we risk performance regressions
> (Ignoring for a second that right now struct uid_gid_map isn't actually
>  64 byte but 72 but that's a fix for another patch series.).
> 
> Signed-off-by: Christian Brauner <brauner@xxxxxxxxxx>
> ---
>  fs/mnt_idmapping.c      | 106 +++++++++++++++++++++++++++++++++++++++++-------
>  include/linux/uidgid.h  |  13 ++++++
>  kernel/user_namespace.c |   4 +-
>  3 files changed, 106 insertions(+), 17 deletions(-)
> 
> diff --git a/fs/mnt_idmapping.c b/fs/mnt_idmapping.c
> index 35d78cb3c38a..64c5205e2b5e 100644
> --- a/fs/mnt_idmapping.c
> +++ b/fs/mnt_idmapping.c
> @@ -9,8 +9,16 @@
>  
>  #include "internal.h"
>  
> +/*
> + * Outside of this file vfs{g,u}id_t are always created from k{g,u}id_t,
> + * never from raw values. These are just internal helpers.
> + */
> +#define VFSUIDT_INIT_RAW(val) (vfsuid_t){ val }
> +#define VFSGIDT_INIT_RAW(val) (vfsgid_t){ val }
> +
>  struct mnt_idmap {
> -	struct user_namespace *owner;
> +	struct uid_gid_map uid_map;
> +	struct uid_gid_map gid_map;
>  	refcount_t count;
>  };
>  
> @@ -20,7 +28,6 @@ struct mnt_idmap {
>   * mapped to {g,u}id 1, [...], {g,u}id 1000 to {g,u}id 1000, [...].
>   */
>  struct mnt_idmap nop_mnt_idmap = {
> -	.owner	= &init_user_ns,
>  	.count	= REFCOUNT_INIT(1),
>  };
>  EXPORT_SYMBOL_GPL(nop_mnt_idmap);
> @@ -65,7 +72,6 @@ vfsuid_t make_vfsuid(struct mnt_idmap *idmap,
>  		     kuid_t kuid)
>  {
>  	uid_t uid;
> -	struct user_namespace *mnt_userns = idmap->owner;
>  
>  	if (idmap == &nop_mnt_idmap)
>  		return VFSUIDT_INIT(kuid);
> @@ -75,7 +81,7 @@ vfsuid_t make_vfsuid(struct mnt_idmap *idmap,
>  		uid = from_kuid(fs_userns, kuid);
>  	if (uid == (uid_t)-1)
>  		return INVALID_VFSUID;
> -	return VFSUIDT_INIT(make_kuid(mnt_userns, uid));
> +	return VFSUIDT_INIT_RAW(map_id_down(&idmap->uid_map, uid));
>  }
>  EXPORT_SYMBOL_GPL(make_vfsuid);
>  
> @@ -103,7 +109,6 @@ vfsgid_t make_vfsgid(struct mnt_idmap *idmap,
>  		     struct user_namespace *fs_userns, kgid_t kgid)
>  {
>  	gid_t gid;
> -	struct user_namespace *mnt_userns = idmap->owner;
>  
>  	if (idmap == &nop_mnt_idmap)
>  		return VFSGIDT_INIT(kgid);
> @@ -113,7 +118,7 @@ vfsgid_t make_vfsgid(struct mnt_idmap *idmap,
>  		gid = from_kgid(fs_userns, kgid);
>  	if (gid == (gid_t)-1)
>  		return INVALID_VFSGID;
> -	return VFSGIDT_INIT(make_kgid(mnt_userns, gid));
> +	return VFSGIDT_INIT_RAW(map_id_down(&idmap->gid_map, gid));
>  }
>  EXPORT_SYMBOL_GPL(make_vfsgid);
>  
> @@ -132,11 +137,10 @@ kuid_t from_vfsuid(struct mnt_idmap *idmap,
>  		   struct user_namespace *fs_userns, vfsuid_t vfsuid)
>  {
>  	uid_t uid;
> -	struct user_namespace *mnt_userns = idmap->owner;
>  
>  	if (idmap == &nop_mnt_idmap)
>  		return AS_KUIDT(vfsuid);
> -	uid = from_kuid(mnt_userns, AS_KUIDT(vfsuid));
> +	uid = map_id_up(&idmap->uid_map, __vfsuid_val(vfsuid));
>  	if (uid == (uid_t)-1)
>  		return INVALID_UID;
>  	if (initial_idmapping(fs_userns))
> @@ -160,11 +164,10 @@ kgid_t from_vfsgid(struct mnt_idmap *idmap,
>  		   struct user_namespace *fs_userns, vfsgid_t vfsgid)
>  {
>  	gid_t gid;
> -	struct user_namespace *mnt_userns = idmap->owner;
>  
>  	if (idmap == &nop_mnt_idmap)
>  		return AS_KGIDT(vfsgid);
> -	gid = from_kgid(mnt_userns, AS_KGIDT(vfsgid));
> +	gid = map_id_up(&idmap->gid_map, __vfsgid_val(vfsgid));
>  	if (gid == (gid_t)-1)
>  		return INVALID_GID;
>  	if (initial_idmapping(fs_userns))
> @@ -195,16 +198,91 @@ int vfsgid_in_group_p(vfsgid_t vfsgid)
>  #endif
>  EXPORT_SYMBOL_GPL(vfsgid_in_group_p);
>  
> +static int copy_mnt_idmap(struct uid_gid_map *map_from,
> +			  struct uid_gid_map *map_to)
> +{
> +	struct uid_gid_extent *forward, *reverse;
> +	u32 nr_extents = READ_ONCE(map_from->nr_extents);
> +	/* Pairs with smp_wmb() when writing the idmapping. */
> +	smp_rmb();
> +
> +	/*
> +	 * Don't blindly copy @map_to into @map_from if nr_extents is
> +	 * smaller or equal to UID_GID_MAP_MAX_BASE_EXTENTS. Since we
> +	 * read @nr_extents someone could have written an idmapping and
> +	 * then we might end up with inconsistent data. So just don't do
> +	 * anything at all.
> +	 */
> +	if (nr_extents == 0)
> +		return 0;
> +
> +	/*
> +	 * Here we know that nr_extents is greater than zero which means
> +	 * a map has been written. Since idmappings can't be changed
> +	 * once they have been written we know that we can safely copy
> +	 * from @map_to into @map_from.
> +	 */
> +
> +	if (nr_extents <= UID_GID_MAP_MAX_BASE_EXTENTS) {
> +		*map_to = *map_from;
> +		return 0;
> +	}
> +
> +	forward = kmemdup(map_from->forward,
> +			  nr_extents * sizeof(struct uid_gid_extent),
> +			  GFP_KERNEL_ACCOUNT);
> +	if (!forward)
> +		return -ENOMEM;
> +
> +	reverse = kmemdup(map_from->reverse,
> +			  nr_extents * sizeof(struct uid_gid_extent),
> +			  GFP_KERNEL_ACCOUNT);
> +	if (!reverse) {
> +		kfree(forward);
> +		return -ENOMEM;
> +	}
> +
> +	/*
> +	 * The idmapping isn't exposed anywhere so we don't need to care
> +	 * about ordering between extent pointers and @nr_extents
> +	 * initialization.
> +	 */
> +	map_to->forward = forward;
> +	map_to->reverse = reverse;
> +	map_to->nr_extents = nr_extents;
> +	return 0;
> +}
> +
> +static void free_mnt_idmap(struct mnt_idmap *idmap)
> +{
> +	if (idmap->uid_map.nr_extents > UID_GID_MAP_MAX_BASE_EXTENTS) {
> +		kfree(idmap->uid_map.forward);
> +		kfree(idmap->uid_map.reverse);
> +	}
> +	if (idmap->gid_map.nr_extents > UID_GID_MAP_MAX_BASE_EXTENTS) {
> +		kfree(idmap->gid_map.forward);
> +		kfree(idmap->gid_map.reverse);
> +	}
> +	kfree(idmap);
> +}
> +
>  struct mnt_idmap *alloc_mnt_idmap(struct user_namespace *mnt_userns)
>  {
>  	struct mnt_idmap *idmap;
> +	int ret;
>  
>  	idmap = kzalloc(sizeof(struct mnt_idmap), GFP_KERNEL_ACCOUNT);
>  	if (!idmap)
>  		return ERR_PTR(-ENOMEM);
>  
> -	idmap->owner = get_user_ns(mnt_userns);
>  	refcount_set(&idmap->count, 1);
> +	ret = copy_mnt_idmap(&mnt_userns->uid_map, &idmap->uid_map);
> +	if (!ret)
> +		ret = copy_mnt_idmap(&mnt_userns->gid_map, &idmap->gid_map);
> +	if (ret) {
> +		free_mnt_idmap(idmap);
> +		idmap = ERR_PTR(ret);
> +	}
>  	return idmap;
>  }
>  
> @@ -234,9 +312,7 @@ EXPORT_SYMBOL_GPL(mnt_idmap_get);
>   */
>  void mnt_idmap_put(struct mnt_idmap *idmap)
>  {
> -	if (idmap != &nop_mnt_idmap && refcount_dec_and_test(&idmap->count)) {
> -		put_user_ns(idmap->owner);
> -		kfree(idmap);
> -	}
> +	if (idmap != &nop_mnt_idmap && refcount_dec_and_test(&idmap->count))
> +		free_mnt_idmap(idmap);
>  }
>  EXPORT_SYMBOL_GPL(mnt_idmap_put);
> diff --git a/include/linux/uidgid.h b/include/linux/uidgid.h
> index b0542cd11aeb..7806e93b907d 100644
> --- a/include/linux/uidgid.h
> +++ b/include/linux/uidgid.h
> @@ -17,6 +17,7 @@
>  
>  struct user_namespace;
>  extern struct user_namespace init_user_ns;
> +struct uid_gid_map;
>  
>  typedef struct {
>  	uid_t val;
> @@ -138,6 +139,9 @@ static inline bool kgid_has_mapping(struct user_namespace *ns, kgid_t gid)
>  	return from_kgid(ns, gid) != (gid_t) -1;
>  }
>  
> +u32 map_id_down(struct uid_gid_map *map, u32 id);
> +u32 map_id_up(struct uid_gid_map *map, u32 id);
> +
>  #else
>  
>  static inline kuid_t make_kuid(struct user_namespace *from, uid_t uid)
> @@ -186,6 +190,15 @@ static inline bool kgid_has_mapping(struct user_namespace *ns, kgid_t gid)
>  	return gid_valid(gid);
>  }
>  
> +static inline u32 map_id_down(struct uid_gid_map *map, u32 id)
> +{
> +	return id;
> +}
> +
> +static inline u32 map_id_up(struct uid_gid_map *map, u32 id);

You accidentally put a ; here, and then fix it up in the next patch, it needs to
be fixed here.  Thanks,

Josef




[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [NTFS 3]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [NTFS 3]     [Samba]     [Device Mapper]     [CEPH Development]

  Powered by Linux