Re: [PATCH v2] proc: "mount -o lookup=" support

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, Jan 19, 2022 at 06:48:03PM +0300, Alexey Dobriyan wrote:
> >From 61376c85daab50afb343ce50b5a97e562bc1c8d3 Mon Sep 17 00:00:00 2001
> From: Alexey Dobriyan <adobriyan@xxxxxxxxx>
> Date: Mon, 22 Nov 2021 20:41:06 +0300
> Subject: [PATCH 1/1] proc: "mount -o lookup=..." support
> 
> Docker implements MaskedPaths configuration option
> 
> 	https://github.com/estesp/docker/blob/9c15e82f19b0ad3c5fe8617a8ec2dddc6639f40a/oci/defaults.go#L97
> 
> to disable certain /proc files. It overmounts them with /dev/null.
> 
> Implement a proper mount option which selectively disables lookup/readdir
> in the top-level /proc directory so that MaskedPaths doesn't need
> to be updated as time goes on.
> 
> Syntax is
> 
> 			Filter everything
> 	# mount -t proc -o lookup=/ proc /proc
> 	# ls /proc
> 	dr-xr-xr-x   8 root       root          0 Nov 22 21:12 995
> 	lrwxrwxrwx   1 root       root          0 Nov 22 21:12 self -> 1163
> 	lrwxrwxrwx   1 root       root          0 Nov 22 21:12 thread-self -> 1163/task/1163
> 
> 			Allow /proc/cpuinfo and /proc/uptime
> 	# mount -t proc proc -o lookup=cpuinfo/uptime /proc
> 
> 	# ls /proc
> 				...
> 	dr-xr-xr-x   8 root       root          0 Nov 22 21:12 995
> 	-r--r--r--   1 root       root          0 Nov 22 21:12 cpuinfo
> 	lrwxrwxrwx   1 root       root          0 Nov 22 21:12 self -> 1163
> 	lrwxrwxrwx   1 root       root          0 Nov 22 21:12 thread-self -> 1163/task/1163
> 	-r--r--r--   1 root       root          0 Nov 22 21:12 uptime
> 
> Trailing slash is optional but saves 1 allocation.
> Trailing slash is mandatory for "filter everything".
> 
> Remounting with lookup= is disabled so that files and dcache entries
> don't stay active while the filter list is changed. Users are supposed
> to unmount and mount again with a different lookup= set.
> Remount rules may change in the future. (Eric W. Biederman)
> 
> Re: speed
> This is the price for filtering. Given that lookup= is a whitelist, it is
> not supposed to be very long. Second, it is one linear memory scan per
> lookup; there are no linked lists. It may in fact be faster than an rbtree.
> It consumes 1 allocation per superblock, which is the list of names itself.
> 
> Signed-off-by: Alexey Dobriyan <adobriyan@xxxxxxxxx>
> ---
> 
> 	v2
> 	documentation!
> 	descriptive comments!
> 	disable remount
> 
>  Documentation/filesystems/proc.rst |   8 ++
>  fs/proc/generic.c                  |  18 ++--
>  fs/proc/internal.h                 |  31 ++++++-
>  fs/proc/proc_net.c                 |   2 +-
>  fs/proc/root.c                     | 127 ++++++++++++++++++++++++++++-
>  include/linux/proc_fs.h            |   2 +
>  6 files changed, 178 insertions(+), 10 deletions(-)
> 
> diff --git a/Documentation/filesystems/proc.rst b/Documentation/filesystems/proc.rst
> index 8d7f141c6fc7..9a328f0b4346 100644
> --- a/Documentation/filesystems/proc.rst
> +++ b/Documentation/filesystems/proc.rst
> @@ -2186,6 +2186,7 @@ The following mount options are supported:
>  	hidepid=	Set /proc/<pid>/ access mode.
>  	gid=		Set the group authorized to learn processes information.
>  	subset=		Show only the specified subset of procfs.
> +        lookup=         Top-level /proc filter, independent of subset=

Will it be possible to combine lookup= and subset= options when mounting?

>  	=========	========================================================
>  
>  hidepid=off or hidepid=0 means classic mode - everybody may access all
> @@ -2218,6 +2219,13 @@ information about processes information, just add identd to this group.
>  subset=pid hides all top level files and directories in the procfs that
>  are not related to tasks.
>  
> +The lookup= mount option makes only the listed files/directories
> +available in the top-level /proc directory. Individual names are separated
> +by slashes. An empty list is equivalent to subset=pid. lookup= filters before
> +subset= if both options are supplied. lookup= doesn't affect the availability
> +of /proc/${pid} directories or of the /proc/self and /proc/thread-self
> +symlinks. More fine-grained filtering is not supported at the moment.
> +
>  Chapter 5: Filesystem behavior
>  ==============================
>  
> diff --git a/fs/proc/generic.c b/fs/proc/generic.c
> index 5b78739e60e4..4d04f8d89cdc 100644
> --- a/fs/proc/generic.c
> +++ b/fs/proc/generic.c
> @@ -282,7 +282,7 @@ struct dentry *proc_lookup(struct inode *dir, struct dentry *dentry,
>   * for success..
>   */
>  int proc_readdir_de(struct file *file, struct dir_context *ctx,
> -		    struct proc_dir_entry *de)
> +		    struct proc_dir_entry *de, const struct proc_lookup_list *ll)
>  {
>  	int i;
>  
> @@ -307,12 +307,15 @@ int proc_readdir_de(struct file *file, struct dir_context *ctx,
>  		struct proc_dir_entry *next;
>  		pde_get(de);
>  		read_unlock(&proc_subdir_lock);
> -		if (!dir_emit(ctx, de->name, de->namelen,
> -			    de->low_ino, de->mode >> 12)) {
> -			pde_put(de);
> -			return 0;
> +
> +		if (in_lookup_list(ll, de->name, de->namelen)) {
> +			if (!dir_emit(ctx, de->name, de->namelen, de->low_ino, de->mode >> 12)) {
> +				pde_put(de);
> +				return 0;
> +			}
> +			ctx->pos++;
>  		}
> -		ctx->pos++;
> +
>  		read_lock(&proc_subdir_lock);
>  		next = pde_subdir_next(de);
>  		pde_put(de);
> @@ -330,7 +333,8 @@ int proc_readdir(struct file *file, struct dir_context *ctx)
>  	if (fs_info->pidonly == PROC_PIDONLY_ON)
>  		return 1;
>  
> -	return proc_readdir_de(file, ctx, PDE(inode));
> +	return proc_readdir_de(file, ctx, PDE(inode),
> +				PDE(inode) == &proc_root ? fs_info->lookup_list : NULL);
>  }
>  
>  /*
> diff --git a/fs/proc/internal.h b/fs/proc/internal.h
> index 03415f3fb3a8..e74acb437c56 100644
> --- a/fs/proc/internal.h
> +++ b/fs/proc/internal.h
> @@ -190,7 +190,7 @@ struct proc_dir_entry *proc_register(struct proc_dir_entry *dir,
>  extern struct dentry *proc_lookup(struct inode *, struct dentry *, unsigned int);
>  struct dentry *proc_lookup_de(struct inode *, struct dentry *, struct proc_dir_entry *);
>  extern int proc_readdir(struct file *, struct dir_context *);
> -int proc_readdir_de(struct file *, struct dir_context *, struct proc_dir_entry *);
> +int proc_readdir_de(struct file *, struct dir_context *, struct proc_dir_entry *, const struct proc_lookup_list *);
>  
>  static inline void pde_get(struct proc_dir_entry *pde)
>  {
> @@ -318,3 +318,32 @@ static inline void pde_force_lookup(struct proc_dir_entry *pde)
>  	/* /proc/net/ entries can be changed under us by setns(CLONE_NEWNET) */
>  	pde->proc_dops = &proc_net_dentry_ops;
>  }
> +
> +/*
> + * Pascal strings stitched together, making the filtering memory access pattern linear.
> + *
> + * "mount -t proc -o lookup=/" results in
> + *
> + *	(u8[]){
> + *		0
> + *	}
> + *
> + * "mount -t proc -o lookup=cpuinfo/uptime/" results in
> + *
> + *	(u8[]){
> + *		7, 'c', 'p', 'u', 'i', 'n', 'f', 'o',
> + *		6, 'u', 'p', 't', 'i', 'm', 'e',
> + *		0
> + *	}
> + */
> +struct proc_lookup_list {
> +	u8 len;
> +	char str[];
> +};
> +
> +static inline struct proc_lookup_list *lookup_list_next(const struct proc_lookup_list *ll)
> +{
> +	return (struct proc_lookup_list *)((void *)ll + 1 + ll->len);
> +}
> +
> +bool in_lookup_list(const struct proc_lookup_list *ll, const char *str, unsigned int len);
> diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
> index 15c2e55d2ed2..7941df2d3d74 100644
> --- a/fs/proc/proc_net.c
> +++ b/fs/proc/proc_net.c
> @@ -321,7 +321,7 @@ static int proc_tgid_net_readdir(struct file *file, struct dir_context *ctx)
>  	ret = -EINVAL;
>  	net = get_proc_task_net(file_inode(file));
>  	if (net != NULL) {
> -		ret = proc_readdir_de(file, ctx, net->proc_net);
> +		ret = proc_readdir_de(file, ctx, net->proc_net, NULL);
>  		put_net(net);
>  	}
>  	return ret;
> diff --git a/fs/proc/root.c b/fs/proc/root.c
> index c7e3b1350ef8..8000558d7d2c 100644
> --- a/fs/proc/root.c
> +++ b/fs/proc/root.c
> @@ -35,18 +35,22 @@ struct proc_fs_context {
>  	enum proc_hidepid	hidepid;
>  	int			gid;
>  	enum proc_pidonly	pidonly;
> +	struct proc_lookup_list	*lookup_list;
> +	unsigned int		lookup_list_len;
>  };
>  
>  enum proc_param {
>  	Opt_gid,
>  	Opt_hidepid,
>  	Opt_subset,
> +	Opt_lookup,
>  };
>  
>  static const struct fs_parameter_spec proc_fs_parameters[] = {
>  	fsparam_u32("gid",	Opt_gid),
>  	fsparam_string("hidepid",	Opt_hidepid),
>  	fsparam_string("subset",	Opt_subset),
> +	fsparam_string("lookup",	Opt_lookup),
>  	{}
>  };
>  
> @@ -112,6 +116,65 @@ static int proc_parse_subset_param(struct fs_context *fc, char *value)
>  	return 0;
>  }
>  
> +static int proc_parse_lookup_param(struct fs_context *fc, char *str0)
> +{
> +	struct proc_fs_context *ctx = fc->fs_private;
> +	struct proc_lookup_list *ll;
> +	char *str;
> +	const char *slash;
> +	const char *src;
> +	unsigned int len;
> +	int rv;
> +
> +	/* Force trailing slash, simplify loops below. */
> +	len = strlen(str0);
> +	if (len > 0 && str0[len - 1] == '/') {
> +		str = str0;
> +	} else {
> +		str = kmalloc(len + 2, GFP_KERNEL);
> +		if (!str) {
> +			rv = -ENOMEM;
> +			goto out;
> +		}
> +		memcpy(str, str0, len);
> +		str[len] = '/';
> +		str[len + 1] = '\0';
> +	}
> +
> +	len = 0;
> +	for (src = str; (slash = strchr(src, '/')); src = slash + 1) {
> +		if (slash - src >= 256) {
> +			rv = -EINVAL;
> +			goto out_free_str;
> +		}
> +		len += 1 + (slash - src);
> +	}
> +	len += 1;
> +
> +	ctx->lookup_list = ll = kmalloc(len, GFP_KERNEL);
> +	ctx->lookup_list_len = len;
> +	if (!ll) {
> +		rv = -ENOMEM;
> +		goto out_free_str;
> +	}
> +
> +	for (src = str; (slash = strchr(src, '/')); src = slash + 1) {
> +		ll->len = slash - src;
> +		memcpy(ll->str, src, ll->len);
> +		ll = lookup_list_next(ll);
> +	}
> +	ll->len = 0;
> +
> +	rv = 0;
> +
> +out_free_str:
> +	if (str != str0) {
> +		kfree(str);
> +	}
> +out:
> +	return rv;
> +}
> +
>  static int proc_parse_param(struct fs_context *fc, struct fs_parameter *param)
>  {
>  	struct proc_fs_context *ctx = fc->fs_private;
> @@ -137,6 +200,11 @@ static int proc_parse_param(struct fs_context *fc, struct fs_parameter *param)
>  			return -EINVAL;
>  		break;
>  
> +	case Opt_lookup:
> +		if (proc_parse_lookup_param(fc, param->string) < 0)
> +			return -EINVAL;
> +		break;
> +
>  	default:
>  		return -EINVAL;
>  	}
> @@ -157,6 +225,10 @@ static void proc_apply_options(struct proc_fs_info *fs_info,
>  		fs_info->hide_pid = ctx->hidepid;
>  	if (ctx->mask & (1 << Opt_subset))
>  		fs_info->pidonly = ctx->pidonly;
> +	if (ctx->mask & (1 << Opt_lookup)) {
> +		fs_info->lookup_list = ctx->lookup_list;
> +		ctx->lookup_list = NULL;
> +	}
>  }
>  
>  static int proc_fill_super(struct super_block *s, struct fs_context *fc)
> @@ -218,6 +290,14 @@ static int proc_reconfigure(struct fs_context *fc)
>  	struct super_block *sb = fc->root->d_sb;
>  	struct proc_fs_info *fs_info = proc_sb_info(sb);
>  
> +	/*
> +	 * "Hide everything" lookup filter is not a problem as only
> +	 * /proc/${pid}, /proc/self and /proc/thread-self are accessible.
> +	 */
> +	if (fs_info->lookup_list && fs_info->lookup_list->len > 0) {
> +		return invalfc(fc, "'-o remount,lookup=' is unsupported, unmount and mount instead");
> +	}
> +
>  	sync_filesystem(sb);
>  
>  	proc_apply_options(fs_info, fc, current_user_ns());
> @@ -234,11 +314,34 @@ static void proc_fs_context_free(struct fs_context *fc)
>  	struct proc_fs_context *ctx = fc->fs_private;
>  
>  	put_pid_ns(ctx->pid_ns);
> +	kfree(ctx->lookup_list);
>  	kfree(ctx);
>  }
>  
> +static int proc_fs_context_dup(struct fs_context *fc, struct fs_context *src_fc)
> +{
> +	struct proc_fs_context *src = fc->fs_private;
> +	struct proc_fs_context *dst;
> +
> +	dst = kmemdup(src, sizeof(struct proc_fs_context), GFP_KERNEL);
> +	if (!dst) {
> +		return -ENOMEM;
> +	}
> +
> +	dst->lookup_list = kmemdup(dst->lookup_list, dst->lookup_list_len, GFP_KERNEL);
> +	if (!dst->lookup_list) {
> +		kfree(dst);
> +		return -ENOMEM;
> +	}
> +	get_pid_ns(dst->pid_ns);
> +
> +	fc->fs_private = dst;
> +	return 0;
> +}
> +
>  static const struct fs_context_operations proc_fs_context_ops = {
>  	.free		= proc_fs_context_free,
> +	.dup		= proc_fs_context_dup,
>  	.parse_param	= proc_parse_param,
>  	.get_tree	= proc_get_tree,
>  	.reconfigure	= proc_reconfigure,
> @@ -274,6 +377,7 @@ static void proc_kill_sb(struct super_block *sb)
>  
>  	kill_anon_super(sb);
>  	put_pid_ns(fs_info->pid_ns);
> +	kfree(fs_info->lookup_list);
>  	kfree(fs_info);
>  }
>  
> @@ -317,12 +421,33 @@ static int proc_root_getattr(struct user_namespace *mnt_userns,
>  	return 0;
>  }
>  
> +bool in_lookup_list(const struct proc_lookup_list *ll, const char *str, unsigned int len)
> +{
> +	if (ll) {
> +		for (; ll->len > 0; ll = lookup_list_next(ll)) {
> +			if (ll->len == len && strncmp(ll->str, str, len) == 0) {
> +				return true;
> +			}
> +		}
> +		return false;
> +	} else {
> +		return true;
> +	}
> +}
> +
>  static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentry, unsigned int flags)
>  {
> +	struct proc_fs_info *proc_sb = proc_sb_info(dir->i_sb);
> +
>  	if (!proc_pid_lookup(dentry, flags))
>  		return NULL;
>  
> -	return proc_lookup(dir, dentry, flags);
> +	if (in_lookup_list(proc_sb->lookup_list, dentry->d_name.name, dentry->d_name.len)) {
> +		return proc_lookup(dir, dentry, flags);
> +	} else {
> +		return NULL;
> +	}
> +
>  }
>  
>  static int proc_root_readdir(struct file *file, struct dir_context *ctx)
> diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
> index 069c7fd95396..d2c067560bf9 100644
> --- a/include/linux/proc_fs.h
> +++ b/include/linux/proc_fs.h
> @@ -10,6 +10,7 @@
>  #include <linux/fs.h>
>  
>  struct proc_dir_entry;
> +struct proc_lookup_list;
>  struct seq_file;
>  struct seq_operations;
>  
> @@ -65,6 +66,7 @@ struct proc_fs_info {
>  	kgid_t pid_gid;
>  	enum proc_hidepid hide_pid;
>  	enum proc_pidonly pidonly;
> +	const struct proc_lookup_list *lookup_list;
>  };
>  
>  static inline struct proc_fs_info *proc_sb_info(struct super_block *sb)
> -- 
> 2.31.1
> 

-- 
Rgrds, legion




[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [NTFS 3]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [NTFS 3]     [Samba]     [Device Mapper]     [CEPH Development]

  Powered by Linux