Hi Alexey,
On 11/22/21 11:23, Alexey Dobriyan wrote:
Docker implements the MaskedPaths configuration option
https://github.com/estesp/docker/blob/9c15e82f19b0ad3c5fe8617a8ec2dddc6639f40a/oci/defaults.go#L97
to disable certain /proc files. It does so by overmounting each masked
file with /dev/null.
Give them a proper mount option that selectively disables lookup/readdir
so that MaskedPaths doesn't need to be updated as time goes on.
Syntax is
mount -t proc proc -o lookup=cpuinfo/uptime /proc
# ls /proc
...
dr-xr-xr-x 8 root root 0 Nov 22 21:12 995
-r--r--r-- 1 root root 0 Nov 22 21:12 cpuinfo
lrwxrwxrwx 1 root root 0 Nov 22 21:12 self -> 1163
lrwxrwxrwx 1 root root 0 Nov 22 21:12 thread-self -> 1163/task/1163
-r--r--r-- 1 root root 0 Nov 22 21:12 uptime
Works at top level only (1 lookup list per superblock)
Trailing slash is optional but saves 1 allocation.
TODO:
think what to do with dcache entries across "mount -o remount,lookup=".
Signed-off-by: Alexey Dobriyan <adobriyan@xxxxxxxxx>
---
fs/proc/generic.c | 19 +++++--
fs/proc/internal.h | 23 +++++++++
fs/proc/proc_net.c | 2
fs/proc/root.c | 115 ++++++++++++++++++++++++++++++++++++++++++++++++
include/linux/proc_fs.h | 2
5 files changed, 152 insertions(+), 9 deletions(-)
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -282,7 +282,7 @@ struct dentry *proc_lookup(struct inode *dir, struct dentry *dentry,
* for success..
*/
int proc_readdir_de(struct file *file, struct dir_context *ctx,
- struct proc_dir_entry *de)
+ struct proc_dir_entry *de, const struct proc_lookup_list *ll)
{
int i;
@@ -305,14 +305,18 @@ int proc_readdir_de(struct file *file, struct dir_context *ctx,
do {
struct proc_dir_entry *next;
+
pde_get(de);
read_unlock(&proc_subdir_lock);
- if (!dir_emit(ctx, de->name, de->namelen,
- de->low_ino, de->mode >> 12)) {
- pde_put(de);
- return 0;
+
+ if (ll ? in_lookup_list(ll, de->name, de->namelen) : true) {
This looks a bit odd; what about the following?
if (!ll || in_lookup_list(ll, de->name, de->namelen))
But this is maybe just a personal preference.
+ if (!dir_emit(ctx, de->name, de->namelen, de->low_ino, de->mode >> 12)) {
+ pde_put(de);
+ return 0;
+ }
+ ctx->pos++;
}
- ctx->pos++;
+
read_lock(&proc_subdir_lock);
next = pde_subdir_next(de);
pde_put(de);
@@ -330,7 +334,8 @@ int proc_readdir(struct file *file, struct dir_context *ctx)
if (fs_info->pidonly == PROC_PIDONLY_ON)
return 1;
- return proc_readdir_de(file, ctx, PDE(inode));
+ return proc_readdir_de(file, ctx, PDE(inode),
+ PDE(inode) == &proc_root ? fs_info->lookup_list : NULL);
}
/*
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -190,7 +190,7 @@ struct proc_dir_entry *proc_register(struct proc_dir_entry *dir,
extern struct dentry *proc_lookup(struct inode *, struct dentry *, unsigned int);
struct dentry *proc_lookup_de(struct inode *, struct dentry *, struct proc_dir_entry *);
extern int proc_readdir(struct file *, struct dir_context *);
-int proc_readdir_de(struct file *, struct dir_context *, struct proc_dir_entry *);
+int proc_readdir_de(struct file *, struct dir_context *, struct proc_dir_entry *, const struct proc_lookup_list *);
static inline void pde_get(struct proc_dir_entry *pde)
{
@@ -318,3 +318,24 @@ static inline void pde_force_lookup(struct proc_dir_entry *pde)
/* /proc/net/ entries can be changed under us by setns(CLONE_NEWNET) */
pde->proc_dops = &proc_net_dentry_ops;
}
+
+/*
+ * "cpuinfo", "uptime" is represented as
+ *
+ * (u8[]){
+ * 7, 'c', 'p', 'u', 'i', 'n', 'f', 'o',
+ * 6, 'u', 'p', 't', 'i', 'm', 'e',
+ * 0
+ * }
+ */
+struct proc_lookup_list {
+ u8 len;
+ char str[];
+};
+
+static inline struct proc_lookup_list *lookup_list_next(const struct proc_lookup_list *ll)
+{
+ return (struct proc_lookup_list *)((void *)ll + 1 + ll->len);
+}
+
+bool in_lookup_list(const struct proc_lookup_list *ll, const char *str, unsigned int len);
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -321,7 +321,7 @@ static int proc_tgid_net_readdir(struct file *file, struct dir_context *ctx)
ret = -EINVAL;
net = get_proc_task_net(file_inode(file));
if (net != NULL) {
- ret = proc_readdir_de(file, ctx, net->proc_net);
+ ret = proc_readdir_de(file, ctx, net->proc_net, NULL);
put_net(net);
}
return ret;
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -35,18 +35,22 @@ struct proc_fs_context {
enum proc_hidepid hidepid;
int gid;
enum proc_pidonly pidonly;
+ struct proc_lookup_list *lookup_list;
+ unsigned int lookup_list_len;
};
enum proc_param {
Opt_gid,
Opt_hidepid,
Opt_subset,
+ Opt_lookup,
};
static const struct fs_parameter_spec proc_fs_parameters[] = {
fsparam_u32("gid", Opt_gid),
fsparam_string("hidepid", Opt_hidepid),
fsparam_string("subset", Opt_subset),
+ fsparam_string("lookup", Opt_lookup),
{}
};
@@ -112,6 +116,65 @@ static int proc_parse_subset_param(struct fs_context *fc, char *value)
return 0;
}
+static int proc_parse_lookup_param(struct fs_context *fc, char *str0)
+{
+ struct proc_fs_context *ctx = fc->fs_private;
+ struct proc_lookup_list *ll;
+ char *str;
+ const char *slash;
+ const char *src;
+ unsigned int len;
+ int rv;
+
+ /* Force trailing slash, simplify loops below. */
+ len = strlen(str0);
+ if (len > 0 && str0[len - 1] == '/') {
+ str = str0;
+ } else {
+ str = kmalloc(len + 2, GFP_KERNEL);
+ if (!str) {
+ rv = -ENOMEM;
+ goto out;
+ }
+ memcpy(str, str0, len);
+ str[len] = '/';
+ str[len + 1] = '\0';
+ }
+
+ len = 0;
+ for (src = str; (slash = strchr(src, '/')); src = slash + 1) {
+ if (slash - src >= 256) {
+ rv = -EINVAL;
+ goto out_free_str;
+ }
+ len += 1 + (slash - src);
+ }
+ len += 1;
+
+ ctx->lookup_list = ll = kmalloc(len, GFP_KERNEL);
+ ctx->lookup_list_len = len;
+ if (!ll) {
+ rv = -ENOMEM;
+ goto out_free_str;
+ }
+
+ for (src = str; (slash = strchr(src, '/')); src = slash + 1) {
+ ll->len = slash - src;
+ memcpy(ll->str, src, ll->len);
+ ll = lookup_list_next(ll);
+ }
+ ll->len = 0;
+
+ rv = 0;
+
+out_free_str:
+ if (str != str0) {
+ kfree(str);
+ }
+out:
+ return rv;
+}
+
static int proc_parse_param(struct fs_context *fc, struct fs_parameter *param)
{
struct proc_fs_context *ctx = fc->fs_private;
@@ -137,6 +200,11 @@ static int proc_parse_param(struct fs_context *fc, struct fs_parameter *param)
return -EINVAL;
break;
+ case Opt_lookup:
+ if (proc_parse_lookup_param(fc, param->string) < 0)
+ return -EINVAL;
+ break;
+
default:
return -EINVAL;
}
@@ -157,6 +225,10 @@ static void proc_apply_options(struct proc_fs_info *fs_info,
fs_info->hide_pid = ctx->hidepid;
if (ctx->mask & (1 << Opt_subset))
fs_info->pidonly = ctx->pidonly;
+ if (ctx->mask & (1 << Opt_lookup)) {
+ fs_info->lookup_list = ctx->lookup_list;
+ ctx->lookup_list = NULL;
+ }
}
static int proc_fill_super(struct super_block *s, struct fs_context *fc)
@@ -234,11 +306,34 @@ static void proc_fs_context_free(struct fs_context *fc)
struct proc_fs_context *ctx = fc->fs_private;
put_pid_ns(ctx->pid_ns);
+ kfree(ctx->lookup_list);
kfree(ctx);
}
+static int proc_fs_context_dup(struct fs_context *fc, struct fs_context *src_fc)
+{
+ struct proc_fs_context *src = fc->fs_private;
+ struct proc_fs_context *dst;
+
+ dst = kmemdup(src, sizeof(struct proc_fs_context), GFP_KERNEL);
+ if (!dst) {
+ return -ENOMEM;
+ }
+
+ get_pid_ns(dst->pid_ns);
+ dst->lookup_list = kmemdup(dst->lookup_list, dst->lookup_list_len, GFP_KERNEL);
If dst->lookup_list is NULL and dst->lookup_list_len is 0 (which seems
to be the default state if lookup= is not passed), then kmemdup() will end
up calling __kmalloc_track_caller() with size 0. Each of the sl[auo]b
implementations returns ZERO_SIZE_PTR in this case, which is just
((void *)16).
ZERO_SIZE_PTR is safe to pass to kfree() et al. I was worried that if
ZERO_SIZE_PTR gets copied into the proc_fs_info, then it could get
dereferenced and cause a nasty crash. But that can only happen if
Opt_lookup is in ctx->mask, at which point we're guaranteed that
lookup_list is not ZERO_SIZE_PTR.
Just wanted to document why this is safe, as it caught my eye at first.
+ if (!dst->lookup_list) {
+ kfree(dst);
+ return -ENOMEM;
+ }
+
+ fc->fs_private = dst;
+ return 0;
+}
+
static const struct fs_context_operations proc_fs_context_ops = {
.free = proc_fs_context_free,
+ .dup = proc_fs_context_dup,
From my reading of vfs_dup_fs_context(), it seems that if the dup()
operation doesn't exist, the function simply bails out with -EOPNOTSUPP
and duplicating the context is not supported.
339 struct fs_context *vfs_dup_fs_context(struct fs_context *src_fc)
340 {
341 struct fs_context *fc;
342 int ret;
343
344 if (!src_fc->ops->dup)
345 return ERR_PTR(-EOPNOTSUPP);
So this patch is also adding support for the dup() operation where there
was none before. Is that significant enough to call out in the changelog
or split out as a separate patch?
Thanks,
Stephen
.parse_param = proc_parse_param,
.get_tree = proc_get_tree,
.reconfigure = proc_reconfigure,
@@ -274,6 +369,7 @@ static void proc_kill_sb(struct super_block *sb)
kill_anon_super(sb);
put_pid_ns(fs_info->pid_ns);
+ kfree(fs_info->lookup_list);
kfree(fs_info);
}
@@ -317,11 +413,30 @@ static int proc_root_getattr(struct user_namespace *mnt_userns,
return 0;
}
+bool in_lookup_list(const struct proc_lookup_list *ll, const char *str, unsigned int len)
+{
+ while (ll->len > 0) {
+ if (ll->len == len && strncmp(ll->str, str, len) == 0) {
+ return true;
+ }
+ ll = lookup_list_next(ll);
+ }
+ return false;
+}
+
static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentry, unsigned int flags)
{
+ struct proc_fs_info *proc_sb = proc_sb_info(dir->i_sb);
+
if (!proc_pid_lookup(dentry, flags))
return NULL;
+ /* Top level only for now */
+ if (proc_sb->lookup_list &&
+ !in_lookup_list(proc_sb->lookup_list, dentry->d_name.name, dentry->d_name.len)) {
+ return NULL;
+ }
+
return proc_lookup(dir, dentry, flags);
}
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -10,6 +10,7 @@
#include <linux/fs.h>
struct proc_dir_entry;
+struct proc_lookup_list;
struct seq_file;
struct seq_operations;
@@ -65,6 +66,7 @@ struct proc_fs_info {
kgid_t pid_gid;
enum proc_hidepid hide_pid;
enum proc_pidonly pidonly;
+ const struct proc_lookup_list *lookup_list;
};
static inline struct proc_fs_info *proc_sb_info(struct super_block *sb)