Re: [REVIEW][PATCH 2/4] proc: Implement /proc/thread-self to point at the directory of the current thread

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Quoting Eric W. Biederman (ebiederm@xxxxxxxxxxxx):
> 
> /proc/thread-self is derived from /proc/self.  /proc/thread-self
> points to the directory in proc containing information about the
> current thread.
> 
> This functionality has been missing for a long time, and is tricky to
> implement in userspace as gettid() is not exported by glibc.  More
> importantly this allows fixing defects in /proc/mounts and /proc/net
> where in a threaded application today they wind up being empty files
> when only the initial pthread has exited, causing problems for other
> threads.
> 
> Signed-off-by: "Eric W. Biederman" <ebiederm@xxxxxxxxxxxx>

Hi Eric,

I've not had a chance to test these, but apart from two trivial
comments below these look good to me, and I appreciate the feature.
So with the two fixes (if needed),

Acked-by: Serge Hallyn <serge.hallyn@xxxxxxxxxxxxx>

> ---
>  fs/proc/Makefile              |  1 +
>  fs/proc/base.c                | 15 +++++---
>  fs/proc/inode.c               |  7 +++-
>  fs/proc/internal.h            |  6 +++
>  fs/proc/root.c                |  3 ++
>  fs/proc/thread_self.c         | 85 +++++++++++++++++++++++++++++++++++++++++++
>  include/linux/pid_namespace.h |  1 +
>  7 files changed, 112 insertions(+), 6 deletions(-)
>  create mode 100644 fs/proc/thread_self.c
> 
> diff --git a/fs/proc/Makefile b/fs/proc/Makefile
> index 239493ec718e..7151ea428041 100644
> --- a/fs/proc/Makefile
> +++ b/fs/proc/Makefile
> @@ -23,6 +23,7 @@ proc-y	+= version.o
>  proc-y	+= softirqs.o
>  proc-y	+= namespaces.o
>  proc-y	+= self.o
> +proc-y	+= thread_self.o
>  proc-$(CONFIG_PROC_SYSCTL)	+= proc_sysctl.o
>  proc-$(CONFIG_NET)		+= proc_net.o
>  proc-$(CONFIG_PROC_KCORE)	+= kcore.o
> diff --git a/fs/proc/base.c b/fs/proc/base.c
> index ed34e405c6b9..0131156ce7c9 100644
> --- a/fs/proc/base.c
> +++ b/fs/proc/base.c
> @@ -2847,7 +2847,7 @@ retry:
>  	return iter;
>  }
>  
> -#define TGID_OFFSET (FIRST_PROCESS_ENTRY + 1)
> +#define TGID_OFFSET (FIRST_PROCESS_ENTRY + 2)
>  
>  /* for the /proc/ directory itself, after non-process stuff has been done */
>  int proc_pid_readdir(struct file *file, struct dir_context *ctx)
> @@ -2859,14 +2859,19 @@ int proc_pid_readdir(struct file *file, struct dir_context *ctx)
>  	if (pos >= PID_MAX_LIMIT + TGID_OFFSET)
>  		return 0;
>  
> -	if (pos == TGID_OFFSET - 1) {
> +	if (pos == TGID_OFFSET - 2) {
>  		struct inode *inode = ns->proc_self->d_inode;
>  		if (!dir_emit(ctx, "self", 4, inode->i_ino, DT_LNK))
>  			return 0;
> -		iter.tgid = 0;
> -	} else {
> -		iter.tgid = pos - TGID_OFFSET;
> +		ctx->pos = pos = pos + 1;
> +	}
> +	if (pos == TGID_OFFSET - 1) {
> +		struct inode *inode = ns->proc_thread_self->d_inode;
> +		if (!dir_emit(ctx, "thread-self", 11, inode->i_ino, DT_LNK))
> +			return 0;
> +		ctx->pos = pos = pos + 1;
>  	}
> +	iter.tgid = pos - TGID_OFFSET;
>  	iter.task = NULL;
>  	for (iter = next_tgid(ns, iter);
>  	     iter.task;
> diff --git a/fs/proc/inode.c b/fs/proc/inode.c
> index 0adbc02d60e3..333080d7a671 100644
> --- a/fs/proc/inode.c
> +++ b/fs/proc/inode.c
> @@ -442,6 +442,7 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de)
>  int proc_fill_super(struct super_block *s)
>  {
>  	struct inode *root_inode;
> +	int ret;
>  
>  	s->s_flags |= MS_NODIRATIME | MS_NOSUID | MS_NOEXEC;
>  	s->s_blocksize = 1024;
> @@ -463,5 +464,9 @@ int proc_fill_super(struct super_block *s)
>  		return -ENOMEM;
>  	}
>  
> -	return proc_setup_self(s);
> +	ret = proc_setup_self(s);
> +	if (ret) {
> +		return ret;
> +	}
> +	return proc_setup_thread_self(s);
>  }
> diff --git a/fs/proc/internal.h b/fs/proc/internal.h
> index 3ab6d14e71c5..ee04619173b2 100644
> --- a/fs/proc/internal.h
> +++ b/fs/proc/internal.h
> @@ -234,6 +234,12 @@ static inline int proc_net_init(void) { return 0; }
>  extern int proc_setup_self(struct super_block *);
>  
>  /*
> + * proc_thread_self.c
> + */
> +extern int proc_setup_thread_self(struct super_block *);
> +extern void proc_thread_self_init(void);
> +
> +/*
>   * proc_sysctl.c
>   */
>  #ifdef CONFIG_PROC_SYSCTL
> diff --git a/fs/proc/root.c b/fs/proc/root.c
> index 5dbadecb234d..48f1c03bc7ed 100644
> --- a/fs/proc/root.c
> +++ b/fs/proc/root.c
> @@ -149,6 +149,8 @@ static void proc_kill_sb(struct super_block *sb)
>  	ns = (struct pid_namespace *)sb->s_fs_info;
>  	if (ns->proc_self)
>  		dput(ns->proc_self);
> +	if (ns->proc_thread_self)
> +		dput(ns->proc_thread_self);
>  	kill_anon_super(sb);
>  	put_pid_ns(ns);
>  }
> @@ -170,6 +172,7 @@ void __init proc_root_init(void)
>  		return;
>  
>  	proc_self_init();
> +	proc_thread_self_init();
>  	proc_symlink("mounts", NULL, "self/mounts");
>  
>  	proc_net_init();
> diff --git a/fs/proc/thread_self.c b/fs/proc/thread_self.c
> new file mode 100644
> index 000000000000..59075b509df3
> --- /dev/null
> +++ b/fs/proc/thread_self.c
> @@ -0,0 +1,85 @@
> +#include <linux/sched.h>
> +#include <linux/namei.h>
> +#include <linux/slab.h>
> +#include <linux/pid_namespace.h>
> +#include "internal.h"
> +
> +/*
> + * /proc/thread_self:
> + */
> +static int proc_thread_self_readlink(struct dentry *dentry, char __user *buffer,
> +			      int buflen)
> +{
> +	struct pid_namespace *ns = dentry->d_sb->s_fs_info;
> +	pid_t tgid = task_tgid_nr_ns(current, ns);
> +	pid_t pid = task_pid_nr_ns(current, ns);
> +	char tmp[PROC_NUMBUF + 6 + PROC_NUMBUF];

In the extreme case, doesn't this leave no room for the trailing \0?
(Unless PROC_NUMBUF already includes that.)

> +	if (!pid)
> +		return -ENOENT;
> +	sprintf(tmp, "%d/task/%d", tgid, pid);
> +	return readlink_copy(buffer, buflen, tmp);
> +}
> +
> +static void *proc_thread_self_follow_link(struct dentry *dentry, struct nameidata *nd)
> +{
> +	struct pid_namespace *ns = dentry->d_sb->s_fs_info;
> +	pid_t tgid = task_tgid_nr_ns(current, ns);
> +	pid_t pid = task_pid_nr_ns(current, ns);
> +	char *name = ERR_PTR(-ENOENT);
> +	if (pid) {
> +		name = kmalloc(PROC_NUMBUF + 6 + PROC_NUMBUF, GFP_KERNEL);

Same here.

> +		if (!name)
> +			name = ERR_PTR(-ENOMEM);
> +		else
> +			sprintf(name, "%d/task/%d", tgid, pid);
> +	}
> +	nd_set_link(nd, name);
> +	return NULL;
> +}
> +
> +static const struct inode_operations proc_thread_self_inode_operations = {
> +	.readlink	= proc_thread_self_readlink,
> +	.follow_link	= proc_thread_self_follow_link,
> +	.put_link	= kfree_put_link,
> +};
> +
> +static unsigned thread_self_inum;
> +
> +int proc_setup_thread_self(struct super_block *s)
> +{
> +	struct inode *root_inode = s->s_root->d_inode;
> +	struct pid_namespace *ns = s->s_fs_info;
> +	struct dentry *thread_self;
> +
> +	mutex_lock(&root_inode->i_mutex);
> +	thread_self = d_alloc_name(s->s_root, "thread-self");
> +	if (thread_self) {
> +		struct inode *inode = new_inode_pseudo(s);
> +		if (inode) {
> +			inode->i_ino = thread_self_inum;
> +			inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
> +			inode->i_mode = S_IFLNK | S_IRWXUGO;
> +			inode->i_uid = GLOBAL_ROOT_UID;
> +			inode->i_gid = GLOBAL_ROOT_GID;
> +			inode->i_op = &proc_thread_self_inode_operations;
> +			d_add(thread_self, inode);
> +		} else {
> +			dput(thread_self);
> +			thread_self = ERR_PTR(-ENOMEM);
> +		}
> +	} else {
> +		thread_self = ERR_PTR(-ENOMEM);
> +	}
> +	mutex_unlock(&root_inode->i_mutex);
> +	if (IS_ERR(thread_self)) {
> +		pr_err("proc_fill_super: can't allocate /proc/thread_self\n");
> +		return PTR_ERR(thread_self);
> +	}
> +	ns->proc_thread_self = thread_self;
> +	return 0;
> +}
> +
> +void __init proc_thread_self_init(void)
> +{
> +	proc_alloc_inum(&thread_self_inum);
> +}
> diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
> index 7246ef3d4455..1997ffc295a7 100644
> --- a/include/linux/pid_namespace.h
> +++ b/include/linux/pid_namespace.h
> @@ -33,6 +33,7 @@ struct pid_namespace {
>  #ifdef CONFIG_PROC_FS
>  	struct vfsmount *proc_mnt;
>  	struct dentry *proc_self;
> +	struct dentry *proc_thread_self;
>  #endif
>  #ifdef CONFIG_BSD_PROCESS_ACCT
>  	struct bsd_acct_struct *bacct;
> -- 
> 1.9.1
> 
> _______________________________________________
> Containers mailing list
> Containers@xxxxxxxxxxxxxxxxxxxxxxxxxx
> https://lists.linuxfoundation.org/mailman/listinfo/containers
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]
  Powered by Linux