[AppArmor 31/41] Fix __d_path() for lazy unmounts and make it unambiguous; exclude unreachable mount points from /proc/mounts

jjohansen@xxxxxxx · Thu, 12 Apr 2007 02:08:40 -0700

First, when d_path() hits a lazily unmounted mount point, it tries to prepend
the name of the lazily unmounted dentry to the path name.  It gets this wrong,
and also overwrites the slash that separates the name from the following
pathname component.

Second, it isn't always possible to tell from the __d_path result whether the
specified root and rootmnt (i.e., the chroot) were reached: lazy unmounts of
bind mounts will produce a path that starts with a non-slash, so we can tell
from that, but other lazy unmounts will produce a path that starts with a
slash, just like "ordinary" paths.

Third, sys_getcwd() shouldn't return disconnected paths.  The patch checks for
that, and makes it fail with -ENOENT in that case.

Fourth, this now allows us to tell unreachable mount points from reachable
ones when generating the /proc/mounts and /proc/$pid/mountstats files. 
Unreachable mount points are not interesting to processes (they can't get
there, anyway), so we hide unreachable mounts.  In particular, ordinary
processes also will no longer see the rootfs mount (it is unreachable, after
all).  The rootfs mount point will still be reachable to processes like the
initial initrd init process, and so those processes will continue to see this
mount point.

The attached patch cleans up __d_path() to fix the bug with overlapping
pathname components.  It also adds a @fail_deleted argument, which allows us
to get rid of some of the mess in sys_getcwd().  We make sure that paths will
only start with a slash if the path leads all the way up to the root.  If the
resulting path would otherwise be empty, we return "." instead so that some
users of seq_path for files in /proc won't break.

The @fail_deleted argument allows sys_getcwd() to be simplified.  Grabbing the
dcache_lock can be moved into __d_path().

The @fail_deleted argument could be added to d_path() as well: this would
allow callers to recognize deleted files without having to resort to the
ambiguous check for the " (deleted)" string at the end of the pathnames.  This
is not currently done, but it might be worthwhile.

This patch also removes some code duplication between mounts_open() and
mountstats_open().

Signed-off-by: Andreas Gruenbacher <agruen@xxxxxxx>
Reviewed-by: NeilBrown <neilb@xxxxxxx>
Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>

---
 fs/dcache.c    |  157 +++++++++++++++++++++++++++++++--------------------------
 fs/namespace.c |   23 +++++++-
 fs/proc/base.c |   52 +++++++-----------
 3 files changed, 131 insertions(+), 101 deletions(-)

--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1732,52 +1732,51 @@ shouldnt_be_hashed:
 }
 
 /**
- * d_path - return the path of a dentry
+ * __d_path - return the path of a dentry
  * @dentry: dentry to report
  * @vfsmnt: vfsmnt to which the dentry belongs
  * @root: root dentry
  * @rootmnt: vfsmnt to which the root dentry belongs
  * @buffer: buffer to return value in
  * @buflen: buffer length
+ * @fail_deleted: what to return for deleted files
  *
- * Convert a dentry into an ASCII path name. If the entry has been deleted
+ * Convert a dentry into an ASCII path name. If the entry has been deleted,
+ * then if @fail_deleted is true, ERR_PTR(-ENOENT) is returned. Otherwise,
  * the string " (deleted)" is appended. Note that this is ambiguous.
  *
- * Returns the buffer or an error code if the path was too long.
+ * If @dentry is not connected to @root, the path returned will be relative
+ * (i.e., it will not start with a slash).
  *
- * "buflen" should be positive. Caller holds the dcache_lock.
+ * Returns the buffer or an error code.
  */
-static char * __d_path( struct dentry *dentry, struct vfsmount *vfsmnt,
-			struct dentry *root, struct vfsmount *rootmnt,
-			char *buffer, int buflen)
-{
-	char * end = buffer+buflen;
-	char * retval;
-	int namelen;
+static char *__d_path(struct dentry *dentry, struct vfsmount *vfsmnt,
+		      struct dentry *root, struct vfsmount *rootmnt,
+		      char *buffer, int buflen, int fail_deleted)
+{
+	int namelen, is_slash;
+
+	if (buflen < 2)
+		return ERR_PTR(-ENAMETOOLONG);
+	buffer += --buflen;
+	*buffer = '\0';
 
-	*--end = '\0';
-	buflen--;
+	spin_lock(&dcache_lock);
 	if (!IS_ROOT(dentry) && d_unhashed(dentry)) {
-		buflen -= 10;
-		end -= 10;
-		if (buflen < 0)
+		if (fail_deleted) {
+			buffer = ERR_PTR(-ENOENT);
+			goto out;
+		}
+		if (buflen < 10)
 			goto Elong;
-		memcpy(end, " (deleted)", 10);
+		buflen -= 10;
+		buffer -= 10;
+		memcpy(buffer, " (deleted)", 10);
 	}
-
-	if (buflen < 1)
-		goto Elong;
-	/* Get '/' right */
-	retval = end-1;
-	*retval = '/';
-
-	for (;;) {
+	while (dentry != root || vfsmnt != rootmnt) {
 		struct dentry * parent;
 
-		if (dentry == root && vfsmnt == rootmnt)
-			break;
 		if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) {
-			/* Global root? */
 			spin_lock(&vfsmount_lock);
 			if (vfsmnt->mnt_parent == vfsmnt) {
 				spin_unlock(&vfsmount_lock);
@@ -1791,33 +1790,63 @@ static char * __d_path( struct dentry *d
 		parent = dentry->d_parent;
 		prefetch(parent);
 		namelen = dentry->d_name.len;
-		buflen -= namelen + 1;
-		if (buflen < 0)
+		if (buflen < namelen + 1)
 			goto Elong;
-		end -= namelen;
-		memcpy(end, dentry->d_name.name, namelen);
-		*--end = '/';
-		retval = end;
+		buflen -= namelen + 1;
+		buffer -= namelen;
+		memcpy(buffer, dentry->d_name.name, namelen);
+		*--buffer = '/';
 		dentry = parent;
 	}
+	/* Get '/' right. */
+	if (*buffer != '/')
+		*--buffer = '/';
 
-	return retval;
+out:
+	spin_unlock(&dcache_lock);
+	return buffer;
 
 global_root:
+	/*
+	 * We went past the (vfsmount, dentry) we were looking for and have
+	 * either hit a root dentry, a lazily unmounted dentry, an
+	 * unconnected dentry, or the file is on a pseudo filesystem.
+	 */
 	namelen = dentry->d_name.len;
-	buflen -= namelen;
-	if (buflen < 0)
+	is_slash = (namelen == 1 && *dentry->d_name.name == '/');
+	if (is_slash || (dentry->d_sb->s_flags & MS_NOUSER)) {
+		/*
+		 * Make sure we won't return a pathname starting with '/'.
+		 *
+		 * Historically, we also glue together the root dentry and
+		 * remaining name for pseudo filesystems like pipefs, which
+		 * have the MS_NOUSER flag set. This results in pathnames
+		 * like "pipe:[439336]".
+		 */
+		if (*buffer == '/') {
+			buffer++;
+			buflen++;
+		}
+		if (is_slash) {
+			if (*buffer == '\0')
+				*--buffer = '.';
+			goto out;
+		}
+	}
+	if (buflen < namelen)
 		goto Elong;
-	retval -= namelen-1;	/* hit the slash */
-	memcpy(retval, dentry->d_name.name, namelen);
-	return retval;
+	buffer -= namelen;
+	memcpy(buffer, dentry->d_name.name, namelen);
+	goto out;
+
 Elong:
-	return ERR_PTR(-ENAMETOOLONG);
+	buffer = ERR_PTR(-ENAMETOOLONG);
+	goto out;
 }
 
 /* write full pathname into buffer and return start of pathname */
-char * d_path(struct dentry *dentry, struct vfsmount *vfsmnt,
-				char *buf, int buflen)
+char *d_path(struct dentry *dentry, struct vfsmount *vfsmnt, char *buf,
+	     int buflen)
 {
 	char *res;
 	struct vfsmount *rootmnt;
@@ -1827,9 +1856,7 @@ char * d_path(struct dentry *dentry, str
 	rootmnt = mntget(current->fs->rootmnt);
 	root = dget(current->fs->root);
 	read_unlock(&current->fs->lock);
-	spin_lock(&dcache_lock);
-	res = __d_path(dentry, vfsmnt, root, rootmnt, buf, buflen);
-	spin_unlock(&dcache_lock);
+	res = __d_path(dentry, vfsmnt, root, rootmnt, buf, buflen, 0);
 	dput(root);
 	mntput(rootmnt);
 	return res;
@@ -1855,10 +1882,10 @@ char * d_path(struct dentry *dentry, str
  */
 asmlinkage long sys_getcwd(char __user *buf, unsigned long size)
 {
-	int error;
+	int error, len;
 	struct vfsmount *pwdmnt, *rootmnt;
 	struct dentry *pwd, *root;
-	char *page = (char *) __get_free_page(GFP_USER);
+	char *page = (char *) __get_free_page(GFP_USER), *cwd;
 
 	if (!page)
 		return -ENOMEM;
@@ -1870,29 +1897,21 @@ asmlinkage long sys_getcwd(char __user *
 	root = dget(current->fs->root);
 	read_unlock(&current->fs->lock);
 
+	cwd = __d_path(pwd, pwdmnt, root, rootmnt, page, PAGE_SIZE, 1);
+	error = PTR_ERR(cwd);
+	if (IS_ERR(cwd))
+		goto out;
 	error = -ENOENT;
-	/* Has the current directory has been unlinked? */
-	spin_lock(&dcache_lock);
-	if (pwd->d_parent == pwd || !d_unhashed(pwd)) {
-		unsigned long len;
-		char * cwd;
-
-		cwd = __d_path(pwd, pwdmnt, root, rootmnt, page, PAGE_SIZE);
-		spin_unlock(&dcache_lock);
-
-		error = PTR_ERR(cwd);
-		if (IS_ERR(cwd))
-			goto out;
+	if (*cwd != '/')
+		goto out;
 
-		error = -ERANGE;
-		len = PAGE_SIZE + page - cwd;
-		if (len <= size) {
-			error = len;
-			if (copy_to_user(buf, cwd, len))
-				error = -EFAULT;
-		}
-	} else
-		spin_unlock(&dcache_lock);
+	error = -ERANGE;
+	len = PAGE_SIZE + page - cwd;
+	if (len <= size) {
+		error = len;
+		if (copy_to_user(buf, cwd, len))
+			error = -EFAULT;
+	}
 
 out:
 	dput(pwd);
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -348,8 +348,16 @@ static inline void mangle(struct seq_fil
 	seq_escape(m, s, " \t\n\\");
 }
 
+/* Keep in sync with fs/proc/base.c! */
+struct proc_mounts {
+        struct seq_file m;
+        void *page;
+        int event;
+};
+
 static int show_vfsmnt(struct seq_file *m, void *v)
 {
+	void *page = container_of(m, struct proc_mounts, m)->page;
 	struct vfsmount *mnt = v;
 	int err = 0;
 	static struct proc_fs_info {
@@ -371,10 +379,15 @@ static int show_vfsmnt(struct seq_file *
 		{ 0, NULL }
 	};
 	struct proc_fs_info *fs_infop;
+	char *path;
+
+	path = d_path(mnt->mnt_root, mnt, page, PAGE_SIZE);
+	if (IS_ERR(path) || *path != '/')
+		return err;
 
 	mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none");
 	seq_putc(m, ' ');
-	seq_path(m, mnt, mnt->mnt_root, " \t\n\\");
+	mangle(m, path);
 	seq_putc(m, ' ');
 	mangle(m, mnt->mnt_sb->s_type->name);
 	seq_puts(m, mnt->mnt_sb->s_flags & MS_RDONLY ? " ro" : " rw");
@@ -401,8 +414,14 @@ struct seq_operations mounts_op = {
 
 static int show_vfsstat(struct seq_file *m, void *v)
 {
+	void *page = container_of(m, struct proc_mounts, m)->page;
 	struct vfsmount *mnt = v;
 	int err = 0;
+	char *path;
+
+	path = d_path(mnt->mnt_root, mnt, page, PAGE_SIZE);
+	if (IS_ERR(path) || *path != '/')
+		return err; /* error or path unreachable from chroot */
 
 	/* device */
 	if (mnt->mnt_devname) {
@@ -413,7 +432,7 @@ static int show_vfsstat(struct seq_file 
 
 	/* mount point */
 	seq_puts(m, " mounted on ");
-	seq_path(m, mnt, mnt->mnt_root, " \t\n\\");
+	mangle(m, path);
 	seq_putc(m, ' ');
 
 	/* file system type */
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -353,13 +353,16 @@ static const struct inode_operations pro
 	.setattr	= proc_setattr,
 };
 
+/* Keep in sync with fs/namespace.c! */
 extern struct seq_operations mounts_op;
 struct proc_mounts {
 	struct seq_file m;
+	void *page;
 	int event;
 };
 
-static int mounts_open(struct inode *inode, struct file *file)
+static int __mounts_open(struct inode *inode, struct file *file,
+			 struct seq_operations *seq_ops)
 {
 	struct task_struct *task = get_proc_task(inode);
 	struct mnt_namespace *ns = NULL;
@@ -382,12 +385,16 @@ static int mounts_open(struct inode *ino
 		p = kmalloc(sizeof(struct proc_mounts), GFP_KERNEL);
 		if (p) {
 			file->private_data = &p->m;
-			ret = seq_open(file, &mounts_op);
+			p->page = (void *)__get_free_page(GFP_KERNEL);
+			if (p->page)
+				ret = seq_open(file, seq_ops);
 			if (!ret) {
 				p->m.private = ns;
 				p->event = ns->event;
 				return 0;
 			}
+			if (p->page)
+				free_page((unsigned long)p->page);
 			kfree(p);
 		}
 		put_mnt_ns(ns);
@@ -395,17 +402,26 @@ static int mounts_open(struct inode *ino
 	return ret;
 }
 
+static int mounts_open(struct inode *inode, struct file *file)
+{
+	return __mounts_open(inode, file, &mounts_op);
+}
+
 static int mounts_release(struct inode *inode, struct file *file)
 {
-	struct seq_file *m = file->private_data;
-	struct mnt_namespace *ns = m->private;
+	struct proc_mounts *p =
+		container_of(file->private_data, struct proc_mounts, m);
+	struct mnt_namespace *ns = p->m.private;
+
+	free_page((unsigned long)p->page);
 	put_mnt_ns(ns);
 	return seq_release(inode, file);
 }
 
 static unsigned mounts_poll(struct file *file, poll_table *wait)
 {
-	struct proc_mounts *p = file->private_data;
+	struct proc_mounts *p =
+		container_of(file->private_data, struct proc_mounts, m);
 	struct mnt_namespace *ns = p->m.private;
 	unsigned res = 0;
 
@@ -432,31 +448,7 @@ static const struct file_operations proc
 extern struct seq_operations mountstats_op;
 static int mountstats_open(struct inode *inode, struct file *file)
 {
-	int ret = seq_open(file, &mountstats_op);
-
-	if (!ret) {
-		struct seq_file *m = file->private_data;
-		struct mnt_namespace *mnt_ns = NULL;
-		struct task_struct *task = get_proc_task(inode);
-
-		if (task) {
-			task_lock(task);
-			if (task->nsproxy)
-				mnt_ns = task->nsproxy->mnt_ns;
-			if (mnt_ns)
-				get_mnt_ns(mnt_ns);
-			task_unlock(task);
-			put_task_struct(task);
-		}
-
-		if (mnt_ns)
-			m->private = mnt_ns;
-		else {
-			seq_release(inode, file);
-			ret = -EINVAL;
-		}
-	}
-	return ret;
+	return __mounts_open(inode, file, &mountstats_op);
 }
 
 static const struct file_operations proc_mountstats_operations = {

-- 
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html