First, when d_path() hits a lazily unmounted mount point, it tries to prepend the name of the lazily unmounted dentry to the path name. It gets this wrong, and also overwrites the slash that separates the name from the following pathname component. Second, it isn't always possible to tell from the __d_path result whether the specified root and rootmnt (i.e., the chroot) were reached: lazy unmounts of bind mounts will produce a path that does start with a non-slash so we can tell from that, but other lazy unmounts will produce a path that starts with a slash, just like "ordinary" paths. Third, sys_getcwd() shouldn't return disconnected paths. The patch checks for that, and makes it fail with -ENOENT in that case. Fourth, this now allows us to tell unreachable mount points from reachable ones when generating the /proc/mounts and /proc/$pid/mountstats files. Unreachable mount points are not interesting to processes (they can't get there, anyway), so we hide unreachable mounts. In particular, ordinary processes will also no longer see the rootfs mount (it is unreachable, after all). The rootfs mount point will still be reachable to processes like the initial initrd init process, and so those processes will continue to see this mount point. The attached patch cleans up __d_path() to fix the bug with overlapping pathname components. It also adds a @fail_deleted argument, which allows us to get rid of some of the mess in sys_getcwd(). We make sure that paths will only start with a slash if the path leads all the way up to the root. If the resulting path would otherwise be empty, we return "." instead so that some users of seq_path for files in /proc won't break. The @fail_deleted argument allows sys_getcwd() to be simplified. Grabbing the dcache_lock can be moved into __d_path().
The @fail_deleted argument could be added to d_path() as well: this would allow callers to recognize deleted files without having to resort to the ambiguous check for the " (deleted)" string at the end of the pathnames. This is not currently done, but it might be worthwhile. This patch also removes some code duplication between mounts_open() and mountstats_open(). Signed-off-by: Andreas Gruenbacher <agruen@xxxxxxx> Reviewed-by: NeilBrown <neilb@xxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> --- fs/dcache.c | 157 +++++++++++++++++++++++++++++++-------------------------- fs/namespace.c | 23 +++++++- fs/proc/base.c | 52 +++++++----------- 3 files changed, 131 insertions(+), 101 deletions(-) --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1732,52 +1732,51 @@ shouldnt_be_hashed: } /** - * d_path - return the path of a dentry + * __d_path - return the path of a dentry * @dentry: dentry to report * @vfsmnt: vfsmnt to which the dentry belongs * @root: root dentry * @rootmnt: vfsmnt to which the root dentry belongs * @buffer: buffer to return value in * @buflen: buffer length + * @fail_deleted: what to return for deleted files * - * Convert a dentry into an ASCII path name. If the entry has been deleted + * Convert a dentry into an ASCII path name. If the entry has been deleted, + * then if @fail_deleted is true, ERR_PTR(-ENOENT) is returned. Otherwise, * the string " (deleted)" is appended. Note that this is ambiguous. * - * Returns the buffer or an error code if the path was too long. + * If @dentry is not connected to @root, the path returned will be relative + * (i.e., it will not start with a slash). * - * "buflen" should be positive. Caller holds the dcache_lock. + * Returns the buffer or an error code. 
*/ -static char * __d_path( struct dentry *dentry, struct vfsmount *vfsmnt, - struct dentry *root, struct vfsmount *rootmnt, - char *buffer, int buflen) -{ - char * end = buffer+buflen; - char * retval; - int namelen; +static char *__d_path(struct dentry *dentry, struct vfsmount *vfsmnt, + struct dentry *root, struct vfsmount *rootmnt, + char *buffer, int buflen, int fail_deleted) +{ + int namelen, is_slash; + + if (buflen < 2) + return ERR_PTR(-ENAMETOOLONG); + buffer += --buflen; + *buffer = '\0'; - *--end = '\0'; - buflen--; + spin_lock(&dcache_lock); if (!IS_ROOT(dentry) && d_unhashed(dentry)) { - buflen -= 10; - end -= 10; - if (buflen < 0) + if (fail_deleted) { + buffer = ERR_PTR(-ENOENT); + goto out; + } + if (buflen < 10) goto Elong; - memcpy(end, " (deleted)", 10); + buflen -= 10; + buffer -= 10; + memcpy(buffer, " (deleted)", 10); } - - if (buflen < 1) - goto Elong; - /* Get '/' right */ - retval = end-1; - *retval = '/'; - - for (;;) { + while (dentry != root || vfsmnt != rootmnt) { struct dentry * parent; - if (dentry == root && vfsmnt == rootmnt) - break; if (dentry == vfsmnt->mnt_root || IS_ROOT(dentry)) { - /* Global root? */ spin_lock(&vfsmount_lock); if (vfsmnt->mnt_parent == vfsmnt) { spin_unlock(&vfsmount_lock); @@ -1791,33 +1790,63 @@ static char * __d_path( struct dentry *d parent = dentry->d_parent; prefetch(parent); namelen = dentry->d_name.len; - buflen -= namelen + 1; - if (buflen < 0) + if (buflen < namelen + 1) goto Elong; - end -= namelen; - memcpy(end, dentry->d_name.name, namelen); - *--end = '/'; - retval = end; + buflen -= namelen + 1; + buffer -= namelen; + memcpy(buffer, dentry->d_name.name, namelen); + *--buffer = '/'; dentry = parent; } + /* Get '/' right. 
*/ + if (*buffer != '/') + *--buffer = '/'; - return retval; +out: + spin_unlock(&dcache_lock); + return buffer; global_root: + /* + * We went past the (vfsmount, dentry) we were looking for and have + * either hit a root dentry, a lazily unmounted dentry, an + * unconnected dentry, or the file is on a pseudo filesystem. + */ namelen = dentry->d_name.len; - buflen -= namelen; - if (buflen < 0) + is_slash = (namelen == 1 && *dentry->d_name.name == '/'); + if (is_slash || (dentry->d_sb->s_flags & MS_NOUSER)) { + /* + * Make sure we won't return a pathname starting with '/'. + * + * Historically, we also glue together the root dentry and + * remaining name for pseudo filesystems like pipefs, which + * have the MS_NOUSER flag set. This results in pathnames + * like "pipe:[439336]". + */ + if (*buffer == '/') { + buffer++; + buflen++; + } + if (is_slash) { + if (*buffer == '\0') + *--buffer = '.'; + goto out; + } + } + if (buflen < namelen) goto Elong; - retval -= namelen-1; /* hit the slash */ - memcpy(retval, dentry->d_name.name, namelen); - return retval; + buffer -= namelen; + memcpy(buffer, dentry->d_name.name, namelen); + goto out; + Elong: - return ERR_PTR(-ENAMETOOLONG); + buffer = ERR_PTR(-ENAMETOOLONG); + goto out; } /* write full pathname into buffer and return start of pathname */ -char * d_path(struct dentry *dentry, struct vfsmount *vfsmnt, - char *buf, int buflen) +char *d_path(struct dentry *dentry, struct vfsmount *vfsmnt, char *buf, + int buflen) { char *res; struct vfsmount *rootmnt; @@ -1827,9 +1856,7 @@ char * d_path(struct dentry *dentry, str rootmnt = mntget(current->fs->rootmnt); root = dget(current->fs->root); read_unlock(&current->fs->lock); - spin_lock(&dcache_lock); - res = __d_path(dentry, vfsmnt, root, rootmnt, buf, buflen); - spin_unlock(&dcache_lock); + res = __d_path(dentry, vfsmnt, root, rootmnt, buf, buflen, 0); dput(root); mntput(rootmnt); return res; @@ -1855,10 +1882,10 @@ char * d_path(struct dentry *dentry, str */ asmlinkage long 
sys_getcwd(char __user *buf, unsigned long size) { - int error; + int error, len; struct vfsmount *pwdmnt, *rootmnt; struct dentry *pwd, *root; - char *page = (char *) __get_free_page(GFP_USER); + char *page = (char *) __get_free_page(GFP_USER), *cwd; if (!page) return -ENOMEM; @@ -1870,29 +1897,21 @@ asmlinkage long sys_getcwd(char __user * root = dget(current->fs->root); read_unlock(&current->fs->lock); + cwd = __d_path(pwd, pwdmnt, root, rootmnt, page, PAGE_SIZE, 1); + error = PTR_ERR(cwd); + if (IS_ERR(cwd)) + goto out; error = -ENOENT; - /* Has the current directory has been unlinked? */ - spin_lock(&dcache_lock); - if (pwd->d_parent == pwd || !d_unhashed(pwd)) { - unsigned long len; - char * cwd; - - cwd = __d_path(pwd, pwdmnt, root, rootmnt, page, PAGE_SIZE); - spin_unlock(&dcache_lock); - - error = PTR_ERR(cwd); - if (IS_ERR(cwd)) - goto out; + if (*cwd != '/') + goto out; - error = -ERANGE; - len = PAGE_SIZE + page - cwd; - if (len <= size) { - error = len; - if (copy_to_user(buf, cwd, len)) - error = -EFAULT; - } - } else - spin_unlock(&dcache_lock); + error = -ERANGE; + len = PAGE_SIZE + page - cwd; + if (len <= size) { + error = len; + if (copy_to_user(buf, cwd, len)) + error = -EFAULT; + } out: dput(pwd); --- a/fs/namespace.c +++ b/fs/namespace.c @@ -348,8 +348,16 @@ static inline void mangle(struct seq_fil seq_escape(m, s, " \t\n\\"); } +/* Keep in sync with fs/proc/base.c! */ +struct proc_mounts { + struct seq_file m; + void *page; + int event; +}; + static int show_vfsmnt(struct seq_file *m, void *v) { + void *page = container_of(m, struct proc_mounts, m)->page; struct vfsmount *mnt = v; int err = 0; static struct proc_fs_info { @@ -371,10 +379,15 @@ static int show_vfsmnt(struct seq_file * { 0, NULL } }; struct proc_fs_info *fs_infop; + char *path; + + path = d_path(mnt->mnt_root, mnt, page, PAGE_SIZE); + if (IS_ERR(path) || *path != '/') + return err; mangle(m, mnt->mnt_devname ? 
mnt->mnt_devname : "none"); seq_putc(m, ' '); - seq_path(m, mnt, mnt->mnt_root, " \t\n\\"); + mangle(m, path); seq_putc(m, ' '); mangle(m, mnt->mnt_sb->s_type->name); seq_puts(m, mnt->mnt_sb->s_flags & MS_RDONLY ? " ro" : " rw"); @@ -401,8 +414,14 @@ struct seq_operations mounts_op = { static int show_vfsstat(struct seq_file *m, void *v) { + void *page = container_of(m, struct proc_mounts, m)->page; struct vfsmount *mnt = v; int err = 0; + char *path; + + path = d_path(mnt->mnt_root, mnt, page, PAGE_SIZE); + if (IS_ERR(path) || *path != '/') + return err; /* error or path unreachable from chroot */ /* device */ if (mnt->mnt_devname) { @@ -413,7 +432,7 @@ static int show_vfsstat(struct seq_file /* mount point */ seq_puts(m, " mounted on "); - seq_path(m, mnt, mnt->mnt_root, " \t\n\\"); + mangle(m, path); seq_putc(m, ' '); /* file system type */ --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -353,13 +353,16 @@ static const struct inode_operations pro .setattr = proc_setattr, }; +/* Keep in sync with fs/namespace.c! 
*/ extern struct seq_operations mounts_op; struct proc_mounts { struct seq_file m; + void *page; int event; }; -static int mounts_open(struct inode *inode, struct file *file) +static int __mounts_open(struct inode *inode, struct file *file, + struct seq_operations *seq_ops) { struct task_struct *task = get_proc_task(inode); struct mnt_namespace *ns = NULL; @@ -382,12 +385,16 @@ static int mounts_open(struct inode *ino p = kmalloc(sizeof(struct proc_mounts), GFP_KERNEL); if (p) { file->private_data = &p->m; - ret = seq_open(file, &mounts_op); + p->page = (void *)__get_free_page(GFP_KERNEL); + if (p->page) + ret = seq_open(file, seq_ops); if (!ret) { p->m.private = ns; p->event = ns->event; return 0; } + if (p->page) + free_page((unsigned long)p->page); kfree(p); } put_mnt_ns(ns); @@ -395,17 +402,26 @@ static int mounts_open(struct inode *ino return ret; } +static int mounts_open(struct inode *inode, struct file *file) +{ + return __mounts_open(inode, file, &mounts_op); +} + static int mounts_release(struct inode *inode, struct file *file) { - struct seq_file *m = file->private_data; - struct mnt_namespace *ns = m->private; + struct proc_mounts *p = + container_of(file->private_data, struct proc_mounts, m); + struct mnt_namespace *ns = p->m.private; + + free_page((unsigned long)p->page); put_mnt_ns(ns); return seq_release(inode, file); } static unsigned mounts_poll(struct file *file, poll_table *wait) { - struct proc_mounts *p = file->private_data; + struct proc_mounts *p = + container_of(file->private_data, struct proc_mounts, m); struct mnt_namespace *ns = p->m.private; unsigned res = 0; @@ -432,31 +448,7 @@ static const struct file_operations proc extern struct seq_operations mountstats_op; static int mountstats_open(struct inode *inode, struct file *file) { - int ret = seq_open(file, &mountstats_op); - - if (!ret) { - struct seq_file *m = file->private_data; - struct mnt_namespace *mnt_ns = NULL; - struct task_struct *task = get_proc_task(inode); - - if (task) { 
- task_lock(task); - if (task->nsproxy) - mnt_ns = task->nsproxy->mnt_ns; - if (mnt_ns) - get_mnt_ns(mnt_ns); - task_unlock(task); - put_task_struct(task); - } - - if (mnt_ns) - m->private = mnt_ns; - else { - seq_release(inode, file); - ret = -EINVAL; - } - } - return ret; + return __mounts_open(inode, file, &mountstats_op); } static const struct file_operations proc_mountstats_operations = { -- - To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html