From: Jan Blunck <jblunck@xxxxxxxxxxxxx>

It is possible to use __union_copyup() to support rename of regular
files without returning -EXDEV.

XXX - Rewrite as copyup to old name followed by rename() + whiteout()

Signed-off-by: Jan Blunck <jblunck@xxxxxxxxxxxxx>
Signed-off-by: Valerie Aurora <vaurora@xxxxxxxxxx>
---
 fs/namei.c |  380 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 1 files changed, 374 insertions(+), 6 deletions(-)

diff --git a/fs/namei.c b/fs/namei.c
index e3e8e98..8419e1e 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1842,6 +1842,253 @@ out:
 	return res;
 }
 
+/**
+ * do_union_hash_lookup() - walk down the union stack and lookup_hash()
+ * @nd: nameidata of parent to lookup from
+ * @name: pathname component to lookup
+ * @path: path to store result of lookup in
+ *
+ * Walk down the union stack and search for single pathname component name. It
+ * is assumed that the caller already did a lookup_hash() in the topmost parent
+ * that gave negative lookup result. Therefore this does call lookup_hash() in
+ * every lower layer (!) of the union stack. If a directory is found the union
+ * stack for that is assembled as well.
+ *
+ * Note:
+ * The caller needs to take care of holding a valid reference to the topmost
+ * parent.
+ * On error we leave @path untouched as well as when we don't find anything.
+ */
+static int do_union_hash_lookup(struct nameidata *nd, struct qstr *name,
+				struct path *path)
+{
+	struct path next;
+	int err = 0;
+	int stop;
+
+	while (follow_union_down(&nd->path.mnt, &nd->path.dentry)) {
+		/* rehash because of d_op->d_hash() by the previous layer */
+		name->hash = full_name_hash(name->name, name->len);
+
+		mutex_lock(&nd->path.dentry->d_inode->i_mutex);
+		err = lookup_hash(nd, name, &next);
+		mutex_unlock(&nd->path.dentry->d_inode->i_mutex);
+
+		if (err)
+			break;
+
+		if (next.dentry->d_inode) {
+			mntget(next.mnt);
+			if (!S_ISDIR(next.dentry->d_inode->i_mode)) {
+				*path = next;
+				break;
+			}
+			err = __hash_lookup_build_union(nd, name, &next);
+			if (err)
+				path_put(&next);
+			else
+				*path = next;
+			break;
+		}
+
+		/*
+		 * Decide whether to stop before dropping our reference:
+		 * next.dentry must not be inspected after the put.
+		 */
+		stop = (IS_OPAQUE(nd->path.dentry->d_inode) &&
+			!d_is_fallthru(next.dentry)) ||
+		       d_is_whiteout(next.dentry);
+		path_put_conditional(&next, nd);
+		if (stop)
+			break;
+	}
+
+	return err;
+}
+
+/**
+ * _hash_lookup_union() - lookup single pathname component
+ * @nd: nameidata of parent to lookup from
+ * @name: pathname component to lookup
+ * @path: path to store result of lookup in
+ *
+ * Returns the topmost parent locked and the target dentry found in the union
+ * or the topmost negative target dentry otherwise.
+ *
+ * Note:
+ * Returns topmost parent locked even on error.
+ */
+static int _hash_lookup_union(struct nameidata *nd, struct qstr *name,
+			      struct path *path)
+{
+	struct path parent = nd->path;
+	struct path topmost;
+	int err;
+
+	mutex_lock(&nd->path.dentry->d_inode->i_mutex);
+	err = lookup_hash(nd, name, path);
+	if (err)
+		return err;
+
+	/* if we found something and it isn't a directory we are done */
+	if (path->dentry->d_inode && !S_ISDIR(path->dentry->d_inode->i_mode))
+		return 0;
+
+	/* stop lookup if the parent directory is marked opaque */
+	if ((IS_OPAQUE(nd->path.dentry->d_inode) &&
+	     !d_is_fallthru(path->dentry)) ||
+	    d_is_whiteout(path->dentry))
+		return 0;
+
+	if (!strcmp(path->mnt->mnt_sb->s_type->name, "proc") ||
+	    !strcmp(path->mnt->mnt_sb->s_type->name, "sysfs"))
+		return 0;
+
+	mutex_unlock(&nd->path.dentry->d_inode->i_mutex);
+
+	/*
+	 * save a reference to the topmost parent for walking the union stack
+	 */
+	path_get(&parent);
+	topmost = *path;
+
+	if (path->dentry->d_inode && S_ISDIR(path->dentry->d_inode->i_mode)) {
+		err = __hash_lookup_build_union(nd, name, path);
+		if (err)
+			goto err_lock_parent;
+		goto out_lock_and_revalidate_parent;
+	}
+
+	err = do_union_hash_lookup(nd, name, path);
+	if (err)
+		goto err_lock_parent;
+
+out_lock_and_revalidate_parent:
+	/* seems that we haven't found anything, so return the topmost */
+	path_to_nameidata(&parent, nd);
+	mutex_lock(&nd->path.dentry->d_inode->i_mutex);
+
+	if (topmost.dentry == path->dentry) {
+		spin_lock(&path->dentry->d_lock);
+		if (nd->path.dentry != path->dentry->d_parent) {
+			spin_unlock(&path->dentry->d_lock);
+			dput(path->dentry);
+			name->hash = full_name_hash(name->name, name->len);
+			err = lookup_hash(nd, name, path);
+			if (err)
+				return err;
+			/* FIXME: What if we find a directory here ... */
+			return err;
+		}
+		spin_unlock(&path->dentry->d_lock);
+	} else
+		dput(topmost.dentry);
+
+	return 0;
+
+err_lock_parent:
+	path_to_nameidata(&parent, nd);
+	path_put_conditional(path, nd);
+	mutex_lock(&nd->path.dentry->d_inode->i_mutex);
+	return err;
+}
+
+/**
+ * lookup_rename_source() - lookup the source used by rename
+ * @oldnd: nameidata of the source parent to lookup from
+ * @newnd: nameidata of the target parent (only used for rename locking)
+ * @trap: updated with the result of lock_rename() if the locks are retaken
+ * @name: pathname component to lookup
+ * @old: path to store result of lookup in
+ *
+ * This is a special version of _hash_lookup_union() which becomes necessary
+ * for finding the source of a rename on union mounts.
+ *
+ * Expects both parents to be locked with lock_rename(). The locks are dropped
+ * with unlock_rename() before walking down the union stack and are taken
+ * again with lock_rename() before returning, on success and on error.
+ *
+ * See comment for _hash_lookup_union() above.
+ */
+static int lookup_rename_source(struct nameidata *oldnd,
+				struct nameidata *newnd,
+				struct dentry **trap, struct qstr *name,
+				struct path *old)
+{
+	struct path parent = oldnd->path;
+	struct path topmost;
+	int err;
+
+	err = lookup_hash(oldnd, name, old);
+	if (err)
+		return err;
+
+	/* if we found something and it isn't a directory we are done */
+	if (old->dentry->d_inode && !S_ISDIR(old->dentry->d_inode->i_mode))
+		return 0;
+
+	/* stop lookup if the parent directory is marked opaque */
+	if ((IS_OPAQUE(oldnd->path.dentry->d_inode) &&
+	     !d_is_fallthru(old->dentry)) ||
+	    d_is_whiteout(old->dentry))
+		return 0;
+
+	if (!strcmp(old->mnt->mnt_sb->s_type->name, "proc") ||
+	    !strcmp(old->mnt->mnt_sb->s_type->name, "sysfs"))
+		return 0;
+
+	unlock_rename(oldnd->path.dentry, newnd->path.dentry);
+
+	/*
+	 * save a reference to the topmost parent for walking the union stack
+	 */
+	path_get(&parent);
+	topmost = *old;
+
+	if (old->dentry->d_inode && S_ISDIR(old->dentry->d_inode->i_mode)) {
+		err = __hash_lookup_build_union(oldnd, name, old);
+		if (err)
+			goto err_lock;
+		goto out_lock_and_revalidate_parent;
+	}
+
+	err = do_union_hash_lookup(oldnd, name, old);
+	if (err)
+		goto err_lock;
+
+out_lock_and_revalidate_parent:
+	path_to_nameidata(&parent, oldnd);
+	*trap = lock_rename(oldnd->path.dentry, newnd->path.dentry);
+
+	/*
+	 * If we return the topmost dentry we have to make sure that it has not
+	 * been moved away while we gave up the topmost parent's i_mutex lock.
+	 */
+	if (topmost.dentry == old->dentry) {
+		spin_lock(&old->dentry->d_lock);
+		if (oldnd->path.dentry != old->dentry->d_parent) {
+			spin_unlock(&old->dentry->d_lock);
+			dput(old->dentry);
+			name->hash = full_name_hash(name->name, name->len);
+			err = lookup_hash(oldnd, name, old);
+			if (err)
+				return err;
+			/* FIXME: What if we find a directory here ... */
+			return err;
+		}
+		spin_unlock(&old->dentry->d_lock);
+	} else
+		dput(topmost.dentry);
+
+	return 0;
+
+err_lock:
+	path_to_nameidata(&parent, oldnd);
+	path_put_conditional(old, oldnd);
+	*trap = lock_rename(oldnd->path.dentry, newnd->path.dentry);
+	return err;
+}
+
 static int __lookup_one_len(const char *name, struct qstr *this,
 		struct dentry *base, int len)
 {
@@ -3544,6 +3791,107 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	return error;
 }
 
+/**
+ * vfs_rename_union() - rename on a union mount by copyup and whiteout
+ * @oldnd: nameidata of the source parent
+ * @old: path of the rename source
+ * @newnd: nameidata of the target parent
+ * @new: path of the rename target; its dentry is replaced by a fresh lookup
+ *       if an existing target had to be unlinked first
+ *
+ * Unlinks an existing target in the target parent, copies @old up to @new
+ * with __union_copyup() and finally whites out the old name in the source
+ * parent.
+ *
+ * Returns 0 on success or if source and target are the same inode, a negative
+ * error code otherwise.
+ */
+static int vfs_rename_union(struct nameidata *oldnd, struct path *old,
+			    struct nameidata *newnd, struct path *new)
+{
+	struct inode *old_dir = oldnd->path.dentry->d_inode;
+	struct inode *new_dir = newnd->path.dentry->d_inode;
+	struct qstr old_name;
+	char *name;
+	struct dentry *dentry;
+	int error;
+
+	if (old->dentry->d_inode == new->dentry->d_inode)
+		return 0;
+	error = may_whiteout(old_dir, old->dentry, 0);
+	if (error)
+		return error;
+	if (!old_dir->i_op || !old_dir->i_op->whiteout)
+		return -EPERM;
+
+	if (!new->dentry->d_inode)
+		error = may_create(new_dir, new->dentry);
+	else
+		error = may_delete(new_dir, new->dentry, 0);
+	if (error)
+		return error;
+
+	vfs_dq_init(old_dir);
+	vfs_dq_init(new_dir);
+
+	error = -EBUSY;
+	if (d_mountpoint(old->dentry) || d_mountpoint(new->dentry))
+		return error;
+
+	error = -ENOMEM;
+	/* private copy of the old name; + 1 for the terminating NUL */
+	name = kmalloc(old->dentry->d_name.len + 1, GFP_KERNEL);
+	if (!name)
+		return error;
+	memcpy(name, old->dentry->d_name.name, old->dentry->d_name.len);
+	name[old->dentry->d_name.len] = 0;
+	old_name.len = old->dentry->d_name.len;
+	old_name.hash = old->dentry->d_name.hash;
+	old_name.name = name;
+
+	/* possibly delete the existing new file */
+	if ((newnd->path.dentry == new->dentry->d_parent) &&
+	    new->dentry->d_inode) {
+		/* FIXME: inode may be truncated while we hold a lock */
+		error = vfs_unlink(new_dir, new->dentry);
+		if (error)
+			goto freename;
+
+		dentry = __lookup_hash(&new->dentry->d_name,
+				       newnd->path.dentry, newnd);
+		error = PTR_ERR(dentry);
+		if (IS_ERR(dentry))
+			goto freename;
+
+		dput(new->dentry);
+		new->dentry = dentry;
+	}
+
+	/* copyup to the new file */
+	error = __union_copyup(old, newnd, new);
+	if (error)
+		goto freename;
+
+	/* whiteout the old file */
+	dentry = __lookup_hash(&old_name, oldnd->path.dentry, oldnd);
+	error = PTR_ERR(dentry);
+	if (IS_ERR(dentry))
+		goto freename;
+	error = vfs_whiteout(old_dir, dentry, 0);
+	dput(dentry);
+
+	/* FIXME: This is actually unlink() && create() ... */
+/*
+	if (!error) {
+		const char *new_name = old_dentry->d_name.name;
+		fsnotify_move(old_dir, new_dir, old_name.name, new_name, 0,
+			      new_dentry->d_inode, old_dentry->d_inode);
+	}
+*/
+freename:
+	kfree(old_name.name);
+	return error;
+}
+
 SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname,
 		int, newdfd, const char __user *, newname)
 {
@@ -3582,7 +3930,20 @@ SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname,
 
 	trap = lock_rename(new_dir, old_dir);
 
-	error = hash_lookup_union(&oldnd, &oldnd.last, &old);
+	/*
+	 * For union mounts we need to call a giant lookup_rename_source()
+	 * instead.
+	 * First lock_rename() and look on the topmost fs like you would do in
+	 * the normal rename, if you find something which is not a directory,
+	 * go ahead and lookup target and do normal rename.
+	 * If you find a negative dentry, unlock_rename() and continue as
+	 * _hash_lookup_union() would do without locking the topmost parent
+	 * at the end. After that do lock_rename() of the source parent and the
+	 * target parent and do a copyup with additional whiteout creation at
+	 * the end.
+	 */
+//	error = hash_lookup_union(&oldnd, &oldnd.last, &old);
+	error = lookup_rename_source(&oldnd, &newnd, &trap, &oldnd.last, &old);
 	if (error)
 		goto exit3;
 	/* source must exist */
@@ -3601,19 +3962,21 @@ SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname,
 	error = -EINVAL;
 	if (old.dentry == trap)
 		goto exit4;
-	error = hash_lookup_union(&newnd, &newnd.last, &new);
+	/* target is always on topmost fs, even with unions */
+	error = lookup_hash(&newnd, &newnd.last, &new);
 	if (error)
 		goto exit4;
 	/* target should not be an ancestor of source */
 	error = -ENOTEMPTY;
 	if (new.dentry == trap)
 		goto exit5;
-	/* renaming on unions is done by the user-space */
+	/* renaming of directories on unions is done by the user-space */
 	error = -EXDEV;
-	if (is_unionized(oldnd.path.dentry, oldnd.path.mnt))
-		goto exit5;
-	if (is_unionized(newnd.path.dentry, newnd.path.mnt))
+	if (is_unionized(oldnd.path.dentry, oldnd.path.mnt) &&
+	    S_ISDIR(old.dentry->d_inode->i_mode))
 		goto exit5;
+//	if (is_unionized(newnd.path.dentry, newnd.path.mnt))
+//		goto exit5;
 
 	error = mnt_want_write(oldnd.path.mnt);
 	if (error)
@@ -3622,6 +3985,11 @@ SYSCALL_DEFINE4(renameat, int, olddfd, const char __user *, oldname,
 				     &newnd.path, new.dentry);
 	if (error)
 		goto exit6;
+	if (is_unionized(oldnd.path.dentry, oldnd.path.mnt) &&
+	    (old.dentry->d_parent != oldnd.path.dentry)) {
+		error = vfs_rename_union(&oldnd, &old, &newnd, &new);
+		goto exit6;
+	}
 	error = vfs_rename(old_dir->d_inode, old.dentry,
 				   new_dir->d_inode, new.dentry);
 exit6:
-- 
1.6.3.3

--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html