From: Jan Blunck <jblunck@xxxxxxx> This patch adds the basic structures of VFS based union mounts. It is a new implementation based on some of my old ideas that influenced Bharata B Rao <bharata@xxxxxxxxxxxxxxxxxx> who came up with the proposal to let the union_mount struct only point to the next layer in the union stack. I rewrote nearly all of the central patches around lookup and the dcache interaction. Advantages of the new implementation: - the new union stack is no longer tied directly to one dentry - the union stack enables dentries to be part of more than one union (bind mounts) - it is unnecessary to traverse the union stack when de/referencing a dentry - caching of union stack information still driven by dentry cache XXX - is_unionized() is pretty heavy-weight for non-union file systems on a union mount-enabled kernel. May be simplified by assuming one or more of: - Two layers only - One-to-one association between layers (doesn't union submounts) - Writable layer mounted in only one place Signed-off-by: Jan Blunck <jblunck@xxxxxxx> Signed-off-by: Valerie Aurora <vaurora@xxxxxxxxxx> --- fs/Kconfig | 13 ++ fs/Makefile | 1 + fs/dcache.c | 4 + fs/union.c | 332 ++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/dcache.h | 9 ++ include/linux/union.h | 61 +++++++++ 6 files changed, 420 insertions(+), 0 deletions(-) create mode 100644 fs/union.c create mode 100644 include/linux/union.h diff --git a/fs/Kconfig b/fs/Kconfig index 0e7da7b..3e4f664 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -58,6 +58,19 @@ source "fs/notify/Kconfig" source "fs/quota/Kconfig" +config UNION_MOUNT + bool "Writable overlays (union mounts) (EXPERIMENTAL)" + depends on EXPERIMENTAL + help + Writable overlays allow you to mount a transparent writable + layer over a read-only file system, for example, an ext3 + partition on a hard drive over a CD-ROM root file system + image. + + See <file:Documentation/filesystems/union-mounts.txt> for details. + + If unsure, say N. + source "fs/autofs/Kconfig" source "fs/autofs4/Kconfig" source "fs/fuse/Kconfig" diff --git a/fs/Makefile b/fs/Makefile index af6d047..4ed672e 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -52,6 +52,7 @@ obj-$(CONFIG_NFS_COMMON) += nfs_common/ obj-$(CONFIG_GENERIC_ACL) += generic_acl.o obj-y += quota/ +obj-$(CONFIG_UNION_MOUNT) += union.o obj-$(CONFIG_PROC_FS) += proc/ obj-y += partitions/ diff --git a/fs/dcache.c b/fs/dcache.c index 1fae1df..56bd05f 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1046,6 +1046,10 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) INIT_LIST_HEAD(&dentry->d_lru); INIT_LIST_HEAD(&dentry->d_subdirs); INIT_LIST_HEAD(&dentry->d_alias); +#ifdef CONFIG_UNION_MOUNT + INIT_LIST_HEAD(&dentry->d_unions); + dentry->d_unionized = 0; +#endif if (parent) { dentry->d_parent = dget(parent); diff --git a/fs/union.c b/fs/union.c new file mode 100644 index 0000000..d1950c2 --- /dev/null +++ b/fs/union.c @@ -0,0 +1,332 @@ +/* + * VFS based union mount for Linux + * + * Copyright (C) 2004-2007 IBM Corporation, IBM Deutschland Entwicklung GmbH. + * Copyright (C) 2007-2009 Novell Inc. + * + * Author(s): Jan Blunck (j.blunck@xxxxxxxxxxxxx) + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + */ + +#include <linux/bootmem.h> +#include <linux/init.h> +#include <linux/types.h> +#include <linux/hash.h> +#include <linux/fs.h> +#include <linux/mount.h> +#include <linux/fs_struct.h> +#include <linux/union.h> + +/* + * This is borrowed from fs/inode.c. The hashtable for lookups. Somebody + * should try to make this good - I've just made it work. + */ +static unsigned int union_hash_mask __read_mostly; +static unsigned int union_hash_shift __read_mostly; +static struct hlist_head *union_hashtable __read_mostly; +static unsigned int union_rhash_mask __read_mostly; +static unsigned int union_rhash_shift __read_mostly; +static struct hlist_head *union_rhashtable __read_mostly; + +/* + * Locking Rules: + * - dcache_lock (for union_rlookup() only) + * - union_lock + */ +DEFINE_SPINLOCK(union_lock); + +static struct kmem_cache *union_cache __read_mostly; + +static unsigned long hash(struct dentry *dentry, struct vfsmount *mnt) +{ + unsigned long tmp; + + tmp = ((unsigned long)mnt * (unsigned long)dentry) ^ + (GOLDEN_RATIO_PRIME + (unsigned long)mnt) / L1_CACHE_BYTES; + tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> union_hash_shift); + return tmp & union_hash_mask; +} + +static __initdata unsigned long union_hash_entries; + +static int __init set_union_hash_entries(char *str) +{ + if (!str) + return 0; + union_hash_entries = simple_strtoul(str, &str, 0); + return 1; +} + +__setup("union_hash_entries=", set_union_hash_entries); + +static int __init init_union(void) +{ + int loop; + + union_cache = KMEM_CACHE(union_mount, SLAB_PANIC | SLAB_MEM_SPREAD); + union_hashtable = alloc_large_system_hash("Union-cache", + sizeof(struct hlist_head), + union_hash_entries, + 14, + 0, + &union_hash_shift, + &union_hash_mask, + 0); + + for (loop = 0; loop < (1 << union_hash_shift); loop++) + INIT_HLIST_HEAD(&union_hashtable[loop]); + + + union_rhashtable = alloc_large_system_hash("rUnion-cache", + sizeof(struct hlist_head), + union_hash_entries, + 14, + 0, + &union_rhash_shift, + &union_rhash_mask, + 0); + + for (loop = 0; loop < (1 << union_rhash_shift); loop++) + INIT_HLIST_HEAD(&union_rhashtable[loop]); + + return 0; +} + +fs_initcall(init_union); + +struct union_mount *union_alloc(struct dentry *this, struct vfsmount *this_mnt, + struct dentry *next, struct vfsmount *next_mnt) +{ + struct union_mount *um; + + BUG_ON(!S_ISDIR(this->d_inode->i_mode)); + BUG_ON(!S_ISDIR(next->d_inode->i_mode)); + + um = kmem_cache_alloc(union_cache, GFP_ATOMIC); + if (!um) + return NULL; + + atomic_set(&um->u_count, 1); + INIT_LIST_HEAD(&um->u_unions); + INIT_HLIST_NODE(&um->u_hash); + INIT_HLIST_NODE(&um->u_rhash); + + um->u_this.mnt = this_mnt; + um->u_this.dentry = this; + um->u_next.mnt = mntget(next_mnt); + um->u_next.dentry = dget(next); + + return um; +} + +struct union_mount *union_get(struct union_mount *um) +{ + BUG_ON(!atomic_read(&um->u_count)); + atomic_inc(&um->u_count); + return um; +} + +static int __union_put(struct union_mount *um) +{ + if (!atomic_dec_and_test(&um->u_count)) + return 0; + + BUG_ON(!hlist_unhashed(&um->u_hash)); + BUG_ON(!hlist_unhashed(&um->u_rhash)); + + kmem_cache_free(union_cache, um); + return 1; +} + +void union_put(struct union_mount *um) +{ + struct path tmp = um->u_next; + + if (__union_put(um)) + path_put(&tmp); +} + +static void __union_hash(struct union_mount *um) +{ + hlist_add_head(&um->u_hash, union_hashtable + + hash(um->u_this.dentry, um->u_this.mnt)); + hlist_add_head(&um->u_rhash, union_rhashtable + + hash(um->u_next.dentry, um->u_next.mnt)); +} + +static void __union_unhash(struct union_mount *um) +{ + hlist_del_init(&um->u_hash); + hlist_del_init(&um->u_rhash); +} + +struct union_mount *union_lookup(struct dentry *dentry, struct vfsmount *mnt) +{ + struct hlist_head *head = union_hashtable + hash(dentry, mnt); + struct hlist_node *node; + struct union_mount *um; + + hlist_for_each_entry(um, node, head, u_hash) { + if ((um->u_this.dentry == dentry) && + (um->u_this.mnt == mnt)) + return um; + } + + return NULL; +} + +struct union_mount *union_rlookup(struct dentry *dentry, struct vfsmount *mnt) +{ + struct hlist_head *head = union_rhashtable + hash(dentry, mnt); + struct hlist_node *node; + struct union_mount *um; + + hlist_for_each_entry(um, node, head, u_rhash) { + if ((um->u_next.dentry == dentry) && + (um->u_next.mnt == mnt)) + return um; + } + + return NULL; +} + +/* + * is_unionized - check if a dentry lives on a union mounted file system + * + * This tests if a dentry is living on an union mounted file system by walking + * the file system hierarchy. + */ +int is_unionized(struct dentry *dentry, struct vfsmount *mnt) +{ + struct path this = { .mnt = mntget(mnt), + .dentry = dget(dentry) }; + struct vfsmount *tmp; + + do { + /* check if there is an union mounted on top of us */ + spin_lock(&vfsmount_lock); + list_for_each_entry(tmp, &this.mnt->mnt_mounts, mnt_child) { + if (!(tmp->mnt_flags & MNT_UNION)) + continue; + /* Isn't this a bug? */ + if (this.dentry->d_sb != tmp->mnt_mountpoint->d_sb) + continue; + if (is_subdir(this.dentry, tmp->mnt_mountpoint)) { + spin_unlock(&vfsmount_lock); + path_put(&this); + return 1; + } + } + spin_unlock(&vfsmount_lock); + + /* check our mountpoint next */ + tmp = mntget(this.mnt->mnt_parent); + dput(this.dentry); + this.dentry = dget(this.mnt->mnt_mountpoint); + mntput(this.mnt); + this.mnt = tmp; + } while (this.mnt != this.mnt->mnt_parent); + + path_put(&this); + return 0; +} + +int append_to_union(struct vfsmount *mnt, struct dentry *dentry, + struct vfsmount *dest_mnt, struct dentry *dest_dentry) +{ + struct union_mount *this, *um; + + BUG_ON(!IS_MNT_UNION(mnt)); + + this = union_alloc(dentry, mnt, dest_dentry, dest_mnt); + if (!this) + return -ENOMEM; + + spin_lock(&union_lock); + um = union_lookup(dentry, mnt); + if (um) { + BUG_ON((um->u_next.dentry != dest_dentry) || + (um->u_next.mnt != dest_mnt)); + spin_unlock(&union_lock); + union_put(this); + return 0; + } + __union_hash(this); + spin_unlock(&union_lock); + return 0; +} + +/* + * follow_union_down - follow the union stack one layer down + * + * This is called to traverse the union stack from one layer to the next + * overlayed one. follow_union_down() is called by various lookup functions + * that are aware of union mounts. + * + * Returns non-zero if followed to the next layer, zero otherwise. + */ +int follow_union_down(struct vfsmount **mnt, struct dentry **dentry) +{ + struct union_mount *um; + + if (!IS_MNT_UNION(*mnt)) + return 0; + + spin_lock(&union_lock); + um = union_lookup(*dentry, *mnt); + spin_unlock(&union_lock); + if (um) { + path_get(&um->u_next); + dput(*dentry); + *dentry = um->u_next.dentry; + mntput(*mnt); + *mnt = um->u_next.mnt; + return 1; + } + return 0; +} + +/* + * follow_union_mount - follow the union stack to the topmost layer + * + * This is called to traverse the union stack to the topmost layer. This is + * necessary for following parent pointers in an union mount. + * + * Returns none zero if followed to the topmost layer, zero otherwise. + */ +int follow_union_mount(struct vfsmount **mnt, struct dentry **dentry) +{ + struct union_mount *um; + int res = 0; + + while (IS_UNION(*dentry)) { + spin_lock(&dcache_lock); + spin_lock(&union_lock); + um = union_rlookup(*dentry, *mnt); + if (um) + path_get(&um->u_this); + spin_unlock(&union_lock); + spin_unlock(&dcache_lock); + + /* + * Q: Aaargh, how do I validate the topmost dentry pointer? + * A: Eeeeasy! We took the dcache_lock and union_lock. Since + * this protects from any dput'ng going on, we know that the + * dentry is valid since the union is unhashed under + * dcache_lock too. + */ + if (!um) + break; + dput(*dentry); + *dentry = um->u_this.dentry; + mntput(*mnt); + *mnt = um->u_this.mnt; + res = 1; + } + + return res; +} diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 7648b49..4d48c20 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -101,6 +101,15 @@ struct dentry { struct dentry *d_parent; /* parent directory */ struct qstr d_name; +#ifdef CONFIG_UNION_MOUNT + /* + * The following fields are used by the VFS based union mount + * implementation. Both are protected by union_lock! + */ + struct list_head d_unions; /* list of union_mount's */ + unsigned int d_unionized; /* unions referencing this dentry */ +#endif + struct list_head d_lru; /* LRU list */ /* * d_child and d_rcu can share memory diff --git a/include/linux/union.h b/include/linux/union.h new file mode 100644 index 0000000..0c85312 --- /dev/null +++ b/include/linux/union.h @@ -0,0 +1,61 @@ +/* + * VFS based union mount for Linux + * + * Copyright (C) 2004-2007 IBM Corporation, IBM Deutschland Entwicklung GmbH. + * Copyright (C) 2007 Novell Inc. + * Author(s): Jan Blunck (j.blunck@xxxxxxxxxxxxx) + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ +#ifndef __LINUX_UNION_H +#define __LINUX_UNION_H +#ifdef __KERNEL__ + +#include <linux/list.h> +#include <asm/atomic.h> + +struct dentry; +struct vfsmount; + +#ifdef CONFIG_UNION_MOUNT + +/* + * The new union mount structure. + */ +struct union_mount { + atomic_t u_count; /* reference count */ + struct mutex u_mutex; + struct list_head u_unions; /* list head for d_unions */ + struct hlist_node u_hash; /* list head for searching */ + struct hlist_node u_rhash; /* list head for reverse searching */ + + struct path u_this; /* this is me */ + struct path u_next; /* this is what I overlay */ +}; + +#define IS_UNION(dentry) (!list_empty(&(dentry)->d_unions) || \ + (dentry)->d_unionized) +#define IS_MNT_UNION(mnt) ((mnt)->mnt_flags & MNT_UNION) + +extern int is_unionized(struct dentry *, struct vfsmount *); +extern int append_to_union(struct vfsmount *, struct dentry *, + struct vfsmount *, struct dentry *); +extern int follow_union_down(struct vfsmount **, struct dentry **); +extern int follow_union_mount(struct vfsmount **, struct dentry **); + +#else /* CONFIG_UNION_MOUNT */ + +#define IS_UNION(x) (0) +#define IS_MNT_UNION(x) (0) +#define is_unionized(x, y) (0) +#define append_to_union(x1, y1, x2, y2) ({ BUG(); (0); }) +#define follow_union_down(x, y) ({ (0); }) +#define follow_union_mount(x, y) ({ (0); }) + +#endif /* CONFIG_UNION_MOUNT */ +#endif /* __KERNEL__ */ +#endif /* __LINUX_UNION_H */ -- 1.6.3.3 -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html