Re: [PATCH 1/6] union-mount: Introduce union_mount structure and basic operations

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue,  2 Mar 2010, Valerie Aurora wrote:
> From: Jan Blunck <jblunck@xxxxxxx>
> 
> This patch adds the basic structures and operations of VFS-based union
> mounts (but not the ability to mount or lookup unioned file systems).
> Each directory in a unioned file system has an associated union stack
> created when the directory is first looked up.  The union stack is a
> structure kept in a hash table indexed by mount and dentry of the
> directory; thus, specific paths are unioned, not dentries alone.  The
> union stack keeps a pointer to the upper path and the lower path and
> can be looked up by either path.
> 
> This particular version of union mounts is based on ideas by Jan
> Blunck, Bharata Rao, and many others.
> 
> Signed-off-by: Jan Blunck <jblunck@xxxxxxx>
> Signed-off-by: Valerie Aurora <vaurora@xxxxxxxxxx>
> ---
>  fs/Kconfig             |   13 ++
>  fs/Makefile            |    1 +
>  fs/dcache.c            |    4 +
>  fs/union.c             |  290 ++++++++++++++++++++++++++++++++++++++++++++++++
>  include/linux/dcache.h |   20 ++++
>  include/linux/mount.h  |    3 +
>  include/linux/union.h  |   54 +++++++++
>  7 files changed, 385 insertions(+), 0 deletions(-)
>  create mode 100644 fs/union.c
>  create mode 100644 include/linux/union.h
> 
> diff --git a/fs/Kconfig b/fs/Kconfig
> index 64d44ef..303186b 100644
> --- a/fs/Kconfig
> +++ b/fs/Kconfig
> @@ -59,6 +59,19 @@ source "fs/notify/Kconfig"
>  
>  source "fs/quota/Kconfig"
>  
> +config UNION_MOUNT
> +       bool "Writable overlays (union mounts) (EXPERIMENTAL)"
> +       depends on EXPERIMENTAL
> +       help
> +         Writable overlays allow you to mount a transparent writable
> +	 layer over a read-only file system, for example, an ext3
> +	 partition on a hard drive over a CD-ROM root file system
> +	 image.
> +
> +	 See <file:Documentation/filesystems/union-mounts.txt> for details.
> +
> +	 If unsure, say N.
> +
>  source "fs/autofs/Kconfig"
>  source "fs/autofs4/Kconfig"
>  source "fs/fuse/Kconfig"
> diff --git a/fs/Makefile b/fs/Makefile
> index af6d047..4ed672e 100644
> --- a/fs/Makefile
> +++ b/fs/Makefile
> @@ -52,6 +52,7 @@ obj-$(CONFIG_NFS_COMMON)	+= nfs_common/
>  obj-$(CONFIG_GENERIC_ACL)	+= generic_acl.o
>  
>  obj-y				+= quota/
> +obj-$(CONFIG_UNION_MOUNT)	+= union.o
>  
>  obj-$(CONFIG_PROC_FS)		+= proc/
>  obj-y				+= partitions/
> diff --git a/fs/dcache.c b/fs/dcache.c
> index d14c304..0c2dd32 100644
> --- a/fs/dcache.c
> +++ b/fs/dcache.c
> @@ -960,6 +960,10 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
>  	INIT_LIST_HEAD(&dentry->d_lru);
>  	INIT_LIST_HEAD(&dentry->d_subdirs);
>  	INIT_LIST_HEAD(&dentry->d_alias);
> +#ifdef CONFIG_UNION_MOUNT
> +	INIT_LIST_HEAD(&dentry->d_unions);
> +	dentry->d_unionized = 0;
> +#endif
>  
>  	if (parent) {
>  		dentry->d_parent = dget(parent);
> diff --git a/fs/union.c b/fs/union.c
> new file mode 100644
> index 0000000..2e005d9
> --- /dev/null
> +++ b/fs/union.c
> @@ -0,0 +1,290 @@
> +/*
> + * VFS based union mount for Linux
> + *
> + * Copyright (C) 2004-2007 IBM Corporation, IBM Deutschland Entwicklung GmbH.
> + * Copyright (C) 2007-2009 Novell Inc.
> + *
> + *   Author(s): Jan Blunck (j.blunck@xxxxxxxxxxxxx)
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms of the GNU General Public License as published by the Free
> + * Software Foundation; either version 2 of the License, or (at your option)
> + * any later version.
> + */
> +
> +#include <linux/bootmem.h>
> +#include <linux/init.h>
> +#include <linux/types.h>
> +#include <linux/hash.h>
> +#include <linux/fs.h>
> +#include <linux/mount.h>
> +#include <linux/fs_struct.h>
> +#include <linux/union.h>
> +
> +/*
> + * This is borrowed from fs/inode.c. The hashtable for lookups. Somebody
> + * should try to make this good - I've just made it work.
> + */
> +static unsigned int union_hash_mask __read_mostly;
> +static unsigned int union_hash_shift __read_mostly;
> +static struct hlist_head *union_hashtable __read_mostly;
> +static unsigned int union_rhash_mask __read_mostly;
> +static unsigned int union_rhash_shift __read_mostly;
> +static struct hlist_head *union_rhashtable __read_mostly;
> +
> +/*
> + * Locking Rules:
> + * - dcache_lock (for union_rlookup() only)
> + * - union_lock
> + */
> +DEFINE_SPINLOCK(union_lock);
> +
> +static struct kmem_cache *union_cache __read_mostly;
> +
> +static unsigned long hash(struct dentry *dentry, struct vfsmount *mnt)
> +{
> +	unsigned long tmp;
> +
> +	tmp = ((unsigned long)mnt * (unsigned long)dentry) ^
> +		(GOLDEN_RATIO_PRIME + (unsigned long)mnt) / L1_CACHE_BYTES;
> +	tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> union_hash_shift);
> +	return tmp & union_hash_mask;
> +}
> +
> +static __initdata unsigned long union_hash_entries;
> +
> +static int __init set_union_hash_entries(char *str)
> +{
> +	if (!str)
> +		return 0;
> +	union_hash_entries = simple_strtoul(str, &str, 0);
> +	return 1;
> +}
> +
> +__setup("union_hash_entries=", set_union_hash_entries);
> +
> +static int __init init_union(void)
> +{
> +	int loop;
> +
> +	union_cache = KMEM_CACHE(union_mount, SLAB_PANIC | SLAB_MEM_SPREAD);
> +	union_hashtable = alloc_large_system_hash("Union-cache",
> +						  sizeof(struct hlist_head),
> +						  union_hash_entries,
> +						  14,
> +						  0,
> +						  &union_hash_shift,
> +						  &union_hash_mask,
> +						  0);
> +
> +	for (loop = 0; loop < (1 << union_hash_shift); loop++)
> +		INIT_HLIST_HEAD(&union_hashtable[loop]);
> +
> +
> +	union_rhashtable = alloc_large_system_hash("rUnion-cache",
> +						  sizeof(struct hlist_head),
> +						  union_hash_entries,
> +						  14,
> +						  0,
> +						  &union_rhash_shift,
> +						  &union_rhash_mask,
> +						  0);
> +
> +	for (loop = 0; loop < (1 << union_rhash_shift); loop++)
> +		INIT_HLIST_HEAD(&union_rhashtable[loop]);
> +
> +	return 0;
> +}
> +
> +fs_initcall(init_union);
> +
> +struct union_mount *union_alloc(struct dentry *this, struct vfsmount *this_mnt,
> +				struct dentry *next, struct vfsmount *next_mnt)


Why don't union_alloc, append_to_union, union_lookup,
union_down_one, etc. take a "struct path *" argument instead of
separate vfsmount and dentry pointers?


> +{
> +	struct union_mount *um;
> +
> +	BUG_ON(!S_ISDIR(this->d_inode->i_mode));
> +	BUG_ON(!S_ISDIR(next->d_inode->i_mode));
> +
> +	um = kmem_cache_alloc(union_cache, GFP_ATOMIC);
> +	if (!um)
> +		return NULL;
> +
> +	atomic_set(&um->u_count, 1);

Why is u_count not a "struct kref"?


> +	INIT_LIST_HEAD(&um->u_unions);
> +	INIT_HLIST_NODE(&um->u_hash);
> +	INIT_HLIST_NODE(&um->u_rhash);
> +
> +	um->u_this.mnt = this_mnt;
> +	um->u_this.dentry = this;
> +	um->u_next.mnt = mntget(next_mnt);
> +	um->u_next.dentry = dget(next);
> +
> +	return um;
> +}
> +
> +struct union_mount *union_get(struct union_mount *um)
> +{
> +	BUG_ON(!atomic_read(&um->u_count));
> +	atomic_inc(&um->u_count);
> +	return um;
> +}
> +
> +static int __union_put(struct union_mount *um)
> +{
> +	if (!atomic_dec_and_test(&um->u_count))
> +		return 0;
> +
> +	BUG_ON(!hlist_unhashed(&um->u_hash));
> +	BUG_ON(!hlist_unhashed(&um->u_rhash));
> +
> +	kmem_cache_free(union_cache, um);
> +	return 1;
> +}
> +
> +void union_put(struct union_mount *um)
> +{
> +	struct path tmp = um->u_next;
> +
> +	if (__union_put(um))
> +		path_put(&tmp);
> +}
> +
> +static void __union_hash(struct union_mount *um)
> +{
> +	hlist_add_head(&um->u_hash, union_hashtable +
> +		       hash(um->u_this.dentry, um->u_this.mnt));
> +	hlist_add_head(&um->u_rhash, union_rhashtable +
> +		       hash(um->u_next.dentry, um->u_next.mnt));
> +}
> +
> +static void __union_unhash(struct union_mount *um)
> +{
> +	hlist_del_init(&um->u_hash);
> +	hlist_del_init(&um->u_rhash);
> +}
> +
> +struct union_mount *union_lookup(struct dentry *dentry, struct vfsmount *mnt)
> +{
> +	struct hlist_head *head = union_hashtable + hash(dentry, mnt);
> +	struct hlist_node *node;
> +	struct union_mount *um;
> +
> +	hlist_for_each_entry(um, node, head, u_hash) {
> +		if ((um->u_this.dentry == dentry) &&
> +		    (um->u_this.mnt == mnt))
> +			return um;
> +	}
> +
> +	return NULL;
> +}
> +
> +struct union_mount *union_rlookup(struct dentry *dentry, struct vfsmount *mnt)
> +{
> +	struct hlist_head *head = union_rhashtable + hash(dentry, mnt);
> +	struct hlist_node *node;
> +	struct union_mount *um;
> +
> +	hlist_for_each_entry(um, node, head, u_rhash) {
> +		if ((um->u_next.dentry == dentry) &&
> +		    (um->u_next.mnt == mnt))
> +			return um;
> +	}
> +
> +	return NULL;
> +}
> +
> +/*
> + * append_to_union - add a path to the bottom of the union stack
> + *
> + * Allocate and attach a union cache entry linking the new, upper
> + * mnt/dentry to the "covered" matching lower mnt/dentry.  It's okay
> + * if the union cache entry already exists.
> + */
> +
> +int append_to_union(struct vfsmount *upper_mnt, struct dentry *upper_dentry,
> +		    struct vfsmount *lower_mnt, struct dentry *lower_dentry)
> +{
> +	struct union_mount *new, *um;
> +
> +	BUG_ON(!S_ISDIR(upper_dentry->d_inode->i_mode));
> +	BUG_ON(!S_ISDIR(lower_dentry->d_inode->i_mode));
> +
> +	/* Common case is that it's already been created, do a lookup first */
> +
> +	spin_lock(&union_lock);
> +	um = union_lookup(upper_dentry, upper_mnt);
> +	if (um) {
> +		BUG_ON((um->u_next.dentry != lower_dentry) ||
> +		       (um->u_next.mnt != lower_mnt));
> +		spin_unlock(&union_lock);
> +		return 0;
> +	}
> +	spin_unlock(&union_lock);
> +
> +	new = union_alloc(upper_dentry, upper_mnt, lower_dentry, lower_mnt);
> +	if (!new)
> +		return -ENOMEM;
> +
> +	spin_lock(&union_lock);
> +	um = union_lookup(upper_dentry, upper_mnt);
> +	if (um) {
> +		/* Someone added it while we were allocating, no problem */
> +		BUG_ON((um->u_next.dentry != lower_dentry) ||
> +		       (um->u_next.mnt != lower_mnt));
> +		spin_unlock(&union_lock);
> +		union_put(new);
> +		return 0;
> +	}
> +	__union_hash(new);
> +	spin_unlock(&union_lock);
> +	return 0;
> +}
> +
> +/*
> + * WARNING! Confusing terminology alert.
> + *
> + * Note that the directions "up" and "down" in union mounts are the
> + * opposite of "up" and "down" in normal VFS operation terminology.
> + * "up" in the rest of the VFS means "towards the root of the mount
> + * tree."  If you mount B on top of A, following B "up" will get you
> + * A.  In union mounts, "up" means "towards the most recently mounted
> + * layer of the union stack."  If you union mount B on top of A,
> + * following A "up" will get you to B.  Another way to put it is that
> + * "up" in the VFS means going from this mount towards the direction
> + * of its mnt->mnt_parent pointer, but "up" in union mounts means
> + * going in the opposite direction (until you run out of union
> + * layers).
> + */

So if this is confusing, why not use different terminology for union
layers?  For example "next" and "prev", in line with the naming that
is already used in the structures.

> +
> +/*
> + * union_down_one - get the next lower directory in the union stack
> + *
> + * This is called to traverse the union stack from the given layer to
> + * the next lower layer. union_down_one() is called by various
> + * lookup functions that are aware of union mounts.
> + *
> + * Returns non-zero if followed to the next lower layer, zero otherwise.
> + *
> + * See note on up/down terminology above.
> + */
> +int union_down_one(struct vfsmount **mnt, struct dentry **dentry)
> +{
> +	struct union_mount *um;
> +
> +	if (!IS_MNT_UNION(*mnt))
> +		return 0;
> +
> +	spin_lock(&union_lock);
> +	um = union_lookup(*dentry, *mnt);
> +	spin_unlock(&union_lock);
> +	if (um) {
> +		path_get(&um->u_next);
> +		dput(*dentry);
> +		*dentry = um->u_next.dentry;
> +		mntput(*mnt);
> +		*mnt = um->u_next.mnt;
> +		return 1;
> +	}
> +	return 0;
> +}
> diff --git a/include/linux/dcache.h b/include/linux/dcache.h
> index e035c51..d6c1da2 100644
> --- a/include/linux/dcache.h
> +++ b/include/linux/dcache.h
> @@ -101,6 +101,26 @@ struct dentry {
>  	struct dentry *d_parent;	/* parent directory */
>  	struct qstr d_name;
>  
> +#ifdef CONFIG_UNION_MOUNT
> +	/*
> +	 * Stacks of union mount structures are connected to dentries
> +	 * through the d_unions field.  If this list is not empty,
> +	 * then this dentry is part of a unioned directory stack.
> +	 * Protected by union_lock.
> +	 */
> +	struct list_head d_unions;	/* list of union_mount's */
> +	/*
> +	 * If d_unionized is set, then this dentry is referenced by
> +	 * the u_next field of a union mount structure - that is, it
> +	 * is a dentry for a lower layer of a union.  d_unionized is
> +	 * NOT set in the dentry for the topmost layer of a union.
> +	 *
> +	 * d_unionized would be better renamed to d_union_lower or
> +	 * d_union_ref.
> +	 */
> +	unsigned int d_unionized;	/* unions referencing this dentry */
> +#endif
> +
>  	struct list_head d_lru;		/* LRU list */
>  	/*
>  	 * d_child and d_rcu can share memory
> diff --git a/include/linux/mount.h b/include/linux/mount.h
> index d42be54..85bb75d 100644
> --- a/include/linux/mount.h
> +++ b/include/linux/mount.h
> @@ -64,6 +64,9 @@ struct vfsmount {
>  	struct list_head mnt_slave_list;/* list of slave mounts */
>  	struct list_head mnt_slave;	/* slave list entry */
>  	struct vfsmount *mnt_master;	/* slave is on master->mnt_slave_list */
> +#ifdef CONFIG_UNION_MOUNT
> +	struct list_head mnt_unions;	/* list of union_mount structures */
> +#endif
>  	struct mnt_namespace *mnt_ns;	/* containing namespace */
>  	int mnt_id;			/* mount identifier */
>  	int mnt_group_id;		/* peer group identifier */
> diff --git a/include/linux/union.h b/include/linux/union.h
> new file mode 100644
> index 0000000..71dc35a
> --- /dev/null
> +++ b/include/linux/union.h
> @@ -0,0 +1,54 @@
> +/*
> + * VFS based union mount for Linux
> + *
> + * Copyright (C) 2004-2007 IBM Corporation, IBM Deutschland Entwicklung GmbH.
> + * Copyright (C) 2007 Novell Inc.
> + *   Author(s): Jan Blunck (j.blunck@xxxxxxxxxxxxx)
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms of the GNU General Public License as published by the Free
> + * Software Foundation; either version 2 of the License, or (at your option)
> + * any later version.
> + *
> + */
> +#ifndef __LINUX_UNION_H
> +#define __LINUX_UNION_H
> +#ifdef __KERNEL__
> +
> +#include <linux/list.h>
> +#include <asm/atomic.h>
> +
> +struct dentry;
> +struct vfsmount;
> +
> +#ifdef CONFIG_UNION_MOUNT
> +
> +/*
> + * The union mount structure.
> + */
> +struct union_mount {
> +	atomic_t u_count;		/* reference count */
> +	struct list_head u_unions;	/* list head for d_unions */
> +	struct list_head u_list;	/* list head for mnt_unions */
> +	struct hlist_node u_hash;	/* list head for searching */
> +	struct hlist_node u_rhash;	/* list head for reverse searching */
> +
> +	struct path u_this;		/* this is me */
> +	struct path u_next;		/* this is what I overlay */
> +};
> +
> +#define IS_MNT_UNION(mnt)	((mnt)->mnt_flags & MNT_UNION)
> +
> +extern int append_to_union(struct vfsmount *, struct dentry *,
> +			   struct vfsmount *, struct dentry *);
> +extern int union_down_one(struct vfsmount **, struct dentry **);
> +
> +#else /* CONFIG_UNION_MOUNT */
> +
> +#define IS_MNT_UNION(x)			(0)
> +#define append_to_union(x1, y1, x2, y2)	({ BUG(); (0); })
> +#define union_down_one(x, y)		({ (0); })
> +
> +#endif	/* CONFIG_UNION_MOUNT */
> +#endif	/* __KERNEL__ */
> +#endif	/* __LINUX_UNION_H */
> -- 
> 1.5.6.5
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
> the body of a message to majordomo@xxxxxxxxxxxxxxx
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]
  Powered by Linux