[PATCH 9/13] vfs: More than one mob management

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This management includes

* a new dentry attached to some parent inherits the mob from that parent
* on rename from one mob to another, the subtree is reattached to
the new mob (see comment below)
* the mob root is marked with a flag, and on its death the mob is
killed

Some more words about the rename. This recharge is slow, but that is OK,
since the main use case for mobs is per-container dcache management,
and moving a subdir from one container to another is actually
a rare operation which is not expected to be fast.

Moreover, in OpenVZ each container's root is a bind-mount, so if one
tries to do "mv one_ct_root/x other_ct_root/", the rename check for
vfsmnt equality will fail and a real copy will occur.

One bad thing about this approach: when we mount a new filesystem
on a dentry that belongs to a non-init mob, the new mount will be attached
to the init mob. We need to do the mob change when we attach an fs to a
mountpoint.

Signed-off-by: Pavel Emelyanov <xemul@xxxxxxxxxx>

---
 fs/dcache.c            |  182 +++++++++++++++++++++++++++++++++++++++++++++++-
 include/linux/dcache.h |    5 ++
 2 files changed, 186 insertions(+), 1 deletions(-)

diff --git a/fs/dcache.c b/fs/dcache.c
index bfe047d..51fb998 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -145,6 +145,8 @@ static void __d_free(struct rcu_head *head)
 	kmem_cache_free(dentry_cache, dentry); 
 }
 
+static void destroy_mob(struct dentry_mob *mob);
+
 /*
  * no locks, please.
  */
@@ -154,6 +156,8 @@ static void d_free(struct dentry *dentry)
 	percpu_counter_dec(&dentry->d_mob->nr_dentry);
 	if (dentry->d_op && dentry->d_op->d_release)
 		dentry->d_op->d_release(dentry);
+	if (dentry->d_flags & DCACHE_MOB_ROOT)
+		destroy_mob(dentry->d_mob);
 
 	/* if dentry was never inserted into hash, immediate free is OK */
 	if (hlist_bl_unhashed(&dentry->d_hash))
@@ -1101,7 +1105,11 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
 	char *dname;
 	struct dentry_mob *dmob;
 
-	dmob = &init_dentry_mob;
+	if (parent)
+		dmob = parent->d_mob;
+	else
+		dmob = &init_dentry_mob;
+
 	if (dcache_mem_check(dmob))
 		return NULL;
 
@@ -2039,6 +2047,23 @@ static void dentry_unlock_parents_for_move(struct dentry *dentry,
 		spin_unlock(&target->d_parent->d_lock);
 }
 
+static void dcache_move_to_new_mob(struct dentry *root, struct dentry_mob *dmob);
+static void switch_mobs(struct dentry *dentry, struct dentry *target)
+{
+	if (dentry->d_mob == target->d_mob)
+		return;
+
+	if (dentry->d_flags & DCACHE_MOB_ROOT)
+		return;
+
+	dcache_move_to_new_mob(dentry, target->d_mob);
+	if (target->d_flags & DCACHE_MOB_ROOT) {
+		spin_lock(&dentry->d_lock);
+		dentry->d_flags |= DCACHE_MOB_ROOT;
+		spin_unlock(&dentry->d_lock);
+	}
+}
+
 /*
  * When switching names, the actual string doesn't strictly have to
  * be preserved in the target - because we're dropping the target
@@ -2115,6 +2140,8 @@ void d_move(struct dentry * dentry, struct dentry * target)
 	fsnotify_d_move(dentry);
 	spin_unlock(&dentry->d_lock);
 	write_sequnlock(&dentry->d_sb->s_rename_lock);
+
+	switch_mobs(dentry, target);
 }
 EXPORT_SYMBOL(d_move);
 
@@ -2826,6 +2853,159 @@ ino_t find_inode_number(struct dentry *dir, struct qstr *name)
 }
 EXPORT_SYMBOL(find_inode_number);
 
+static struct dentry_mob *create_mob(struct dentry_mob *cur)
+{
+	struct dentry_mob *dmob;
+
+	dmob = kmalloc(sizeof(struct dentry_mob), GFP_KERNEL);
+	if (dmob == NULL)
+		return NULL;
+
+	if (percpu_counter_init(&dmob->nr_dentry, 0) < 0) {
+		kfree(dmob);
+		return NULL;
+	}
+
+	dmob->nr_dentry_max = cur->nr_dentry_max;
+	INIT_LIST_HEAD(&dmob->dentry_lru);
+
+	return dmob;
+}
+
+static void destroy_mob(struct dentry_mob *mob)
+{
+	if (percpu_counter_sum(&mob->nr_dentry) != 0)
+		BUG();
+	if (!list_empty(&mob->dentry_lru))
+		BUG();
+
+	percpu_counter_destroy(&mob->nr_dentry);
+	kfree(mob);
+}
+
+static void dentry_move_to_mob(struct dentry *de, struct dentry_mob *dmob)
+{
+	percpu_counter_dec(&de->d_mob->nr_dentry);
+	if (!list_empty(&de->d_lru)) {
+		spin_lock(&dcache_lru_lock);
+		list_del_init(&de->d_lru);
+		spin_unlock(&dcache_lru_lock);
+	}
+
+	de->d_mob = dmob;
+
+	percpu_counter_inc(&dmob->nr_dentry);
+	if (!de->d_count) {
+		spin_lock(&dcache_lru_lock);
+		list_add_tail(&de->d_lru, &dmob->dentry_lru);
+		spin_unlock(&dcache_lru_lock);
+	}
+}
+
+static void dcache_move_to_new_mob(struct dentry *root, struct dentry_mob *dmob)
+{
+	struct dentry *this_parent;
+	struct list_head *next;
+	unsigned seq;
+	int locked = 0;
+
+	seq = read_seqbegin(&root->d_sb->s_rename_lock);
+again:
+	this_parent = root;
+	spin_lock(&this_parent->d_lock);
+repeat:
+	next = this_parent->d_subdirs.next;
+resume:
+	while (next != &this_parent->d_subdirs) {
+		struct list_head *tmp = next;
+		struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
+		next = tmp->next;
+
+		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
+		/*
+		 * Descend a level if the d_subdirs list is non-empty.
+		 */
+		if (!list_empty(&dentry->d_subdirs)) {
+			spin_unlock(&this_parent->d_lock);
+			spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_);
+			this_parent = dentry;
+			spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_);
+			goto repeat;
+		}
+
+		dentry_move_to_mob(dentry, dmob);
+
+		spin_unlock(&dentry->d_lock);
+	}
+	/*
+	 * All done at this level ... ascend and resume the search.
+	 */
+	if (this_parent != root) {
+		struct dentry *tmp;
+		struct dentry *child;
+
+		tmp = this_parent->d_parent;
+		rcu_read_lock();
+		spin_unlock(&this_parent->d_lock);
+		child = this_parent;
+		this_parent = tmp;
+		spin_lock(&this_parent->d_lock);
+		/* might go back up the wrong parent if we have had a rename
+		 * or deletion */
+		if (this_parent != child->d_parent ||
+			(!locked && read_seqretry(&root->d_sb->s_rename_lock, seq))) {
+			spin_unlock(&this_parent->d_lock);
+			rcu_read_unlock();
+			goto rename_retry;
+		}
+		rcu_read_unlock();
+		next = child->d_u.d_child.next;
+
+		spin_lock_nested(&child->d_lock, DENTRY_D_LOCK_NESTED);
+		dentry_move_to_mob(child, dmob);
+		spin_unlock(&child->d_lock);
+
+		goto resume;
+	}
+
+	spin_unlock(&this_parent->d_lock);
+	if (!locked && read_seqretry(&root->d_sb->s_rename_lock, seq))
+		goto rename_retry;
+	if (locked)
+		read_sequnlock(&root->d_sb->s_rename_lock);
+	return;
+
+rename_retry:
+	locked = 1;
+	read_seqlock(&root->d_sb->s_rename_lock);
+	goto again;
+}
+
+int dcache_new_mob(struct dentry *root)
+{
+	struct dentry_mob *dmob, *old = NULL;
+
+	if (root->d_flags & DCACHE_MOB_ROOT)
+		old = root->d_mob;
+
+	dmob = create_mob(root->d_mob);
+	if (dmob == NULL)
+		return -ENOMEM;
+
+	dcache_move_to_new_mob(root, dmob);
+
+	spin_lock(&root->d_lock);
+	root->d_flags |= DCACHE_MOB_ROOT;
+	root->d_mob = dmob;
+	percpu_counter_inc(&dmob->nr_dentry);
+	spin_unlock(&root->d_lock);
+
+	if (old != NULL)
+		destroy_mob(old);
+
+	return 0;
+}
+
 static __initdata unsigned long dhash_entries;
 static int __init set_dhash_entries(char *str)
 {
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 80bb9e4..3681307 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -54,6 +54,9 @@ struct dentry_mob {
 	struct list_head dentry_lru;
 };
 
+struct dentry;
+int dcache_new_mob(struct dentry *root);
+
 /*
  * Compare 2 name strings, return 0 if they match, otherwise non-zero.
  * The strings are both count bytes long, and count is non-zero.
@@ -227,6 +230,8 @@ struct dentry_operations {
 #define DCACHE_MANAGED_DENTRY \
 	(DCACHE_MOUNTED|DCACHE_NEED_AUTOMOUNT|DCACHE_MANAGE_TRANSIT)
 
+#define DCACHE_MOB_ROOT		0x80000
+
 static inline int dname_external(struct dentry *dentry)
 {
 	return dentry->d_name.name != dentry->d_iname;
-- 
1.5.5.6
--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]
  Powered by Linux