This management includes * a new dentry attached to some parent inherits the mob from that parent * on rename from one mob to another, the subtree is reattached to the new mob (see comment below) * the mob root is marked with a flag and on its death the mob is killed Some more words about the rename. This recharge is slow, but that is OK, since the main use case for mobs is per-container dcache management and the move of some subdir from one container to another is actually a rare operation which is not expected to be fast. Moreover, in OpenVZ each container's root is a bind-mount, so if one tries to do mv one_ct_root/x other_ct_root/ the rename check for the vfsmnt equality will fail and a real copy will occur. One bad thing about this approach is that when we mount a new filesystem on a dentry belonging to a non-init mob, the new mount will be attached to the init mob. We need to do the mob change when we attach an fs to a mountpoint. Signed-off-by: Pavel Emelyanov <xemul@xxxxxxxxxx> --- fs/dcache.c | 182 +++++++++++++++++++++++++++++++++++++++++++++++- include/linux/dcache.h | 5 ++ 2 files changed, 186 insertions(+), 1 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index bfe047d..51fb998 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -145,6 +145,8 @@ static void __d_free(struct rcu_head *head) kmem_cache_free(dentry_cache, dentry); } +static void destroy_mob(struct dentry_mob *mob); + /* * no locks, please. 
*/ @@ -154,6 +156,8 @@ static void d_free(struct dentry *dentry) percpu_counter_dec(&dentry->d_mob->nr_dentry); if (dentry->d_op && dentry->d_op->d_release) dentry->d_op->d_release(dentry); + if (dentry->d_flags & DCACHE_MOB_ROOT) + destroy_mob(dentry->d_mob); /* if dentry was never inserted into hash, immediate free is OK */ if (hlist_bl_unhashed(&dentry->d_hash)) @@ -1101,7 +1105,11 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name) char *dname; struct dentry_mob *dmob; - dmob = &init_dentry_mob; + if (parent) + dmob = parent->d_mob; + else + dmob = &init_dentry_mob; + if (dcache_mem_check(dmob)) return NULL; @@ -2039,6 +2047,23 @@ static void dentry_unlock_parents_for_move(struct dentry *dentry, spin_unlock(&target->d_parent->d_lock); } +static void dcache_move_to_new_mob(struct dentry *root, struct dentry_mob *dmob); +static void switch_mobs(struct dentry *dentry, struct dentry *target) +{ + if (dentry->d_mob == target->d_mob) + return; + + if (dentry->d_flags & DCACHE_MOB_ROOT) + return; + + dcache_move_to_new_mob(dentry, target->d_mob); + if (target->d_flags & DCACHE_MOB_ROOT) { + spin_lock(&dentry->d_lock); + dentry->d_flags |= DCACHE_MOB_ROOT; + spin_unlock(&dentry->d_lock); + } +} + /* * When switching names, the actual string doesn't strictly have to * be preserved in the target - because we're dropping the target @@ -2115,6 +2140,8 @@ void d_move(struct dentry * dentry, struct dentry * target) fsnotify_d_move(dentry); spin_unlock(&dentry->d_lock); write_sequnlock(&dentry->d_sb->s_rename_lock); + + switch_mobs(dentry, target); } EXPORT_SYMBOL(d_move); @@ -2826,6 +2853,159 @@ ino_t find_inode_number(struct dentry *dir, struct qstr *name) } EXPORT_SYMBOL(find_inode_number); +static struct dentry_mob *create_mob(struct dentry_mob *cur) +{ + struct dentry_mob *dmob; + + dmob = kmalloc(sizeof(struct dentry_mob), GFP_KERNEL); + if (dmob == NULL) + return NULL; + + if (percpu_counter_init(&dmob->nr_dentry, 0) < 0) { + kfree(dmob); + 
return NULL; + } + + dmob->nr_dentry_max = cur->nr_dentry_max; + INIT_LIST_HEAD(&dmob->dentry_lru); + + return dmob; +} + +static void destroy_mob(struct dentry_mob *mob) +{ + if (percpu_counter_sum(&mob->nr_dentry) != 0) + BUG(); + if (!list_empty(&mob->dentry_lru)) + BUG(); + + percpu_counter_destroy(&mob->nr_dentry); + kfree(mob); +} + +static void dentry_move_to_mob(struct dentry *de, struct dentry_mob *dmob) +{ + percpu_counter_dec(&de->d_mob->nr_dentry); + if (!list_empty(&de->d_lru)) { + spin_lock(&dcache_lru_lock); + list_del_init(&de->d_lru); + spin_unlock(&dcache_lru_lock); + } + + de->d_mob = dmob; + + percpu_counter_inc(&dmob->nr_dentry); + if (!de->d_count) { + spin_lock(&dcache_lru_lock); + list_add_tail(&de->d_lru, &dmob->dentry_lru); + spin_unlock(&dcache_lru_lock); + } +} + +static void dcache_move_to_new_mob(struct dentry *root, struct dentry_mob *dmob) +{ + struct dentry *this_parent; + struct list_head *next; + unsigned seq; + int locked = 0; + + seq = read_seqbegin(&root->d_sb->s_rename_lock); +again: + this_parent = root; + spin_lock(&this_parent->d_lock); +repeat: + next = this_parent->d_subdirs.next; +resume: + while (next != &this_parent->d_subdirs) { + struct list_head *tmp = next; + struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child); + next = tmp->next; + + spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); + /* + * Descend a level if the d_subdirs list is non-empty. + */ + if (!list_empty(&dentry->d_subdirs)) { + spin_unlock(&this_parent->d_lock); + spin_release(&dentry->d_lock.dep_map, 1, _RET_IP_); + this_parent = dentry; + spin_acquire(&this_parent->d_lock.dep_map, 0, 1, _RET_IP_); + goto repeat; + } + + dentry_move_to_mob(dentry, dmob); + + spin_unlock(&dentry->d_lock); + } + /* + * All done at this level ... ascend and resume the search. 
+ */ + if (this_parent != root) { + struct dentry *tmp; + struct dentry *child; + + tmp = this_parent->d_parent; + rcu_read_lock(); + spin_unlock(&this_parent->d_lock); + child = this_parent; + this_parent = tmp; + spin_lock(&this_parent->d_lock); + /* might go back up the wrong parent if we have had a rename + * or deletion */ + if (this_parent != child->d_parent || + (!locked && read_seqretry(&root->d_sb->s_rename_lock, seq))) { + spin_unlock(&this_parent->d_lock); + rcu_read_unlock(); + goto rename_retry; + } + rcu_read_unlock(); + next = child->d_u.d_child.next; + + spin_lock_nested(&child->d_lock, DENTRY_D_LOCK_NESTED); + dentry_move_to_mob(child, dmob); + spin_unlock(&child->d_lock); + + goto resume; + } + + spin_unlock(&this_parent->d_lock); + if (!locked && read_seqretry(&root->d_sb->s_rename_lock, seq)) + goto rename_retry; + if (locked) + read_sequnlock(&root->d_sb->s_rename_lock); + return; + +rename_retry: + locked = 1; + read_seqlock(&root->d_sb->s_rename_lock); + goto again; +} + +int dcache_new_mob(struct dentry *root) +{ + struct dentry_mob *dmob, *old = NULL; + + if (root->d_flags & DCACHE_MOB_ROOT) + old = root->d_mob; + + dmob = create_mob(root->d_mob); + if (dmob == NULL) + return -ENOMEM; + + dcache_move_to_new_mob(root, dmob); + + spin_lock(&root->d_lock); + root->d_flags |= DCACHE_MOB_ROOT; + root->d_mob = dmob; + percpu_counter_inc(&dmob->nr_dentry); + spin_unlock(&root->d_lock); + + if (old != NULL) + destroy_mob(old); + + return 0; +} + static __initdata unsigned long dhash_entries; static int __init set_dhash_entries(char *str) { diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 80bb9e4..3681307 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -54,6 +54,9 @@ struct dentry_mob { struct list_head dentry_lru; }; +struct dentry; +int dcache_new_mob(struct dentry *root); + /* * Compare 2 name strings, return 0 if they match, otherwise non-zero. * The strings are both count bytes long, and count is non-zero. 
@@ -227,6 +230,8 @@ struct dentry_operations { #define DCACHE_MANAGED_DENTRY \ (DCACHE_MOUNTED|DCACHE_NEED_AUTOMOUNT|DCACHE_MANAGE_TRANSIT) +#define DCACHE_MOB_ROOT 0x80000 + static inline int dname_external(struct dentry *dentry) { return dentry->d_name.name != dentry->d_iname; -- 1.5.5.6 -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html