This patch prepares for the implementation of the cgroup pool. If a kernfs node is set to pinned, the data of this node will no longer be protected by kernfs internally. When one of the following actions is performed on it, the area normally protected by kernfs_rwsem is instead protected by the spinlock supplied for that node: 1. rename this node 2. remove this node 3. create a child node Suggested-by: Shanpei Chen <shanpeic@xxxxxxxxxxxxxxxxx> Signed-off-by: Yi Tao <escape@xxxxxxxxxxxxxxxxx> --- fs/kernfs/dir.c | 74 ++++++++++++++++++++++++++++++++++++-------------- include/linux/kernfs.h | 14 ++++++++++ 2 files changed, 68 insertions(+), 20 deletions(-) diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c index ba581429bf7b..68b05b5bc1a2 100644 --- a/fs/kernfs/dir.c +++ b/fs/kernfs/dir.c @@ -26,7 +26,6 @@ static bool kernfs_active(struct kernfs_node *kn) { - lockdep_assert_held(&kernfs_rwsem); return atomic_read(&kn->active) >= 0; } @@ -461,10 +460,9 @@ static void kernfs_drain(struct kernfs_node *kn) { struct kernfs_root *root = kernfs_root(kn); - lockdep_assert_held_write(&kernfs_rwsem); WARN_ON_ONCE(kernfs_active(kn)); - up_write(&kernfs_rwsem); + kernfs_unlock(kn); if (kernfs_lockdep(kn)) { rwsem_acquire(&kn->dep_map, 0, 0, _RET_IP_); @@ -483,7 +481,7 @@ static void kernfs_drain(struct kernfs_node *kn) kernfs_drain_open_files(kn); - down_write(&kernfs_rwsem); + kernfs_lock(kn); } /** @@ -722,7 +720,7 @@ int kernfs_add_one(struct kernfs_node *kn) bool has_ns; int ret; - down_write(&kernfs_rwsem); + kernfs_lock(parent); ret = -EINVAL; has_ns = kernfs_ns_enabled(parent); @@ -753,7 +751,7 @@ int kernfs_add_one(struct kernfs_node *kn) ps_iattr->ia_mtime = ps_iattr->ia_ctime; } - up_write(&kernfs_rwsem); + kernfs_unlock(parent); /* * Activate the new node unless CREATE_DEACTIVATED is requested. 
@@ -767,7 +765,7 @@ int kernfs_add_one(struct kernfs_node *kn) return 0; out_unlock: - up_write(&kernfs_rwsem); + kernfs_unlock(parent); return ret; } @@ -788,8 +786,6 @@ static struct kernfs_node *kernfs_find_ns(struct kernfs_node *parent, bool has_ns = kernfs_ns_enabled(parent); unsigned int hash; - lockdep_assert_held(&kernfs_rwsem); - if (has_ns != (bool)ns) { WARN(1, KERN_WARNING "kernfs: ns %s in '%s' for '%s'\n", has_ns ? "required" : "invalid", parent->name, name); @@ -1242,8 +1238,6 @@ static struct kernfs_node *kernfs_next_descendant_post(struct kernfs_node *pos, { struct rb_node *rbn; - lockdep_assert_held_write(&kernfs_rwsem); - /* if first iteration, visit leftmost descendant which may be root */ if (!pos) return kernfs_leftmost_descendant(root); @@ -1299,8 +1293,6 @@ static void __kernfs_remove(struct kernfs_node *kn) { struct kernfs_node *pos; - lockdep_assert_held_write(&kernfs_rwsem); - /* * Short-circuit if non-root @kn has already finished removal. * This is for kernfs_remove_self() which plays with active ref @@ -1369,9 +1361,9 @@ static void __kernfs_remove(struct kernfs_node *kn) */ void kernfs_remove(struct kernfs_node *kn) { - down_write(&kernfs_rwsem); + kernfs_lock(kn); __kernfs_remove(kn); - up_write(&kernfs_rwsem); + kernfs_unlock(kn); } /** @@ -1525,13 +1517,13 @@ int kernfs_remove_by_name_ns(struct kernfs_node *parent, const char *name, return -ENOENT; } - down_write(&kernfs_rwsem); + kernfs_lock(parent); kn = kernfs_find_ns(parent, name, ns); if (kn) __kernfs_remove(kn); - up_write(&kernfs_rwsem); + kernfs_unlock(parent); if (kn) return 0; @@ -1557,7 +1549,9 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent, if (!kn->parent) return -EINVAL; - down_write(&kernfs_rwsem); + /* if parent is pinned, parent->lock protects rename */ + if (!kn->parent->pinned) + down_write(&kernfs_rwsem); error = -ENOENT; if (!kernfs_active(kn) || !kernfs_active(new_parent) || @@ -1576,7 +1570,8 @@ int kernfs_rename_ns(struct 
kernfs_node *kn, struct kernfs_node *new_parent, /* rename kernfs_node */ if (strcmp(kn->name, new_name) != 0) { error = -ENOMEM; - new_name = kstrdup_const(new_name, GFP_KERNEL); + /* use GFP_ATOMIC to avoid sleep */ + new_name = kstrdup_const(new_name, GFP_ATOMIC); if (!new_name) goto out; } else { @@ -1611,10 +1606,49 @@ int kernfs_rename_ns(struct kernfs_node *kn, struct kernfs_node *new_parent, error = 0; out: - up_write(&kernfs_rwsem); + if (!kn->parent->pinned) + up_write(&kernfs_rwsem); return error; } +/* Traverse all descendants and set pinned */ +void kernfs_set_pinned(struct kernfs_node *kn, spinlock_t *lock) +{ + struct kernfs_node *pos = NULL; + + while ((pos = kernfs_next_descendant_post(pos, kn))) { + pos->pinned = true; + pos->lock = lock; + } +} + +/* Traverse all descendants and clear pinned */ +void kernfs_clear_pinned(struct kernfs_node *kn) +{ + struct kernfs_node *pos = NULL; + + while ((pos = kernfs_next_descendant_post(pos, kn))) { + pos->pinned = false; + pos->lock = NULL; + } +} + +void kernfs_lock(struct kernfs_node *kn) +{ + if (!kn->pinned) + down_write(&kernfs_rwsem); + else + spin_lock(kn->lock); +} + +void kernfs_unlock(struct kernfs_node *kn) +{ + if (!kn->pinned) + up_write(&kernfs_rwsem); + else + spin_unlock(kn->lock); +} + /* Relationship between mode and the DT_xxx types */ static inline unsigned char dt_type(struct kernfs_node *kn) { diff --git a/include/linux/kernfs.h b/include/linux/kernfs.h index 1093abf7c28c..a70d96308c51 100644 --- a/include/linux/kernfs.h +++ b/include/linux/kernfs.h @@ -161,6 +161,13 @@ struct kernfs_node { unsigned short flags; umode_t mode; struct kernfs_iattrs *iattr; + + /* + * If pinned is true, use lock to protect remove, rename this kernfs + * node or create child kernfs node. 
+ */ + bool pinned; + spinlock_t *lock; }; /* @@ -415,6 +422,11 @@ int kernfs_xattr_set(struct kernfs_node *kn, const char *name, struct kernfs_node *kernfs_find_and_get_node_by_id(struct kernfs_root *root, u64 id); + +void kernfs_set_pinned(struct kernfs_node *kn, spinlock_t *lock); +void kernfs_clear_pinned(struct kernfs_node *kn); +void kernfs_lock(struct kernfs_node *kn); +void kernfs_unlock(struct kernfs_node *kn); #else /* CONFIG_KERNFS */ static inline enum kernfs_node_type kernfs_type(struct kernfs_node *kn) @@ -528,6 +540,8 @@ static inline void kernfs_kill_sb(struct super_block *sb) { } static inline void kernfs_init(void) { } +inline void kernfs_set_pinned(struct kernfs_node *kn, spinlock_t *lock) {} +inline void kernfs_clear_pinned(struct kernfs_node *kn) {} #endif /* CONFIG_KERNFS */ /** -- 1.8.3.1