Add a reference-counted pool namespace structure to libceph. Namespaces
are interned in a global rb-tree and freed via RCU, which allows the
namespace data structure to be shared by cephfs inodes with the same
layout.

Signed-off-by: Yan, Zheng <zyan@xxxxxxxxxx>
---
 include/linux/ceph/libceph.h |   1 +
 include/linux/ceph/pool_ns.h |  47 +++++++++++++++
 net/ceph/Makefile            |   2 +-
 net/ceph/ceph_common.c       |   2 +
 net/ceph/pool_ns.c           | 133 +++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 184 insertions(+), 1 deletion(-)
 create mode 100644 include/linux/ceph/pool_ns.h
 create mode 100644 net/ceph/pool_ns.c

diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
index e7975e4..40e0d84 100644
--- a/include/linux/ceph/libceph.h
+++ b/include/linux/ceph/libceph.h
@@ -21,6 +21,7 @@
 #include <linux/ceph/mon_client.h>
 #include <linux/ceph/osd_client.h>
 #include <linux/ceph/ceph_fs.h>
+#include <linux/ceph/pool_ns.h>
 
 /*
  * mount options
diff --git a/include/linux/ceph/pool_ns.h b/include/linux/ceph/pool_ns.h
new file mode 100644
index 0000000..627cf9d
--- /dev/null
+++ b/include/linux/ceph/pool_ns.h
@@ -0,0 +1,47 @@
+#ifndef _FS_CEPH_POOL_NS_H
+#define _FS_CEPH_POOL_NS_H
+
+#include <linux/types.h>
+#include <linux/kref.h>
+#include <linux/rbtree.h>
+#include <linux/rcupdate.h>
+
+struct ceph_pool_ns {
+	struct kref kref;
+	union {
+		struct rb_node node;
+		struct rcu_head rcu;
+	};
+	size_t name_len;
+	char name[];
+};
+
+extern void ceph_release_pool_ns(struct kref *ref);
+extern struct ceph_pool_ns *ceph_find_or_create_pool_ns(const char *str,
+							 size_t len);
+extern struct ceph_pool_ns *ceph_try_get_pool_ns(struct ceph_pool_ns **pns);
+extern void ceph_pool_ns_cleanup(void);
+
+static inline void ceph_get_pool_ns(struct ceph_pool_ns *ns)
+{
+	kref_get(&ns->kref);
+}
+
+static inline void ceph_put_pool_ns(struct ceph_pool_ns *ns)
+{
+	if (!ns)
+		return;
+	kref_put(&ns->kref, ceph_release_pool_ns);
+}
+
+static inline int ceph_compare_pool_ns(struct ceph_pool_ns *ns,
+				       const char *str, size_t len)
+{
+	size_t nsl = ns ? ns->name_len : 0;
+	if (nsl != len)
+		return nsl - len;
+	if (nsl == 0)
+		return 0;
+	return strncmp(ns->name, str, len);
+}
+#endif
diff --git a/net/ceph/Makefile b/net/ceph/Makefile
index 958d9856..520dab6 100644
--- a/net/ceph/Makefile
+++ b/net/ceph/Makefile
@@ -11,5 +11,5 @@ libceph-y := ceph_common.o messenger.o msgpool.o buffer.o pagelist.o \
 	crypto.o armor.o \
 	auth_x.o \
 	ceph_fs.o ceph_strings.o ceph_hash.o \
-	pagevec.o snapshot.o
+	pagevec.o snapshot.o pool_ns.o
 
diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
index dcc18c6..ac60293 100644
--- a/net/ceph/ceph_common.c
+++ b/net/ceph/ceph_common.c
@@ -25,6 +25,7 @@
 #include <linux/ceph/decode.h>
 #include <linux/ceph/mon_client.h>
 #include <linux/ceph/auth.h>
+#include <linux/ceph/pool_ns.h>
 
 #include "crypto.h"
 
@@ -751,6 +752,7 @@ static void __exit exit_ceph_lib(void)
 	ceph_msgr_exit();
 	ceph_crypto_shutdown();
 	ceph_debugfs_cleanup();
+	ceph_pool_ns_cleanup();
 }
 
 module_init(init_ceph_lib);
diff --git a/net/ceph/pool_ns.c b/net/ceph/pool_ns.c
new file mode 100644
index 0000000..64ccd77
--- /dev/null
+++ b/net/ceph/pool_ns.c
@@ -0,0 +1,133 @@
+#include <linux/slab.h>
+#include <linux/gfp.h>
+#include <linux/string.h>
+#include <linux/spinlock.h>
+#include <linux/ceph/pool_ns.h>
+
+static DEFINE_SPINLOCK(pool_ns_lock);
+static struct rb_root pool_ns_tree = RB_ROOT;
+
+struct ceph_pool_ns *ceph_find_or_create_pool_ns(const char *name, size_t len)
+{
+	struct ceph_pool_ns *ns, *exist;
+	struct rb_node **p, *parent;
+	int ret;
+
+	exist = NULL;
+	spin_lock(&pool_ns_lock);
+	p = &pool_ns_tree.rb_node;
+	while (*p) {
+		exist = rb_entry(*p, struct ceph_pool_ns, node);
+		ret = ceph_compare_pool_ns(exist, name, len);
+		if (ret > 0)
+			p = &(*p)->rb_left;
+		else if (ret < 0)
+			p = &(*p)->rb_right;
+		else
+			break;
+		exist = NULL;
+	}
+	if (exist && !kref_get_unless_zero(&exist->kref)) {
+		rb_erase(&exist->node, &pool_ns_tree);
+		RB_CLEAR_NODE(&exist->node);
+		exist = NULL;
+	}
+	spin_unlock(&pool_ns_lock);
+	if (exist)
+		return exist;
+
+	ns = kmalloc(sizeof(*ns) + len + 1, GFP_NOFS);
+	if (!ns)
+		return NULL;
+
+	kref_init(&ns->kref);
+	ns->name_len = len;
+	memcpy(ns->name, name, len);
+	ns->name[len] = 0;
+
+retry:
+	exist = NULL;
+	parent = NULL;
+	p = &pool_ns_tree.rb_node;
+	spin_lock(&pool_ns_lock);
+	while (*p) {
+		parent = *p;
+		exist = rb_entry(*p, struct ceph_pool_ns, node);
+		ret = ceph_compare_pool_ns(exist, name, len);
+		if (ret > 0)
+			p = &(*p)->rb_left;
+		else if (ret < 0)
+			p = &(*p)->rb_right;
+		else
+			break;
+		exist = NULL;
+	}
+	ret = 0;
+	if (!exist) {
+		rb_link_node(&ns->node, parent, p);
+		rb_insert_color(&ns->node, &pool_ns_tree);
+	} else if (!kref_get_unless_zero(&exist->kref)) {
+		rb_erase(&exist->node, &pool_ns_tree);
+		RB_CLEAR_NODE(&exist->node);
+		ret = -EAGAIN;
+	}
+	spin_unlock(&pool_ns_lock);
+	if (ret == -EAGAIN)
+		goto retry;
+
+	if (exist) {
+		kfree(ns);
+		ns = exist;
+	}
+
+	return ns;
+}
+EXPORT_SYMBOL(ceph_find_or_create_pool_ns);
+
+static void ceph_free_pool_ns(struct rcu_head *head)
+{
+	struct ceph_pool_ns *ns = container_of(head, struct ceph_pool_ns, rcu);
+	kfree(ns);
+}
+
+void ceph_release_pool_ns(struct kref *ref)
+{
+	struct ceph_pool_ns *ns = container_of(ref, struct ceph_pool_ns, kref);
+
+	spin_lock(&pool_ns_lock);
+	if (!RB_EMPTY_NODE(&ns->node)) {
+		rb_erase(&ns->node, &pool_ns_tree);
+		RB_CLEAR_NODE(&ns->node);
+	}
+	spin_unlock(&pool_ns_lock);
+
+	call_rcu(&ns->rcu, ceph_free_pool_ns);
+}
+EXPORT_SYMBOL(ceph_release_pool_ns);
+
+struct ceph_pool_ns *ceph_try_get_pool_ns(struct ceph_pool_ns **pns)
+{
+	struct ceph_pool_ns *ns;
+	rcu_read_lock();
+	ns = rcu_dereference(*pns);
+	if (ns && !kref_get_unless_zero(&ns->kref))
+		ns = NULL;
+	rcu_read_unlock();
+	return ns;
+}
+EXPORT_SYMBOL(ceph_try_get_pool_ns);
+
+void ceph_pool_ns_cleanup(void)
+{
+	struct rb_node *p;
+	struct ceph_pool_ns *ns;
+	if (RB_EMPTY_ROOT(&pool_ns_tree))
+		return;
+
+	pr_err("libceph: detected pool ns leaks\n");
+	while ((p = rb_first(&pool_ns_tree))) {
+		ns = rb_entry(p, struct ceph_pool_ns, node);
+		rb_erase(p, &pool_ns_tree);
+		kfree(ns);
+	}
+}
-- 
2.5.0
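
For reviewers, here is a rough usage sketch of the new API. It is not part of
the patch: the example_* helpers and the "slot" parameter are hypothetical and
stand in for wherever cephfs ends up storing the shared namespace pointer
(e.g. in the inode's layout). It only illustrates the intended pattern:
ceph_find_or_create_pool_ns() returns a referenced, interned entry, lock-free
readers borrow it through ceph_try_get_pool_ns(), and every reference is
dropped with ceph_put_pool_ns().

#include <linux/errno.h>
#include <linux/printk.h>
#include <linux/ceph/pool_ns.h>

/*
 * Hypothetical update path, assumed to be serialized by the caller
 * (e.g. under the inode's lock).  In real code the slot would be
 * __rcu-annotated; rcu_assign_pointer() pairs with the
 * rcu_dereference() inside ceph_try_get_pool_ns().
 */
static int example_set_pool_ns(struct ceph_pool_ns **slot,
			       const char *name, size_t len)
{
	struct ceph_pool_ns *old = *slot;
	struct ceph_pool_ns *ns;

	ns = ceph_find_or_create_pool_ns(name, len); /* returns a referenced entry */
	if (!ns)
		return -ENOMEM;

	rcu_assign_pointer(*slot, ns);	/* publish to lock-free readers */
	ceph_put_pool_ns(old);		/* NULL-safe; drops the old reference */
	return 0;
}

/* Hypothetical reader: borrow the namespace without taking the inode lock. */
static void example_show_pool_ns(struct ceph_pool_ns **slot)
{
	struct ceph_pool_ns *ns = ceph_try_get_pool_ns(slot);

	if (ns) {
		pr_debug("pool namespace: %s\n", ns->name);
		ceph_put_pool_ns(ns);
	}
}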