[RFC 2/2] ext4 btree basic implementation

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



---
 ext4_btree.c | 1356 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 1356 insertions(+)
 create mode 100644 ext4_btree.c

diff --git a/ext4_btree.c b/ext4_btree.c
new file mode 100644
index 0000000..baf253c
--- /dev/null
+++ b/ext4_btree.c
@@ -0,0 +1,1356 @@
+/*
+ * copyright (C) 2015 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+
+
+
+#include "ext4_btree.h"
+
+/*
+ * Calculate offset of the n-th key in a btree node.
+ */
+static inline unsigned int
+ext4_btree_key_offset(struct ext4_btree_geo *geo, unsigned int n)
+{
+	return geo->header_len + n * geo->key_len;
+}
+
+/*
+ * Calculate offset of the n-th node pointer in a btree node.
+ */
+static inline unsigned int
+ext4_btree_val_offset(struct ext4_btree_geo *geo, unsigned int n)
+{
+	return geo->header_len +
+		geo->max_pairs * geo->key_len +
+		n * geo->val_len;
+}
+
+/*
+ * Calculate offset of the n-th record in a btree leaf node.
+ */
+static inline unsigned int
+ext4_btree_rec_offset(struct ext4_btree_geo *geo, unsigned int n)
+{
+	return geo->header_len + n * geo->rec_len;
+}
+
+/*
+ * Node header access functions
+ */
+static inline struct ext4_btree_header*
+ext4_btree_header_addr(struct ext4_btree_node *block)
+{
+	return (struct ext4_btree_header *)block;
+}
+
+static inline unsigned int 
+ext4_btree_get_numkeys(struct ext4_btree_node *node)
+{
+	return ext4_btree_header_addr(node)->btree_numkeys;
+}
+
+static inline void
+ext4_btree_update_numkeys(struct ext4_btree_node *node, unsigned int n)
+{
+	ext4_btree_header_addr(node)->btree_numkeys = n;
+}
+
+static inline unsigned int 
+ext4_btree_get_numrecs(struct ext4_btree_node *node)
+{
+	return ext4_btree_header_addr(node)->btree_numkeys;
+}
+
+static inline void
+ext4_btree_update_numrecs(struct ext4_btree_node *node, unsigned int n)
+{
+	ext4_btree_header_addr(node)->btree_numkeys = n;
+}
+
+static inline unsigned int 
+ext4_btree_get_level(struct ext4_btree_node *node)
+{
+	return ext4_btree_header_addr(node)->btree_level;
+}
+
+static inline void 
+ext4_btree_update_level(struct ext4_btree_node *node, unsigned int level)
+{
+	ext4_btree_header_addr(node)->btree_level = level;
+}
+
+static inline unsigned long long 
+ext4_btree_get_blockno(struct ext4_btree_node *node)
+{
+	return ext4_btree_header_addr(node)->btree_blockno;
+}
+
+static inline void 
+ext4_btree_update_blockno(struct ext4_btree_node *node, 
+			  unsigned long long blockno)
+{
+	ext4_btree_header_addr(node)->btree_blockno = blockno;
+}
+
+/*
+* Get n-th key address from btree node
+*/
+static struct ext4_btree_key*
+ext4_btree_key_addr(struct ext4_btree_geo *geo,
+		    struct ext4_btree_node * node,
+		    unsigned int n)
+{
+	return (struct ext4_btree_key *)
+		((unsigned char *)node + ext4_btree_key_offset(geo, n));
+}
+
+/*
+ * Set n-th key from btree node
+ */
+static void
+ext4_btree_set_key(struct ext4_btree_geo *geo,
+		   struct ext4_btree_node *node, 
+		   unsigned int n,
+		   struct ext4_btree_key *key)
+{
+	struct ext4_btree_key *keyoffset = ext4_btree_key_addr(geo, node, n);
+	*keyoffset = *key;
+}
+
+static void  
+ext4_btree_clear_key(struct ext4_btree_key *key)
+{
+	key->blockno = 0;
+}
+
+/*
+ * Get n-th val address from btree node
+ */
+static struct ext4_btree_val*
+ext4_btree_val_addr(struct ext4_btree_geo *geo,
+	            struct ext4_btree_node *node,
+		    unsigned int n)
+{
+	return (struct ext4_btree_val *)
+		((unsigned char *)node + ext4_btree_val_offset(geo, n));
+}
+
+/*
+ * Set n-th val from btree node
+ */
+static void
+ext4_btree_set_val(struct ext4_btree_geo *geo,
+		   struct ext4_btree_node *node, 
+		   unsigned int n,
+		   struct ext4_btree_val *val)
+{
+	struct ext4_btree_val *valaddr = ext4_btree_val_addr(geo, node, n);
+	*valaddr = *val;
+}
+
+static void  
+ext4_btree_clear_val(struct ext4_btree_val *val)
+{
+	val->blockno = 0;
+}
+
+/*
+ * Get n-th record address from btree node
+ */
+static struct ext4_btree_rec*
+ext4_btree_rec_addr(struct ext4_btree_geo *geo,
+		    struct ext4_btree_node *node,
+		    unsigned int n)
+{
+	return (struct ext4_btree_rec *)
+		((unsigned char *)node + ext4_btree_rec_offset(geo, n));
+}
+
+
+/*
+ * Set n-th value from btree node
+ */
+static void
+ext4_btree_set_rec(struct ext4_btree_geo *geo,
+		   struct ext4_btree_node *node,
+		   unsigned int n,
+		   struct ext4_btree_rec *rec)
+{
+	struct ext4_btree_rec *rec_offset = ext4_btree_rec_addr(geo, node, n);
+	*rec_offset = *rec;
+}
+
+static void
+ext4_btree_clear_rec(struct ext4_btree_rec *rec)
+{
+		rec->key.blockno = 0;
+		rec->len = 0;
+		rec->ref_count = 0;
+}
+
+/*
+ * Initialize btree root node
+ */
+
+void
+ext4_btree_root_init(struct ext4_btree_root *root,
+		     struct ext4_btree_geo *geo)
+{
+	root->node = NULL;
+	root->geo = (*geo);
+}
+
+/*
+ * Initialize ref btree root node
+ */
+void
+ext4_ref_btree_init(struct ext4_btree_root *root)
+{
+	ext4_btree_root_init(root, &ref_btree_geo);
+}
+
+/*
+ * Allocate a btree node
+ */
+struct ext4_btree_node *
+ext4_btree_node_alloc()
+{
+	return fs_alloc_btree_node();
+}
+
+/*
+ * Free a btree node
+ */
+
+void
+ext4_btree_node_free( struct ext4_btree_node *node)
+{
+	fs_free_btree_node(node);
+}
+
+/*
+ * Compare two btree keys. 
+ * Return 1 if key1 > key2. 
+ * Return 0 if key1 == key2. 
+ * Return -1 if key1 < key2.  
+ */
+int
+ext4_btree_key_comp(struct ext4_btree_geo *geo,
+	struct ext4_btree_key *key1,
+	struct ext4_btree_key *key2)
+{
+	if (key1->blockno < key2->blockno)
+		return -1;
+	else if (key1->blockno > key2->blockno)
+		return 1;
+	else
+		return 0;
+}
+
+/*
+ * If specified key within record' range
+ * Return -1 if key < record's key 
+ * Return 0 if record has the key
+ * Return 1 if record's key + len < key
+ */
+int
+ext4_btree_rec_comp(struct ext4_btree_geo *geo,
+		    struct ext4_btree_key *key,
+		    struct ext4_btree_rec *rec)
+{
+	if (key->blockno < rec->key.blockno)
+		return -1;
+	else if ((rec->key.blockno + rec->len) <= key->blockno)
+		return 1;
+	else
+		return 0;
+}
+
+/*
+ * Check if the given record has this key 
+ * Return 1 if record has this key within range 
+ * Return 0 if not
+ */
+static inline int
+ext4_btree_rec_has_key(struct ext4_btree_geo *geo,
+		       struct ext4_btree_rec *rec,
+		       struct ext4_btree_key *key)
+{
+	return ((rec->key.blockno <=  key->blockno) &&
+		((rec->key.blockno + rec->len) > key->blockno));
+}
+
+static inline void 
+ext4_btree_set_search_path(struct ext4_btree_search_path * spath,
+			   int level, 
+			   struct ext4_btree_node * node,
+			   int pos)
+{
+	if (spath) {
+		spath->node[level] = node;
+		spath->pos[level] = pos;
+	}
+}
+
+/* define the btree layout for refcount btree */
+struct ext4_btree_geo ref_btree_geo = {
+	EXT4_BTREE_NODESIZE,
+	EXT4_BTREE_HEADER_SIZE,
+	EXT4_BTREE_KEY_SIZE,
+	EXT4_BTREE_VAL_SIZE,
+	EXT4_BTREE_REC_SIZE,
+	EXT4_BTREE_MAX_KEY_VAL_PAIRS,
+	EXT4_BTREE_MAX_RECS
+};
+
+/* remember the index btree node on the search path */
+struct ext4_btree_search_path {
+	struct ext4_btree_node * node[EXT4_BTREE_MAX_LEVELS];
+	int	pos [EXT4_BTREE_MAX_LEVELS];
+};
+
+
+/*
+ * Initialize the search path
+ */
+void 
+ext4_btree_init_search_path (struct ext4_btree_search_path *spath)
+{
+	int i;
+	for (i=0; i< EXT4_BTREE_MAX_LEVELS; i++) {
+		spath->node[i] = NULL;
+		spath->pos[i] = EXT4_BTREE_MAX_KEY_VAL_PAIRS;
+	}
+}
+
+
+/*
+ * Debug function to print out the whole tree
+ */
+
+void 
+ext4_btree_rec_node_print(struct ext4_btree_geo *geo,
+			  struct ext4_btree_node *node)
+{
+	int i;
+	struct ext4_btree_rec *rec;
+	int num_recs;
+	
+	if (node == NULL)
+		return;
+	num_recs =  ext4_btree_get_numrecs(node);
+	ext4_btree_trace("==rec== node in block %lld - level %d numrecs %d\n",
+		ext4_btree_get_blockno(node),
+		ext4_btree_get_level(node),
+		ext4_btree_get_numrecs(node));
+	for (i = 0; i < num_recs; i++) {
+		rec = ext4_btree_rec_addr(geo, node, i);
+		ext4_btree_trace("rec%d key 0x%llx len 0x%x refcount %d\n",
+			i, rec->key.blockno, rec->len, rec->ref_count);
+	}
+}
+
+void 
+ext4_btree_index_node_print(struct ext4_btree_geo *geo,
+			    struct ext4_btree_node *node)
+{
+	int i;
+	int num_keys;
+	struct ext4_btree_key *key;
+	struct ext4_btree_val *val;
+
+	num_keys =  ext4_btree_get_numkeys(node);
+	ext4_btree_trace("--key-- node in block %lld - level %d numkeys %d\n",
+		ext4_btree_get_blockno(node),
+		ext4_btree_get_level(node),
+		ext4_btree_get_numkeys(node));
+	for (i = 0; i < num_keys; i++)	{
+		key = ext4_btree_key_addr(geo, node, i);
+		val = ext4_btree_val_addr(geo, node, i);
+		ext4_btree_trace("pair%d key 0x%llx val 0x%llx\n",
+			i, key->blockno, val->blockno);
+	}
+}
+
+void 
+ext4_btree_print(struct ext4_btree_root *root)
+{
+	struct ext4_btree_search_path spath;
+	struct ext4_btree_node * node;
+	struct ext4_btree_val * val;
+	int level;
+	int rootlevel;
+	int pos;
+	int numkeys;
+
+	if (root == NULL || root->node == NULL) {
+		ext4_btree_trace("Empty tree\n");
+		return;
+	}
+
+	ext4_btree_trace("Btree Details:\n\n");
+	ext4_btree_init_search_path(&spath);
+	node = root->node;
+	level = rootlevel = ext4_btree_get_level(node);
+	pos = 0;
+	while (level >= 0) {
+		spath.node[level] = node;
+		spath.pos[level] = pos;
+		if (level > 0) {
+			if (pos == 0)
+				ext4_btree_index_node_print(&root->geo, node);
+			numkeys = ext4_btree_get_numkeys(node);
+			if (pos == numkeys) {
+				if (level == rootlevel)
+					break; /* reach the end
+				/* go to parent node */
+				level ++; 
+				node = spath.node[level];
+				pos = spath.pos[level] + 1;
+			}
+			else {
+				/* go to next child node */
+				level --;
+				val = ext4_btree_val_addr(&root->geo, 
+							  node, pos);
+				node = fs_get_btree_node(val->blockno);
+				pos = 0 ;
+			}
+		}
+		else {
+			ext4_btree_rec_node_print(&root->geo, node);
+			if (level == rootlevel)
+				break; /* reach the end
+			/* go to parent node; */
+			level ++; 
+			node = spath.node[level];
+			pos = spath.pos[level] + 1;
+		}
+	}
+	ext4_btree_trace("\n");
+}
+
+
+/*
+ * Lookup for a record contain the specified key in btree
+ * Return NULL if the key is not found
+ */
+struct ext4_btree_rec*
+ext4_btree_search_key(struct ext4_btree_root *root, 
+		      struct ext4_btree_key *key,
+		      struct ext4_btree_search_path * spath)
+{
+	unsigned int i;
+	int	level;
+	struct ext4_btree_node *node;
+	struct ext4_btree_key *tmp_key;
+	struct ext4_btree_val *tmp_val;
+	struct ext4_btree_geo *geo;
+	struct ext4_btree_rec *rec = NULL;
+
+	if (root == NULL || root->node == NULL)
+		return NULL;
+	/* Search through the key-val index nodes. */
+	node = root->node;
+	geo = &root->geo;
+	level = ext4_btree_get_level(node);
+	while (level > 0) {
+		for (i = 0; i < ext4_btree_get_numkeys(node)-1; i++) {
+			tmp_key = ext4_btree_key_addr(geo, node, i + 1);
+			if (ext4_btree_key_comp(geo, key, tmp_key) < 0) 
+				break;
+		}
+		ext4_btree_set_search_path(spath, level, node, i);
+		tmp_val = ext4_btree_val_addr(geo, node, i);
+		node = fs_get_btree_node(tmp_val->blockno);
+		/* read failure*/
+		if (node == NULL)
+			return NULL;
+		level --;
+	} 
+	/* Search the leaf node */
+	assert(ext4_btree_get_level(node) == 0);
+	rec = ext4_btree_rec_addr(geo, node, 0);
+	i = 0;
+	while (ext4_btree_rec_comp(geo, key, rec) > 0) {
+		i++;
+		if (i >= ext4_btree_get_numkeys(node)) {
+			ext4_btree_set_search_path(spath, 0, node, i);
+			return NULL;
+		}
+		rec = ext4_btree_rec_addr(geo, node, i);
+	}
+	ext4_btree_set_search_path(spath, 0, node, i);
+	if (ext4_btree_rec_comp(geo, key, rec) == 0) 
+		return rec; 
+	else 
+		return NULL;
+}
+
+/*
+ * Lookup for a record contain the specified key in btree
+ * Return NULL if the key is not found
+ */
+struct ext4_btree_rec*
+ext4_btree_lookup(struct ext4_btree_root *root, struct ext4_btree_key *key)
+{
+	return ext4_btree_search_key(root, key, NULL);
+}
+
+/*
+ * Insert a rec into a leaf node at specifid position
+ */
+void  
+ext4_btree_node_insert_in_leaf(struct ext4_btree_root *root,
+			       struct ext4_btree_node *node,
+		               struct ext4_btree_rec *rec, 
+			       int pos)
+{
+	int numrecs;
+	int i;
+	struct ext4_btree_rec *tmprec;
+	
+	numrecs= ext4_btree_get_numrecs(node);
+	for (i = numrecs; i > pos;i--) {
+		tmprec = ext4_btree_rec_addr(&root->geo, node, i-1);
+		ext4_btree_set_rec(&root->geo, node, i,tmprec);
+	}
+	ext4_btree_set_rec(&root->geo, node, pos, rec);
+	ext4_btree_update_numrecs(node,numrecs+1);
+}
+
+/*
+ * Split a leaf node into two
+ */
+struct ext4_btree_node *
+ext4_btree_split_leaf(struct ext4_btree_root *root, 
+		      struct ext4_btree_node *node)
+{
+	struct ext4_btree_node *rnode;
+	struct ext4_btree_rec  *tmprec;
+	unsigned int i;
+
+	rnode = ext4_btree_node_alloc();
+	if (!rnode) {
+		/* Cant allocate a new node, ERR*/
+		return NULL;
+	}
+	for (i = EXT4_BTREE_MAX_RECS/2; i < EXT4_BTREE_MAX_RECS;i++) {
+		tmprec = ext4_btree_rec_addr(&root->geo, node, i);
+		ext4_btree_set_rec(&root->geo, rnode, 
+			           (i-EXT4_BTREE_MAX_RECS/2), tmprec);
+		ext4_btree_clear_rec(tmprec);
+	}
+	ext4_btree_update_numrecs(rnode,EXT4_BTREE_MAX_RECS/2);
+	ext4_btree_update_numrecs(node, EXT4_BTREE_MAX_RECS/2);
+	return rnode;
+}
+
+/*
+ * Split a index node into two
+ */
+struct ext4_btree_node *
+ext4_btree_split_index(struct ext4_btree_root *root, 
+		       struct ext4_btree_node *node)
+{
+	struct ext4_btree_node *rnode;
+	struct ext4_btree_key  *tmp_key;
+	struct ext4_btree_val  *tmp_val;
+	unsigned int i;
+
+	rnode = ext4_btree_node_alloc();
+	if (!rnode) {
+		/* Cant allocate a new node, ERR*/
+		return NULL;
+	}
+	/* split key-val pairs between old node and new node */
+	for (i = EXT4_BTREE_MAX_KEY_VAL_PAIRS/2; 
+	     i < EXT4_BTREE_MAX_KEY_VAL_PAIRS; 
+	     i++) {
+		tmp_key = ext4_btree_key_addr(&root->geo, node, i);
+		ext4_btree_set_key(&root->geo, rnode, 
+				   (i-EXT4_BTREE_MAX_KEY_VAL_PAIRS/2),tmp_key);
+		ext4_btree_clear_key(tmp_key);
+		tmp_val = ext4_btree_val_addr(&root->geo, node, i);
+		ext4_btree_set_val(&root->geo, rnode, 
+				   (i-EXT4_BTREE_MAX_KEY_VAL_PAIRS/2),tmp_val);
+		ext4_btree_clear_val(tmp_val);
+	}
+	/* set level and numkeys in new node */
+	ext4_btree_update_level(rnode, ext4_btree_get_level(node));
+	ext4_btree_update_numkeys(rnode,EXT4_BTREE_MAX_KEY_VAL_PAIRS/2);
+	/* set numkeys in old node */
+	ext4_btree_update_numkeys(node, EXT4_BTREE_MAX_KEY_VAL_PAIRS/2);
+	return rnode;
+}
+
+		       
+/*
+ * Insert a key-val pair into a index node at specified position
+ */
+void 
+ext4_btree_node_insert_in_index(struct ext4_btree_root *root, 
+     			       struct ext4_btree_node *node,
+			       struct ext4_btree_key *key,
+			       struct ext4_btree_val *val, 
+			       int pos)	
+{
+	int i;
+	int numkeys;
+	struct ext4_btree_key *pkey;
+	struct ext4_btree_val *pval;
+
+	numkeys = ext4_btree_get_numkeys(node);
+	for (i = numkeys - 1; i >= pos; i--) {
+		pkey = ext4_btree_key_addr(&root->geo, node, i);
+		ext4_btree_set_key(&root->geo, node, i + 1, pkey);
+		pval = ext4_btree_val_addr(&root->geo, node, i);
+		ext4_btree_set_val(&root->geo, node, i + 1, pval);
+	}
+	ext4_btree_set_key(&root->geo, node, pos, key);
+	ext4_btree_set_val(&root->geo, node, pos, val);
+	ext4_btree_update_numkeys(node, numkeys + 1);
+}
+
+/*
+ * Grow tree by one more level. Return the new root node.
+ */
+struct ext4_btree_node *
+ext4_btree_grow(struct ext4_btree_root *root,
+		struct ext4_btree_node *lnode,
+		struct ext4_btree_node *rnode,
+		int level)
+{
+	struct ext4_btree_node * newroot;
+	struct ext4_btree_key * key;
+	struct ext4_btree_val val;
+
+	newroot = ext4_btree_node_alloc();
+	if (!newroot) {
+		/* Cant allocate a new node, ERR*/
+		return NULL;
+	}
+
+	ext4_btree_update_level(newroot, level);
+
+	key = ext4_btree_key_addr(&root->geo, lnode, 0);	
+	ext4_btree_set_key(&root->geo, newroot, 0, key);
+	val.blockno = ext4_btree_get_blockno(lnode);
+	ext4_btree_set_val(&root->geo, newroot, 0, &val);
+
+	key = ext4_btree_key_addr(&root->geo, rnode, 0);	
+	ext4_btree_set_key(&root->geo, newroot, 1, key);
+	val.blockno = ext4_btree_get_blockno(rnode);
+	ext4_btree_set_val(&root->geo, newroot, 1, &val); 
+	
+	ext4_btree_update_numkeys(newroot, 2);
+
+	return newroot;
+
+}
+
+/*
+ * Insert a record to the btree
+ */
+int 
+ext4_btree_insert(struct ext4_btree_root *root, struct ext4_btree_rec *rec)
+{
+	unsigned int level;
+	struct ext4_btree_node *node, *rnode, *newroot;
+	struct ext4_btree_key *key, *rkey;
+	struct ext4_btree_val rval;
+	struct ext4_btree_search_path spath;
+	int pos, full, numkeys;
+	struct ext4_btree_rec *searchrec;
+
+	if (root->node == NULL) {
+		/* empty tree */
+		node = ext4_btree_node_alloc();
+		if (node == NULL)
+			return -1;
+		root->node = node;
+		ext4_btree_update_level(root->node, 0);
+		ext4_btree_trace(
+			"==rec 0x%llx will be insert in node in block %lld "\
+			"- level %d pos %d\n",
+			rec->key.blockno,
+			ext4_btree_get_blockno(root->node),
+			ext4_btree_get_level(root->node),
+			0);
+
+		ext4_btree_node_insert_in_leaf(root, root->node, rec, 0);
+		return 0;
+	}
+
+	/* 
+	 * First we search if the key already present, 
+	 * In the search process, all levels node pointer and position are stored 
+	 * in search pointer for later use 
+	 * there is no duplicated key allowed in the tree 
+	 */
+	ext4_btree_init_search_path(&spath);
+	key = &rec->key;
+	searchrec = ext4_btree_search_key(root, key, &spath);
+
+	if (searchrec) {
+		node = spath.node[0];
+		pos = spath.pos[0];
+		ext4_btree_trace("==rec 0x%llx found in node in block %lld " \
+				 "- level %d pos %d\n",
+				rec->key.blockno,
+				ext4_btree_get_blockno(node),
+				ext4_btree_get_level(node), 
+				pos);
+		return -1;
+	}
+	level = 0;
+	node = spath.node[0];
+	pos = spath.pos[0];
+	full =  ext4_btree_get_numkeys(node) == EXT4_BTREE_MAX_RECS;
+	ext4_btree_trace(
+		"==rec 0x%llx will be insert in node in block %lld "\
+		"- level %d pos %d\n",
+		rec->key.blockno,
+		ext4_btree_get_blockno(node),
+		ext4_btree_get_level(node),
+		pos);
+
+	if (!full) {
+		ext4_btree_node_insert_in_leaf(root, node, rec, pos); 
+		while (pos == 0 && 
+		       (++level <= ext4_btree_get_level(root->node))) {
+			/* update parent key */
+			node = spath.node[level];
+			pos = spath.pos[level];
+			ext4_btree_set_key(&root->geo, node, pos, key);
+		}
+		return 0;
+	} 
+
+	/* check if B tree root is full and level reach the max */
+	if ((ext4_btree_get_level(root->node) == EXT4_BTREE_MAX_LEVELS - 1) &&
+	    (ext4_btree_get_numkeys(root->node) 
+	    == EXT4_BTREE_MAX_KEY_VAL_PAIRS)) {
+		ext4_btree_trace("Tree reach max level, no more insert\n");
+		return -1; // Tree reach the max level
+	}
+
+	/* leaf node is full, split it to make space to insert new rec*/
+	numkeys = ext4_btree_get_numkeys(node);
+	rnode = ext4_btree_split_leaf(root, node);
+	if (rnode == NULL)
+		return -1;
+	if (pos > EXT4_BTREE_MAX_RECS/2)
+		ext4_btree_node_insert_in_leaf(root, rnode, rec, 
+			pos - EXT4_BTREE_MAX_RECS/2);
+	else
+		ext4_btree_node_insert_in_leaf(root, node, rec, pos);
+	
+	/* split index nodes if full, all the way up to root */
+	while (level < ext4_btree_get_level(root->node)) {
+		/* Pick up the first key*/
+		rkey = ext4_btree_key_addr(&root->geo, rnode, 0);
+		rval.blockno = ext4_btree_get_blockno(rnode);
+		level ++;
+		node = spath.node[level];
+		pos = spath.pos[level];
+		numkeys = ext4_btree_get_numkeys(node);
+		full = (numkeys == EXT4_BTREE_MAX_KEY_VAL_PAIRS); 
+		if (!full) {
+			ext4_btree_node_insert_in_index(root, node, rkey, 
+				&rval, pos + 1); 	
+			break;
+		} 
+		rnode = ext4_btree_split_index(root, node);
+		if (rnode == NULL)
+			return -1;
+		if (pos > EXT4_BTREE_MAX_KEY_VAL_PAIRS/2 ) {
+			ext4_btree_node_insert_in_index(root, rnode, rkey, 
+				&rval, pos - EXT4_BTREE_MAX_KEY_VAL_PAIRS/2 + 1);
+		} else {
+			ext4_btree_node_insert_in_index(root, node, rkey, 
+				&rval, pos + 1);
+		}
+	}
+	if (level == ext4_btree_get_level(node) && full) {
+		/* If root is split, grow tree by one more level and assign new root */
+		newroot = ext4_btree_grow(root, node, rnode, level + 1);
+		if (newroot != NULL) {
+			root->node = newroot; 
+		}
+	}
+	return 0;
+}
+
+
+/*
+ * Delete one record from leaf node
+ */
+void
+ext4_btree_delete_one(struct ext4_btree_root *root,
+		      struct ext4_btree_node *node,
+		      int pos)
+{
+	unsigned int i;
+	struct ext4_btree_rec* tmprec;
+	unsigned int numrecs;
+
+	numrecs= ext4_btree_get_numrecs(node);
+	for (i = pos; i< numrecs - 1; i++) {
+		tmprec = ext4_btree_rec_addr(&root->geo, node, i + 1);
+		ext4_btree_set_rec(&root->geo, node, i, tmprec);
+	}
+	ext4_btree_update_numrecs(node, numrecs - 1);
+}
+
+/*
+ * Delete one key/val pair from index node
+ */
+
+void
+ext4_btree_delete_one_keyval(struct ext4_btree_root *root,
+			     struct ext4_btree_node *node,
+			     int pos)
+{
+	unsigned int i;
+	struct ext4_btree_key* key;
+	struct ext4_btree_val* val;
+	unsigned int numkeys;
+
+	numkeys= ext4_btree_get_numkeys(node);
+	for (i = pos; i< numkeys - 1; i++) {
+		key = ext4_btree_key_addr(&root->geo, node, i + 1);
+		val = ext4_btree_val_addr(&root->geo, node, i + 1);
+		ext4_btree_set_key(&root->geo, node, i, key);
+		ext4_btree_set_val(&root->geo, node, i, val);
+	}
+	ext4_btree_update_numkeys(node, numkeys - 1);
+
+}
+
+
+/*
+ * Borrow one record or key/val pair from left sibling 
+ */
+void
+ext4_btree_borrow_one_left (struct ext4_btree_root *root,
+			    struct ext4_btree_node *parent,
+			    struct ext4_btree_node *lnode,
+			    struct ext4_btree_node *rnode,
+			    int pos, 
+			    int level)
+{
+	struct ext4_btree_rec *rec;
+	struct ext4_btree_key *key;
+	struct ext4_btree_val *val;
+	int numrecs;
+	int numkeys; 
+
+	if (level == 0) {
+		numrecs = ext4_btree_get_numrecs(lnode);
+		rec = ext4_btree_rec_addr(&root->geo, lnode, numrecs - 1);
+		key = &rec->key;
+		ext4_btree_node_insert_in_leaf(root, rnode, rec, 0);
+		ext4_btree_delete_one(root, lnode, numrecs - 1);
+	} else {
+		numkeys = ext4_btree_get_numkeys(lnode);
+		key = ext4_btree_key_addr(&root->geo, lnode, numkeys - 1);
+		val = ext4_btree_val_addr(&root->geo, lnode, numkeys - 1);
+		ext4_btree_node_insert_in_index(root, rnode, key, val, 0);
+		ext4_btree_delete_one_keyval(root, lnode, numkeys - 1);
+	}
+	/* update parent node key */
+	ext4_btree_set_key(&root->geo, parent, pos, key);
+
+}
+
+/* 
+ * Borrow one record or key/val pair from right sibling 
+ */
+void
+ext4_btree_borrow_one_right (struct ext4_btree_root *root,
+			    struct ext4_btree_node *parent,
+			    struct ext4_btree_node *lnode,
+			    struct ext4_btree_node *rnode,
+			    int pos, 
+			    int level)
+{
+	struct ext4_btree_rec *rec;
+	struct ext4_btree_key *key;
+	struct ext4_btree_val *val;
+	int numrecs;
+	int numkeys;
+
+	if (level == 0) {
+		rec = ext4_btree_rec_addr(&root->geo, rnode, 0);
+		key = &rec->key;
+		numrecs = ext4_btree_get_numrecs(lnode);
+		ext4_btree_node_insert_in_leaf(root, lnode, rec, numrecs);
+		ext4_btree_delete_one(root, rnode, 0);
+	} else {
+		key = ext4_btree_key_addr(&root->geo, rnode, 0);
+		val = ext4_btree_val_addr(&root->geo, rnode, 0);
+		numkeys = ext4_btree_get_numkeys(lnode);
+		ext4_btree_node_insert_in_index(root, lnode, key, val, numkeys);
+		ext4_btree_delete_one_keyval(root, rnode, 0);
+	}
+	/* update parent node key */
+	ext4_btree_set_key(&root->geo, parent, pos + 1, key);
+}
+
+int
+ext4_btree_rotate(struct ext4_btree_root *root, 
+		  struct ext4_btree_search_path *spath,
+		  struct ext4_btree_node *node, int level)
+{
+	struct ext4_btree_val * val;
+	struct ext4_btree_node *parent, *lnode, *rnode;
+	int pos = 0;
+	int numkeys;
+
+	parent = spath->node[level + 1];
+	pos = spath->pos[level + 1];
+
+	if (pos > 0) {
+		/* check the left sibling first*/
+		val = ext4_btree_val_addr(&root->geo, parent, pos - 1);
+		lnode = fs_get_btree_node(val->blockno);
+		if (lnode) {
+			numkeys = ext4_btree_get_numkeys(lnode); 
+			if (numkeys >= EXT4_BTREE_MIN_RECS + 1) {
+				/* we could shift a record from left sibling to the node*/
+				ext4_btree_borrow_one_left(root, parent, lnode,
+						           node, pos, level);
+				return 0;
+			}
+		}
+	}
+
+	numkeys = ext4_btree_get_numkeys(parent);
+	if (pos < numkeys -1) {
+		val = ext4_btree_val_addr(&root->geo, parent, pos + 1);
+		rnode = fs_get_btree_node(val->blockno);
+		if (rnode) {
+			numkeys = ext4_btree_get_numkeys(rnode);
+			if (numkeys >= EXT4_BTREE_MIN_RECS + 1) {
+				/* we could shift a record from left sibling to the node*/
+				ext4_btree_borrow_one_right(root, parent, node, 
+						            rnode, pos, level);
+				return 0;
+			}
+		}
+	}
+
+	return -1;
+
+}
+
+/*
+ * Merge leaf nodes
+ */
+
+int
+ext4_btree_merge_leaf(struct ext4_btree_root *root, 
+		 struct ext4_btree_search_path *spath,
+		 struct ext4_btree_node *node)
+{
+	int num, lnum, rnum; 
+	struct ext4_btree_node * parent, *lnode, *rnode;
+	int i, pos;
+	struct ext4_btree_rec *rec;
+	struct ext4_btree_val *val;
+
+	parent = spath->node[1];
+	pos = spath->pos[1];
+	
+	num = ext4_btree_get_numrecs(node);
+
+	/* try to merge left sibling first */
+	if (pos > 0) {
+		val = ext4_btree_val_addr(&root->geo, parent, pos - 1); 
+		lnode = fs_get_btree_node(val->blockno);
+		if (!lnode) {
+			return -1;
+			/* err!*/
+		}
+
+		lnum = ext4_btree_get_numrecs(lnode);
+
+		for (i = 0; i < num; i++) {
+			rec = ext4_btree_rec_addr(&root->geo, node, i);
+			ext4_btree_node_insert_in_leaf(root, lnode, rec, lnum+i);
+		}
+		// delete parent key-val pair
+		ext4_btree_delete_one_keyval(root, parent, pos);
+		ext4_btree_node_free(node);
+		return 0;
+	}
+	/* then try to merge with right sibling */
+	val = ext4_btree_val_addr(&root->geo, parent, pos + 1); 
+	rnode = fs_get_btree_node(val->blockno);
+	if (!rnode) {
+		return -1;
+		/* err!*/
+	}
+	
+	rnum = ext4_btree_get_numrecs(rnode);
+	
+	for (i = 0; i < rnum; i++) {
+		rec = ext4_btree_rec_addr(&root->geo, rnode, i);
+		ext4_btree_node_insert_in_leaf(root, node, rec, num+i);
+	}
+	// delete parent key-val pair
+	ext4_btree_delete_one_keyval(root, parent, pos + 1);
+	ext4_btree_node_free(rnode);
+	return 0;
+}
+
+
+/*
+ * Merge index nodes
+ */
+
+int
+ext4_btree_merge_index(struct ext4_btree_root *root, 
+		 struct ext4_btree_search_path *spath,
+		 struct ext4_btree_node *node, int level)
+
+{
+	int num, lnum, rnum, pnum; 
+	struct ext4_btree_node * parent, *lnode, *rnode;
+	int i, pos;
+	struct ext4_btree_key *key;
+	struct ext4_btree_val *val;
+
+	parent = spath->node[level + 1];
+	pos = spath->pos[level + 1];
+	
+	num = ext4_btree_get_numkeys(node);
+
+	/* try to merge left sibling first */
+	if (pos > 0) {
+		val = ext4_btree_val_addr(&root->geo, parent, pos - 1); 
+		lnode = fs_get_btree_node(val->blockno);
+		if (!lnode) {
+			/* err!*/
+			return -1;
+		}
+
+		lnum = ext4_btree_get_numkeys(lnode);
+
+		for (i = 0; i < num; i++) {
+			key = ext4_btree_key_addr(&root->geo, node, i);
+			val = ext4_btree_val_addr(&root->geo, node, i);
+			ext4_btree_node_insert_in_index(root, lnode, key, 
+							val, lnum + i);
+		}
+		// delete parent key-val pair
+		ext4_btree_delete_one_keyval(root, parent, pos);
+		ext4_btree_node_free(node);
+		return 0;
+	}
+	pnum = ext4_btree_get_numkeys(parent);
+	if (pnum > 1) {
+		/* then try to merge with right sibling */
+		val = ext4_btree_val_addr(&root->geo, parent, 1);  /* pos is always 0*/
+		rnode = fs_get_btree_node(val->blockno);
+		if (!rnode) {
+			return -1;
+			/* err!*/
+		}
+		
+		rnum = ext4_btree_get_numkeys(rnode);
+		
+		for (i = 0; i < rnum; i++) {
+			key = ext4_btree_key_addr(&root->geo, rnode, i);
+			val = ext4_btree_val_addr(&root->geo, rnode, i);
+			ext4_btree_node_insert_in_index(root, node, key, 
+							val, num + i);
+		}
+		// delete parent key-val pair
+		ext4_btree_delete_one_keyval(root, parent, pos + 1);
+		ext4_btree_node_free(rnode);
+		ext4_btree_print(root);
+	} else{
+		/* shrink level */
+		ext4_btree_node_free(root->node);
+		root->node = node;
+		ext4_btree_print(root);
+	}
+
+	return 0;
+}
+
+
+/*
+ * Delete a record from the btree
+ */
+int 
+ext4_btree_delete(struct ext4_btree_root *root, struct ext4_btree_key *key)
+{
+	struct ext4_btree_node *node, *parent;
+	struct ext4_btree_search_path spath;
+	int pos, p_pos;
+	struct ext4_btree_rec *searchrec;
+	int level;
+	int tree_height = ext4_btree_get_level(root->node);
+	int ret;
+
+	/* 
+	 * First we search if the key already present, 
+	 * In the search process, all levels node pointer and position are stored 
+	 * in search pointer for later use 
+	 * there is no duplicated key allowed in the tree 
+	 */
+	ext4_btree_init_search_path(&spath);
+	searchrec = ext4_btree_search_key(root, key, &spath);
+	if (!searchrec) 
+		/* record doesnt exist */
+		return -1;
+	node = spath.node[0];
+	pos = spath.pos[0];
+	ext4_btree_trace("==Delete : rec 0x%llx found in node in block %lld " \
+			 "- level %d pos %d\n",
+			key->blockno,
+			ext4_btree_get_blockno(node),
+			ext4_btree_get_level(node), 
+			pos);
+	ext4_btree_delete_one(root, node, pos);
+	if (pos == 0) {
+		/* update parent key */
+		parent = spath.node[1];
+		p_pos = spath.pos[1];
+		key = ext4_btree_key_addr(&root->geo, node, 0);
+		ext4_btree_set_key(&root->geo, parent, p_pos, key);
+	}
+	/*
+	 * If the node is root node, which we do not require minmum number of records,
+	 * then we are good now, exit 
+	 */
+	if (ext4_btree_get_level(root->node) == 0)
+		return 0;
+	if (ext4_btree_get_numrecs(node) >= EXT4_BTREE_MIN_RECS) 
+		return 0;
+
+	level = 0;
+	while (level < tree_height && 
+	       ext4_btree_get_numrecs(node) < EXT4_BTREE_MIN_RECS) {
+		ret = ext4_btree_rotate(root, &spath, node, level);
+		if (!ret)
+			return 0; /* node can be rotated with sibling nodes */
+
+		if (level == 0) 
+			ret = ext4_btree_merge_leaf(root, &spath, node);
+		else
+
+			ret = ext4_btree_merge_index(root, &spath, 
+				                     node, level);
+		if (ret) {
+			/* err*/
+			return ret;
+		}
+		level ++;
+		node = spath.node[level];
+	}
+	return 0;
+}
+
+
+/* 
+ * Check if a btree is valid 
+ */
+
+int 
+ext4_btree_index_node_check(struct ext4_btree_root * root, 
+			    struct ext4_btree_node *node)
+{
+	int i;
+	int num_keys;
+	struct ext4_btree_key *key, *lkey = NULL;
+	struct ext4_btree_val *val;
+
+	if (node == NULL)
+		return -1;
+
+	num_keys =  ext4_btree_get_numkeys(node);
+	if (root->node != node) {
+		// non-root node
+		if (num_keys < EXT4_BTREE_MIN_KEY_VAL_PAIRS || 
+		    num_keys > EXT4_BTREE_MAX_KEY_VAL_PAIRS) {
+			ext4_btree_trace("Invalid numkeys %d in node %lld -  " \
+				"level %d\n",
+				num_keys,
+				ext4_btree_get_blockno(node),
+				ext4_btree_get_level(node));
+			return -2;
+		}
+	}
+
+	for (i = 0; i < num_keys; i++)	{
+		key = ext4_btree_key_addr(&root->geo, node, i);
+		val = ext4_btree_val_addr(&root->geo, node, i);
+		if (lkey != NULL && ext4_btree_key_comp(&root->geo, lkey, key) >= 0) {
+			ext4_btree_trace("Keys are not sorted in node %lld" \
+				"- level %d lkey %d 0x%llx key %d 0x%llx\n",
+				ext4_btree_get_blockno(node),
+				ext4_btree_get_level(node), 
+				i-1, key->blockno, i, key->blockno);
+			return -3;
+		}
+		lkey = key;
+	}
+	return 0;
+}
+
+			     
+int 
+ext4_btree_rec_node_check(struct ext4_btree_root * root, 
+   		          struct ext4_btree_node *node)
+{
+	int i;
+	struct ext4_btree_rec *rec, *lrec = NULL;
+	int num_recs; 
+	
+	if (node == NULL)
+		return -1;
+	
+	num_recs =  ext4_btree_get_numrecs(node);
+	if (root->node != node) {
+		// non-root node
+		if (num_recs < EXT4_BTREE_MIN_RECS || 
+		    num_recs > EXT4_BTREE_MAX_RECS) {
+			ext4_btree_trace("Invalid numrecs %d in node %lld -  " \
+				"level %d\n",
+				num_recs,
+				ext4_btree_get_blockno(node),
+				ext4_btree_get_level(node));
+			return -2;
+		}
+	}
+	for (i = 0; i < num_recs; i++) {
+		rec = ext4_btree_rec_addr(&root->geo, node, i);
+		if (lrec != NULL && 
+		    ext4_btree_key_comp(&root->geo, &lrec->key, &rec->key) >= 0) {
+			ext4_btree_trace("Rec are not sorted in block 0x%llx" \
+				"lkey %d 0x%llx key %d 0x%llx \n", 
+				ext4_btree_get_blockno(node), 
+				i - 1, lrec->key.blockno, i, rec->key.blockno);
+			return -3;
+		}
+	}
+	return 0;
+}
+
+int 
+ext4_btree_valid_check(struct ext4_btree_root *root)
+{
+	struct ext4_btree_search_path spath;
+	struct ext4_btree_header * header;
+	struct ext4_btree_node * node;
+	struct ext4_btree_val * val;
+	int level;
+	int rootlevel;
+	int pos;
+	int numkeys;
+	int result;
+	
+	if (root == NULL || root->node == NULL) {
+		return 0;
+	}
+	/* check geo */
+	if (root->geo.header_len == 0 ||
+	    root->geo.node_len == 0 ||
+	    root->geo.key_len == 0 ||
+	    root->geo.val_len == 0 ||
+	    root->geo.rec_len == 0 ||
+	    root->geo.max_pairs == 0 ||
+	    root->geo.max_records == 0)
+	{
+		ext4_btree_trace("tree geo is invalid %d %d %d %d %d %d %d\n",
+			root->geo.header_len,
+			root->geo.node_len,
+			root->geo.key_len, 
+			root->geo.val_len, 
+			root->geo.rec_len,
+			root->geo.max_pairs, 
+			root->geo.max_records);
+		return -1;
+	}
+	if (root->geo.max_pairs % 2 == 1 || root->geo.max_records % 2 == 1) {
+		ext4_btree_trace("tree geo does not support odd pairs %d %d\n",
+			root->geo.max_pairs, 
+			root->geo.max_records);
+		return -1;
+	}
+	    
+	/* check tree */
+	ext4_btree_init_search_path(&spath);
+	node = root->node;
+	level = rootlevel = ext4_btree_get_level(node);
+	pos = 0;
+	while (level >= 0) {
+		spath.node[level] = node;
+		spath.pos[level] = pos;
+		header = ext4_btree_header_addr(node);
+		if (header->btree_magic != REF_BTREE_MAGIC) {
+			ext4_btree_trace("node 0x%p block %lld has no MAGIC stamp. level %d pos %d\n", 
+					 node, ext4_btree_get_blockno(node), level, pos);
+			return -1;
+		}
+		if (level > 0) {
+			if (pos == 0) {
+				result = ext4_btree_index_node_check(root, 
+								     node);
+				if (result != 0)
+					return result;
+			}
+			numkeys = ext4_btree_get_numkeys(node);
+			if (pos == numkeys) {
+				if (level == rootlevel)
+					break; /* reach the end
+				/* go to parent node */
+				level ++; 
+				node = spath.node[level];
+				pos = spath.pos[level] + 1;
+			}
+			else {
+				/* go to next child node */
+				level --;
+				val = ext4_btree_val_addr(&root->geo, 
+							  node, pos);
+				node = fs_get_btree_node(val->blockno);
+				pos = 0;
+			}
+		}
+		else {
+			result = ext4_btree_rec_node_check(root, node);
+			if (result != 0)
+				return result;
+			if (level == rootlevel)
+				break; // reach the end
+			/* go to parent node; */
+			level ++; 
+			node = spath.node[level];
+			pos = spath.pos[level] + 1;
+		}
+	}
+	return 0;
+}
-- 
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe linux-ext4" in



[Index of Archives]     [Reiser Filesystem Development]     [Ceph FS]     [Kernel Newbies]     [Security]     [Netfilter]     [Bugtraq]     [Linux FS]     [Yosemite National Park]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Samba]     [Device Mapper]     [Linux Media]

  Powered by Linux