[Patch 5/4] chunkd: switch objcache to hash

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On second thought, let's switch objcache from list to hash. This should
help the lookup times if there's a large number of outstanding I/Os.

This also adds the objcache_count for self-testing purpoes.
And a few comments.

Signed-off-by: Pete Zaitcev <zaitcev@xxxxxxxxxx>

---
 include/objcache.h   |    9 +++++---
 server/objcache.c    |   42 +++++++++++++++++++++++++----------------
 test/objcache-unit.c |    8 ++++++-
 3 files changed, 39 insertions(+), 20 deletions(-)

I think you mentioned that your scripts dealt with 5/4 patch numbering.
Just in case, setting references by replying to a previous patch mail.

commit 6238aa7321335b0a3aba9736ee1467777bdd6861
Author: Master <zaitcev@xxxxxxxxxxxxxxxxxx>
Date:   Wed Jan 6 22:04:20 2010 -0700

    Switch objcache from list to hash to make lookups faster.

diff --git a/include/objcache.h b/include/objcache.h
index fce1485..6e36e86 100644
--- a/include/objcache.h
+++ b/include/objcache.h
@@ -20,16 +20,14 @@
 #define _CHUNKD_OBJCACHE_H_
 
 #include <glib.h>
-#include <elist.h>
 #include <stdbool.h>
 
 struct objcache {
-	struct list_head head;
 	GMutex *lock;
+	GHashTable *table;
 };
 
 struct objcache_entry {
-	struct list_head link;
 	unsigned int hash;
 	unsigned int flags;
 	int ref;
@@ -60,6 +58,11 @@ extern bool objcache_test_dirty(struct objcache *cache,
 extern void objcache_put(struct objcache *cache, struct objcache_entry *entry);
 
 /*
+ * Count objects in the cache. Can be slow, and used only for debugging.
+ */
+extern int objcache_count(struct objcache *cache);
+
+/*
  * Init a cache. Call once. May fail since it allocates a mutex.
  */
 extern int objcache_init(struct objcache *cache);
diff --git a/server/objcache.c b/server/objcache.c
index 475ac23..e969f89 100644
--- a/server/objcache.c
+++ b/server/objcache.c
@@ -40,18 +40,6 @@ static unsigned int objcache_hash(const char *key, int klen)
 	return hash;
 }
 
-static struct objcache_entry *objcache_lookup(struct objcache *cache,
-					      unsigned int hash)
-{
-	struct objcache_entry *cep;
-
-	list_for_each_entry(cep, &cache->head, link) {
-		if (cep->hash == hash)
-			return cep;
-	}
-	return NULL;
-}
-
 static struct objcache_entry *objcache_insert(struct objcache *cache,
 					      unsigned int hash)
 {
@@ -63,10 +51,18 @@ static struct objcache_entry *objcache_insert(struct objcache *cache,
 	cep->hash = hash;
 	cep->flags = 0;
 	cep->ref = 1;
-	list_add(&cep->link, &cache->head);
+	g_hash_table_insert(cache->table, &cep->hash, cep);
 	return cep;
 }
 
+/*
+ * Observe the way we handle conflicts in the computed hash: we treat the
+ * keys with the same hash as same. It's acceptable in our application.
+ * At worst, an unrelated activity main in chunkd may spook self-check.
+ * This policy remains the same for list, tree, hash or any other implementing
+ * structure. If we use Glib's hash, it can have its own conflicts over
+ * a shared bucket indexed with our hash. We don't know anything about those.
+ */
 struct objcache_entry *__objcache_get(struct objcache *cache,
 				      const char *key, int klen,
 				      unsigned int flag)
@@ -76,7 +72,7 @@ struct objcache_entry *__objcache_get(struct objcache *cache,
 
 	hash = objcache_hash(key, klen);
 	g_mutex_lock(cache->lock);
-	cep = objcache_lookup(cache, hash);
+	cep = g_hash_table_lookup(cache->table, &hash);
 	if (cep) {
 		cep->ref++;
 	} else {
@@ -107,22 +103,36 @@ void objcache_put(struct objcache *cache, struct objcache_entry *cep)
 	}
 	--cep->ref;
 	if (!cep->ref) {
-		list_del(&cep->link);
+		gboolean rcb;
+		rcb = g_hash_table_remove(cache->table, &cep->hash);
+		/*
+		 * We are so super sure that this cannot happen that
+		 * we use abort(), which is not welcome in daemons.
+		 */
+		if (!rcb)
+			abort();
 		free(cep);
 	}
 	g_mutex_unlock(cache->lock);
 }
 
+int objcache_count(struct objcache *cache)
+{
+	return g_hash_table_size(cache->table);
+}
+
 int objcache_init(struct objcache *cache)
 {
 	cache->lock = g_mutex_new();
 	if (!cache->lock)
 		return -1;
-	INIT_LIST_HEAD(&cache->head);
+	/* We do not use g_str_hash becuse our keys may have nul bytes. */
+	cache->table = g_hash_table_new(g_int_hash, g_int_equal);
 	return 0;
 }
 
 void objcache_fini(struct objcache *cache)
 {
 	g_mutex_free(cache->lock);
+	g_hash_table_destroy(cache->table);
 }
diff --git a/test/objcache-unit.c b/test/objcache-unit.c
index f101445..874b57d 100644
--- a/test/objcache-unit.c
+++ b/test/objcache-unit.c
@@ -42,7 +42,10 @@ int main(int argc, char *argv[])
 	ep3 = objcache_get(&cache, k3, sizeof(k3));
 	OK(ep3 != NULL);
 
-	OK(ep1->ref == 1);	/* no collisions */
+	rc = objcache_count(&cache);
+	OK(rc == 3);
+
+	OK(ep1->ref == 1);	/* no collisions, else improve hash */
 
 	objcache_put(&cache, ep1);
 	objcache_put(&cache, ep2);
@@ -53,6 +56,9 @@ int main(int argc, char *argv[])
 	OK(ep2->ref == 1);	/* new */
 	objcache_put(&cache, ep2);
 
+	rc = objcache_count(&cache);
+	OK(rc == 0);
+
 	objcache_fini(&cache);
 	return 0;
 }
--
To unsubscribe from this list: send the line "unsubscribe hail-devel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Fedora Clound]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux