[PATCH 7/8] Use xvmalloc to store compressed chunks

xvmalloc is an O(1) memory allocator designed specifically
for storing variable-sized compressed chunks. It is already
used by the zram driver for the same purpose.
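
For reference, a minimal usage sketch of the xvmalloc calls this
patch relies on, wrapped in a hypothetical helper (the function
name and its 'src'/'clen' parameters are illustrative only):

	static int xv_usage_sketch(void *src, size_t clen)
	{
		struct xv_pool *pool = xv_create_pool();
		struct page *page;
		unsigned long flags;
		void *obj;
		u32 offset;
		int ret;

		if (!pool)
			return -ENOMEM;

		/* the patch disables interrupts around xv_malloc()/xv_free() */
		local_irq_save(flags);
		ret = xv_malloc(pool, clen, &page, &offset, GFP_NOWAIT);
		local_irq_restore(flags);
		if (ret) {
			xv_destroy_pool(pool);
			return -ENOMEM;
		}

		/* the object lives at <page, offset>; kmap the page to access it */
		obj = kmap_atomic(page, KM_USER0) + offset;
		memcpy(obj, src, clen);
		kunmap_atomic(obj, KM_USER0);

		local_irq_save(flags);
		xv_free(pool, page, offset);
		local_irq_restore(flags);

		xv_destroy_pool(pool);
		return 0;
	}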

A new statistic is also exported:
/sys/kernel/mm/zcache/pool<id>/mem_used_total

This gives the pool's total memory usage, including allocator
fragmentation and metadata overhead.
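
For example, with a single pool the statistic can be read with
"cat /sys/kernel/mm/zcache/pool0/mem_used_total" (pool0 here is
illustrative); the value is reported in bytes.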

Currently, we use just one xvmalloc pool per zcache pool. If
this proves to be a performance bottleneck, these pools will
also be created per-CPU.
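
Since an xvmalloc object is addressed by a <page, offset> pair
rather than a bare page pointer, the pair is packed into a single
word before being stored in the per-inode radix tree. A sketch of
the scheme implemented by zcache_xv_location_to_ptr() below; it
works because offset is always smaller than PAGE_SIZE:

	unsigned long ptrval;

	/* PFN in the high bits, in-page offset in the low PAGE_SHIFT bits */
	ptrval = page_to_pfn(page) << PAGE_SHIFT;
	ptrval |= offset & ~PAGE_MASK;

	/* decode: recover the <page, offset> pair */
	page = pfn_to_page(ptrval >> PAGE_SHIFT);
	offset = ptrval & ~PAGE_MASK;

Each object additionally begins with a struct zcache_objheader
holding the page's index, so a stored chunk can always be mapped
back to the file offset it caches.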

xvmalloc details, performance numbers, and a comparison with
kmalloc (SLUB):

http://code.google.com/p/compcache/wiki/xvMalloc
http://code.google.com/p/compcache/wiki/xvMallocPerformance
http://code.google.com/p/compcache/wiki/AllocatorsComparison

Signed-off-by: Nitin Gupta <ngupta@xxxxxxxxxx>
---
 drivers/staging/zram/zcache_drv.c |  150 +++++++++++++++++++++++++++++-------
 drivers/staging/zram/zcache_drv.h |    6 ++
 2 files changed, 127 insertions(+), 29 deletions(-)

diff --git a/drivers/staging/zram/zcache_drv.c b/drivers/staging/zram/zcache_drv.c
index 2a02606..71ca48a 100644
--- a/drivers/staging/zram/zcache_drv.c
+++ b/drivers/staging/zram/zcache_drv.c
@@ -47,6 +47,7 @@
 #include <linux/slab.h>
 #include <linux/u64_stats_sync.h>
 
+#include "xvmalloc.h"
 #include "zcache_drv.h"
 
 static DEFINE_PER_CPU(unsigned char *, compress_buffer);
@@ -179,6 +180,7 @@ static void zcache_destroy_pool(struct zcache_pool *zpool)
 	}
 
 	free_percpu(zpool->stats);
+	xv_destroy_pool(zpool->xv_pool);
 	kfree(zpool);
 }
 
@@ -219,6 +221,12 @@ int zcache_create_pool(void)
 		goto out;
 	}
 
+	zpool->xv_pool = xv_create_pool();
+	if (!zpool->xv_pool) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
 	rwlock_init(&zpool->tree_lock);
 	seqlock_init(&zpool->memlimit_lock);
 	zpool->inode_tree = RB_ROOT;
@@ -446,35 +454,81 @@ static void *zcache_index_to_ptr(unsigned long index)
 }
 
 /*
+ * Encode <page, offset> as a single "pointer" value which is
+ * stored in the corresponding radix node.
+ */
+static void *zcache_xv_location_to_ptr(struct page *page, u32 offset)
+{
+	unsigned long ptrval;
+
+	ptrval = page_to_pfn(page) << PAGE_SHIFT;
+	ptrval |= (offset & ~PAGE_MASK);
+
+	return (void *)ptrval;
+}
+
+/*
+ * Decode the <page, offset> pair from the "pointer" value returned
+ * by radix tree lookup.
+ */
+static void zcache_ptr_to_xv_location(void *ptr, struct page **page,
+				u32 *offset)
+{
+	unsigned long ptrval = (unsigned long)ptr;
+
+	*page = pfn_to_page(ptrval >> PAGE_SHIFT);
+	*offset = ptrval & ~PAGE_MASK;
+}
+
+/*
  * Radix node contains "pointer" value which encode <page, offset>
  * pair, locating the compressed object. Header of the object then
  * contains corresponding 'index' value.
  */
-static unsigned long zcache_ptr_to_index(struct page *page)
+static unsigned long zcache_ptr_to_index(void *ptr)
 {
+	u32 offset;
+	struct page *page;
 	unsigned long index;
+	struct zcache_objheader *zheader;
 
-	if (zcache_is_zero_page(page))
-		index = (unsigned long)(page) >> ZCACHE_ZERO_PAGE_INDEX_SHIFT;
-	else
-		index = page->index;
+	if (zcache_is_zero_page(ptr))
+		return (unsigned long)(ptr) >> ZCACHE_ZERO_PAGE_INDEX_SHIFT;
+
+	zcache_ptr_to_xv_location(ptr, &page, &offset);
+
+	zheader = kmap_atomic(page, KM_USER0) + offset;
+	index = zheader->index;
+	kunmap_atomic(zheader, KM_USER0);
 
 	return index;
 }
 
-void zcache_free_page(struct zcache_pool *zpool, struct page *page)
+void zcache_free_page(struct zcache_pool *zpool, void *ptr)
 {
 	int is_zero;
+	unsigned long flags;
 
-	if (unlikely(!page))
+	if (unlikely(!ptr))
 		return;
 
-	is_zero = zcache_is_zero_page(page);
+	is_zero = zcache_is_zero_page(ptr);
 	if (!is_zero) {
-		int clen = page->private;
+		int clen;
+		void *obj;
+		u32 offset;
+		struct page *page;
+
+		zcache_ptr_to_xv_location(ptr, &page, &offset);
+		obj = kmap_atomic(page, KM_USER0) + offset;
+		clen = xv_get_object_size(obj) -
+				sizeof(struct zcache_objheader);
+		kunmap_atomic(obj, KM_USER0);
 
 		zcache_add_stat(zpool, ZPOOL_STAT_COMPR_SIZE, -clen);
-		__free_page(page);
+		local_irq_save(flags);
+		xv_free(zpool->xv_pool, page, offset);
+		local_irq_restore(flags);
 	}
 
 	zcache_dec_pages(zpool, is_zero);
@@ -491,24 +545,23 @@ static int zcache_store_page(struct zcache_inode_rb *znode,
 			pgoff_t index, struct page *page, int is_zero)
 {
 	int ret;
+	void *nodeptr;
 	size_t clen;
 	unsigned long flags;
+
+	u32 zoffset;
 	struct page *zpage;
 	unsigned char *zbuffer, *zworkmem;
 	unsigned char *src_data, *dest_data;
+
+	struct zcache_objheader *zheader;
 	struct zcache_pool *zpool = znode->pool;
 
 	if (is_zero) {
-		zpage = zcache_index_to_ptr(index);
+		nodeptr = zcache_index_to_ptr(index);
 		goto out_store;
 	}
 
-	zpage = alloc_page(GFP_NOWAIT);
-	if (!zpage) {
-		ret = -ENOMEM;
-		goto out;
-	}
-
 	preempt_disable();
 	zbuffer = __get_cpu_var(compress_buffer);
 	zworkmem = __get_cpu_var(compress_workmem);
@@ -528,17 +581,32 @@ static int zcache_store_page(struct zcache_inode_rb *znode,
 		goto out;
 	}
 
-	dest_data = kmap_atomic(zpage, KM_USER0);
+	local_irq_save(flags);
+	ret = xv_malloc(zpool->xv_pool, clen + sizeof(*zheader),
+			&zpage, &zoffset, GFP_NOWAIT);
+	local_irq_restore(flags);
+	if (unlikely(ret)) {
+		ret = -ENOMEM;
+		preempt_enable();
+		goto out;
+	}
+
+	dest_data = kmap_atomic(zpage, KM_USER0) + zoffset;
+
+	/* Store index value in header */
+	zheader = (struct zcache_objheader *)dest_data;
+	zheader->index = index;
+	dest_data += sizeof(*zheader);
+
 	memcpy(dest_data, zbuffer, clen);
 	kunmap_atomic(dest_data, KM_USER0);
 	preempt_enable();
 
-	zpage->index = index;
-	zpage->private = clen;
+	nodeptr = zcache_xv_location_to_ptr(zpage, zoffset);
 
 out_store:
 	spin_lock_irqsave(&znode->tree_lock, flags);
-	ret = radix_tree_insert(&znode->page_tree, index, zpage);
+	ret = radix_tree_insert(&znode->page_tree, index, nodeptr);
 	if (unlikely(ret)) {
 		spin_unlock_irqrestore(&znode->tree_lock, flags);
 		if (!is_zero)
@@ -752,6 +820,19 @@ static ssize_t compr_data_size_show(struct kobject *kobj,
 }
 ZCACHE_POOL_ATTR_RO(compr_data_size);
 
+/*
+ * Total memory used by this pool, including allocator fragmentation
+ * and metadata overhead.
+ */
+static ssize_t mem_used_total_show(struct kobject *kobj,
+			       struct kobj_attribute *attr, char *buf)
+{
+	struct zcache_pool *zpool = zcache_kobj_to_pool(kobj);
+
+	return sprintf(buf, "%llu\n", xv_get_total_size_bytes(zpool->xv_pool));
+}
+ZCACHE_POOL_ATTR_RO(mem_used_total);
+
 static void memlimit_sysfs_common(struct kobject *kobj, u64 *value, int store)
 {
 	struct zcache_pool *zpool = zcache_kobj_to_pool(kobj);
@@ -795,6 +876,7 @@ static struct attribute *zcache_pool_attrs[] = {
 	&zero_pages_attr.attr,
 	&orig_data_size_attr.attr,
 	&compr_data_size_attr.attr,
+	&mem_used_total_attr.attr,
 	&memlimit_attr.attr,
 	NULL,
 };
@@ -904,13 +986,17 @@ static int zcache_init_shared_fs(char *uuid, size_t pagesize)
 static int zcache_get_page(int pool_id, ino_t inode_no,
 			pgoff_t index, struct page *page)
 {
-	int ret = -1;
+	int ret;
+	void *nodeptr;
 	size_t clen;
 	unsigned long flags;
+
+	u32 offset;
 	struct page *src_page;
 	unsigned char *src_data, *dest_data;
 
 	struct zcache_inode_rb *znode;
+	struct zcache_objheader *zheader;
 	struct zcache_pool *zpool = zcache->pools[pool_id];
 
 	znode = zcache_find_inode(zpool, inode_no);
@@ -922,29 +1008,35 @@ static int zcache_get_page(int pool_id, ino_t inode_no,
 	BUG_ON(znode->inode_no != inode_no);
 
 	spin_lock_irqsave(&znode->tree_lock, flags);
-	src_page = radix_tree_delete(&znode->page_tree, index);
+	nodeptr = radix_tree_delete(&znode->page_tree, index);
 	if (zcache_inode_is_empty(znode))
 		zcache_inode_isolate(znode);
 	spin_unlock_irqrestore(&znode->tree_lock, flags);
 
 	kref_put(&znode->refcount, zcache_inode_release);
 
-	if (!src_page) {
+	if (!nodeptr) {
 		ret = -EFAULT;
 		goto out;
 	}
 
-	if (zcache_is_zero_page(src_page)) {
+	if (zcache_is_zero_page(nodeptr)) {
 		zcache_handle_zero_page(page);
 		goto out_free;
 	}
 
 	clen = PAGE_SIZE;
-	src_data = kmap_atomic(src_page, KM_USER0);
+	zcache_ptr_to_xv_location(nodeptr, &src_page, &offset);
+
+	src_data = kmap_atomic(src_page, KM_USER0) + offset;
+	zheader = (struct zcache_objheader *)src_data;
+	BUG_ON(zheader->index != index);
+
 	dest_data = kmap_atomic(page, KM_USER1);
 
-	ret = lzo1x_decompress_safe(src_data, src_page->private,
-				dest_data, &clen);
+	ret = lzo1x_decompress_safe(src_data + sizeof(*zheader),
+			xv_get_object_size(src_data) - sizeof(*zheader),
+			dest_data, &clen);
 
 	kunmap_atomic(src_data, KM_USER0);
 	kunmap_atomic(dest_data, KM_USER1);
@@ -956,7 +1048,7 @@ static int zcache_get_page(int pool_id, ino_t inode_no,
 	flush_dcache_page(page);
 
 out_free:
-	zcache_free_page(zpool, src_page);
+	zcache_free_page(zpool, nodeptr);
 	ret = 0; /* success */
 
 out:
diff --git a/drivers/staging/zram/zcache_drv.h b/drivers/staging/zram/zcache_drv.h
index 9ce97da..7283116 100644
--- a/drivers/staging/zram/zcache_drv.h
+++ b/drivers/staging/zram/zcache_drv.h
@@ -41,6 +41,11 @@ static const unsigned zcache_pool_default_memlimit_perc_ram = 10;
  /* We only keep pages that compress to less than this size */
 static const int zcache_max_page_size = PAGE_SIZE / 2;
 
+/* Stored at the beginning of each compressed object */
+struct zcache_objheader {
+	unsigned long index;
+};
+
 /* Red-Black tree node. Maps inode to its page-tree */
 struct zcache_inode_rb {
 	struct radix_tree_root page_tree; /* maps inode index to page */
@@ -64,6 +69,7 @@ struct zcache_pool {
 	seqlock_t memlimit_lock;	/* protects memlimit */
 	u64 memlimit;			/* bytes */
 
+	struct xv_pool *xv_pool;	/* xvmalloc pool */
 	struct zcache_pool_stats_cpu *stats;	/* percpu stats */
 #ifdef CONFIG_SYSFS
 	unsigned char name[MAX_ZPOOL_NAME_LEN];
-- 
1.7.1.1
