[PATCH 2/3] zsmalloc: mark its page "PG_movable"

This patch marks zsmalloc's pages "PG_movable" and introduces the functions
zs_isolatepage, zs_putpage and zs_migratepage for the page migration interfaces.

Signed-off-by: Hui Zhu <zhuhui@xxxxxxxxxx>
---
 include/linux/mm_types.h |   5 +
 mm/zsmalloc.c            | 416 ++++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 402 insertions(+), 19 deletions(-)
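
Note (not part of the commit): a rough, untested sketch of how the migration
core might drive these per-page hooks.  It assumes the page->isolate/->put/
->migrate callbacks and the PG_movable flag introduced by patch 1/3 of this
series; migrate_movable_page() and its error handling are hypothetical helpers
for illustration only, not code added by this patch.

/* Hypothetical caller-side sketch: how a migration path could use the
 * hooks that zsmalloc fills in below. */
static int migrate_movable_page(struct page *page, struct page *newpage,
				enum migrate_mode mode)
{
	int ret;

	if (!PageMovable(page) || !page->isolate || !page->migrate)
		return -EINVAL;

	/* zs_isolatepage(): takes a page reference and a class reference. */
	ret = page->isolate(page);
	if (ret)
		return ret;

	/* zs_migratepage(): copies objects and fixes up handles/links. */
	ret = page->migrate(page, newpage, 0, mode);

	/* zs_putpage(): drop the references taken by isolate on failure;
	 * on success zs_migratepage() already released the old page. */
	if (ret != MIGRATEPAGE_SUCCESS)
		page->put(page);

	return ret;
}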

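A second note on why zs_migratepage() must rewrite every live handle with
record_obj(): an object location is encoded as a <pfn, obj_idx> pair, so moving
the backing page changes the encoded value.  Below is a minimal user-space
sketch of the encoding mirrored by obj_to_location()/location_to_obj(); the bit
widths are illustrative, not the kernel's exact values.

#include <stdio.h>

#define OBJ_TAG_BITS	1UL			/* low bit: OBJ_ALLOCATED_TAG */
#define OBJ_INDEX_BITS	12UL			/* illustrative width only */
#define OBJ_INDEX_MASK	((1UL << OBJ_INDEX_BITS) - 1)

static unsigned long location_to_obj(unsigned long pfn, unsigned long idx)
{
	/* Pack <pfn, idx>, leaving room for the allocated tag bit. */
	return ((pfn << OBJ_INDEX_BITS) | (idx & OBJ_INDEX_MASK)) << OBJ_TAG_BITS;
}

static void obj_to_location(unsigned long obj, unsigned long *pfn,
			    unsigned long *idx)
{
	obj >>= OBJ_TAG_BITS;
	*pfn = obj >> OBJ_INDEX_BITS;
	*idx = obj & OBJ_INDEX_MASK;
}

int main(void)
{
	unsigned long pfn, idx;
	unsigned long obj = location_to_obj(0x1234, 7);

	obj_to_location(obj, &pfn, &idx);
	printf("pfn=%#lx idx=%lu\n", pfn, idx);	/* pfn=0x1234 idx=7 */

	/* After migration the object keeps its idx but lives in a new page,
	 * so the stored value must be re-encoded with the new pfn. */
	obj = location_to_obj(0x5678, idx);
	printf("new obj=%#lx\n", obj);
	return 0;
}
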
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 132afb0..3975249 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -202,6 +202,11 @@ struct page {
 	void (*put)(struct page *page);
 	int (*migrate)(struct page *page, struct page *newpage, int force,
 		       enum migrate_mode mode);
+
+#ifdef CONFIG_ZSMALLOC
+	void *zs_class;
+	struct list_head zs_lru;
+#endif
 }
 /*
  * The struct page can be forced to be double word aligned so that atomic ops
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index f135b1b..ded3134 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -21,8 +21,8 @@
  *		starting in this page. For the first page, this is
  *		always 0, so we use this field (aka freelist) to point
  *		to the first free object in zspage.
- *	page->lru: links together all component pages (except the first page)
- *		of a zspage
+ *	page->zs_lru: links together all component pages (except the first
+ *		page) of a zspage
  *
  *	For _first_ page only:
  *
@@ -35,7 +35,7 @@
  *		metadata.
  *	page->objects: maximum number of objects we can store in this
  *		zspage (class->zspage_order * PAGE_SIZE / class->size)
- *	page->lru: links together first pages of various zspages.
+ *	page->zs_lru: links together first pages of various zspages.
  *		Basically forming list of zspages in a fullness group.
  *	page->mapping: class index and fullness group of the zspage
  *
@@ -64,6 +64,9 @@
 #include <linux/debugfs.h>
 #include <linux/zsmalloc.h>
 #include <linux/zpool.h>
+#include <linux/migrate.h>
+#include <linux/rwlock.h>
+#include <linux/mm.h>
 
 /*
  * This must be power of 2 and greater than of equal to sizeof(link_free).
@@ -214,6 +217,8 @@ struct size_class {
 
 	/* huge object: pages_per_zspage == 1 && maxobj_per_zspage == 1 */
 	bool huge;
+
+	atomic_t count;
 };
 
 /*
@@ -279,6 +284,9 @@ struct mapping_area {
 	bool huge;
 };
 
+static rwlock_t zs_class_rwlock;
+static rwlock_t zs_tag_rwlock;
+
 static int create_handle_cache(struct zs_pool *pool)
 {
 	pool->handle_cachep = kmem_cache_create("zs_handle", ZS_HANDLE_SIZE,
@@ -656,7 +664,7 @@ static void insert_zspage(struct page *page, struct size_class *class,
 	 * We want to see more ZS_FULL pages and less almost
 	 * empty/full. Put pages with higher ->inuse first.
 	 */
-	list_add_tail(&page->lru, &(*head)->lru);
+	list_add_tail(&page->zs_lru, &(*head)->zs_lru);
 	if (page->inuse >= (*head)->inuse)
 		*head = page;
 }
@@ -677,17 +685,38 @@ static void remove_zspage(struct page *page, struct size_class *class,
 
 	head = &class->fullness_list[fullness];
 	BUG_ON(!*head);
-	if (list_empty(&(*head)->lru))
+	if (list_empty(&(*head)->zs_lru))
 		*head = NULL;
 	else if (*head == page)
-		*head = (struct page *)list_entry((*head)->lru.next,
-					struct page, lru);
+		*head = (struct page *)list_entry((*head)->zs_lru.next,
+					struct page, zs_lru);
 
-	list_del_init(&page->lru);
+	list_del_init(&page->zs_lru);
 	zs_stat_dec(class, fullness == ZS_ALMOST_EMPTY ?
 			CLASS_ALMOST_EMPTY : CLASS_ALMOST_FULL, 1);
 }
 
+static void replace_zspage_first_page(struct size_class *class,
+				      struct page *page,
+				      struct page *newpage)
+{
+	struct page **head;
+	int class_idx;
+	enum fullness_group fg;
+
+	BUG_ON(!is_first_page(page));
+	BUG_ON(!is_first_page(newpage));
+
+	get_zspage_mapping(page, &class_idx, &fg);
+
+	head = &class->fullness_list[fg];
+	BUG_ON(!*head);
+	if (*head == page)
+		*head = newpage;
+
+	list_replace(&page->zs_lru, &newpage->zs_lru);
+}
+
 /*
  * Each size class maintains zspages in different fullness groups depending
  * on the number of live objects they contain. When allocating or freeing
@@ -776,7 +805,7 @@ static struct page *get_next_page(struct page *page)
 	else if (is_first_page(page))
 		next = (struct page *)page_private(page);
 	else
-		next = list_entry(page->lru.next, struct page, lru);
+		next = list_entry(page->zs_lru.next, struct page, zs_lru);
 
 	return next;
 }
@@ -809,9 +838,14 @@ static void *location_to_obj(struct page *page, unsigned long obj_idx)
 static void obj_to_location(unsigned long obj, struct page **page,
 				unsigned long *obj_idx)
 {
-	obj >>= OBJ_TAG_BITS;
-	*page = pfn_to_page(obj >> OBJ_INDEX_BITS);
-	*obj_idx = (obj & OBJ_INDEX_MASK);
+	if (obj == 0) {
+		*page = NULL;
+		*obj_idx = 0;
+	} else {
+		obj >>= OBJ_TAG_BITS;
+		*page = pfn_to_page(obj >> OBJ_INDEX_BITS);
+		*obj_idx = (obj & OBJ_INDEX_MASK);
+	}
 }
 
 static unsigned long handle_to_obj(unsigned long handle)
@@ -859,6 +893,8 @@ static void unpin_tag(unsigned long handle)
 	clear_bit_unlock(HANDLE_PIN_BIT, ptr);
 }
 
+/* This function must be called with class->lock held.  */
+
 static void reset_page(struct page *page)
 {
 	clear_bit(PG_private, &page->flags);
@@ -885,8 +921,8 @@ static void free_zspage(struct page *first_page)
 	if (!head_extra)
 		return;
 
-	list_for_each_entry_safe(nextp, tmp, &head_extra->lru, lru) {
-		list_del(&nextp->lru);
+	list_for_each_entry_safe(nextp, tmp, &head_extra->zs_lru, zs_lru) {
+		list_del(&nextp->zs_lru);
 		reset_page(nextp);
 		__free_page(nextp);
 	}
@@ -937,6 +973,314 @@ static void init_zspage(struct page *first_page, struct size_class *class)
 	}
 }
 
+static void
+get_class(struct size_class *class)
+{
+	atomic_inc(&class->count);
+}
+
+static void
+put_class(struct size_class *class)
+{
+	if (atomic_dec_and_test(&class->count))
+		kfree(class);
+}
+
+static int zs_isolatepage(struct page *page)
+{
+	int ret = -EBUSY;
+	struct size_class *class;
+
+	read_lock(&zs_class_rwlock);
+	class = page->zs_class;
+	/* Make sure the class is fetched before the page is released by
+	 * free_zspage.
+	 * Then the class must still be available, because it is only
+	 * released after the page is released.
+	 */
+	smp_mb();
+	if (!get_page_unless_zero(page))
+		goto out;
+	if (page_count(page) != 2) {
+		put_page(page);
+		goto out;
+	}
+
+	BUG_ON(class == NULL);
+	get_class(class);
+	spin_lock(&class->lock);
+	if (page->mapping != NULL)
+		ret = 0;
+	spin_unlock(&class->lock);
+
+out:
+	read_unlock(&zs_class_rwlock);
+	return ret;
+}
+
+static void zs_putpage(struct page *page)
+{
+	put_class(page->zs_class);
+	put_page(page);
+}
+
+struct zspage_loop_struct {
+	struct size_class *class;
+	struct page *page;
+	struct page *newpage;
+	void *newaddr;
+
+	struct page *cur_page;
+	void *cur_addr;
+
+	unsigned long offset;
+	unsigned int idx;
+};
+
+static void
+zspage_migratepage_obj_callback(unsigned long head,
+				struct zspage_loop_struct *zls)
+{
+	BUG_ON(zls == NULL);
+
+	if (head & OBJ_ALLOCATED_TAG) {
+		unsigned long copy_size;
+		unsigned long newobj;
+		unsigned long handle;
+
+		/* For allocated objects, migration only handles zls->page.  */
+		if (zls->cur_page != zls->page)
+			return;
+
+		copy_size = zls->class->size;
+
+		if (zls->offset + copy_size > PAGE_SIZE)
+			copy_size = PAGE_SIZE - zls->offset;
+
+		newobj = (unsigned long)location_to_obj(zls->newpage, zls->idx);
+
+		/* Removing OBJ_ALLOCATED_TAG yields the real handle.  */
+		handle = head & ~OBJ_ALLOCATED_TAG;
+		record_obj(handle, newobj);
+
+		/* Copy the allocated chunk into the new page.
+		 * The handle is included in it.
+		 */
+		memcpy(zls->newaddr + zls->offset,
+		       zls->cur_addr + zls->offset, copy_size);
+	} else {
+		struct link_free *link;
+		unsigned long obj;
+		unsigned long tmp_idx;
+		struct page *tmp_page;
+
+		link = (struct link_free *)(zls->cur_addr + zls->offset);
+		obj = (unsigned long)link->next;
+
+		obj_to_location(obj, &tmp_page, &tmp_idx);
+		if (tmp_page == zls->page) {
+			/* Update new obj with newpage to current link.  */
+			obj = (unsigned long)location_to_obj(zls->newpage,
+							     tmp_idx);
+			link->next = (void *)obj;
+		}
+
+		if (zls->cur_page == zls->page) {
+			/* Update obj to link of newaddr.  */
+			link = (struct link_free *)(zls->newaddr + zls->offset);
+			link->next = (void *)obj;
+		}
+	}
+}
+
+static void
+zspage_loop_1(struct size_class *class, struct page *cur_page,
+	      struct zspage_loop_struct *zls,
+	      void (*callback)(unsigned long head,
+			       struct zspage_loop_struct *zls))
+{
+	void *addr;
+	unsigned long m_offset = 0;
+	unsigned int obj_idx = 0;
+
+	if (!is_first_page(cur_page))
+		m_offset = cur_page->index;
+
+	addr = kmap_atomic(cur_page);
+
+	if (zls) {
+		zls->cur_page = cur_page;
+		zls->cur_addr = addr;
+	}
+
+	while (m_offset < PAGE_SIZE) {
+		unsigned long head = obj_to_head(class, cur_page,
+						 addr + m_offset);
+
+		if (zls) {
+			zls->offset = m_offset;
+			zls->idx = obj_idx;
+		}
+
+		callback(head, zls);
+
+		m_offset += class->size;
+		obj_idx++;
+	}
+
+	kunmap_atomic(addr);
+}
+
+/* While walking the zspage, if cur_page is newpage it is replaced by page.
+ * Pass NULL for page and newpage to disable this substitution.
+ */
+static void
+zspage_loop(struct size_class *class, struct page *first_page,
+	    struct page *page, struct page *newpage,
+	    void (*callback)(unsigned long head,
+			     struct zspage_loop_struct *zls))
+{
+	struct page *cur_page;
+	struct zspage_loop_struct zl;
+	struct zspage_loop_struct *zls = NULL;
+
+	BUG_ON(!is_first_page(first_page));
+
+	if (page) {
+		zls = &zl;
+		zls->class = class;
+		zls->page = page;
+		zls->newpage = newpage;
+		zls->newaddr = kmap_atomic(zls->newpage);
+	}
+
+	cur_page = first_page;
+	while (cur_page) {
+		if (cur_page == newpage)
+			cur_page = page;
+
+		zspage_loop_1(class, cur_page, zls, callback);
+
+		if (cur_page == page)
+			cur_page = newpage;
+
+		cur_page = get_next_page(cur_page);
+	}
+
+	if (zls)
+		kunmap_atomic(zls->newaddr);
+}
+
+int zs_migratepage(struct page *page, struct page *newpage, int force,
+		   enum migrate_mode mode)
+{
+	struct size_class *class = page->zs_class;
+	struct page *first_page;
+	struct page *tmp_page;
+	unsigned long tmp_idx;
+
+	write_lock(&zs_tag_rwlock);
+
+	BUG_ON(class == NULL);
+
+	spin_lock(&class->lock);
+	BUG_ON(page->mapping == NULL);
+
+	first_page = get_first_page(page);
+
+	newpage->zs_class = page->zs_class;
+	INIT_LIST_HEAD(&newpage->lru);
+	INIT_LIST_HEAD(&newpage->zs_lru);
+	page->isolate = zs_isolatepage;
+	page->put = zs_putpage;
+	page->migrate = zs_migratepage;
+
+	if (page == first_page) {	/* first page */
+		newpage->inuse = page->inuse;
+		newpage->freelist = page->freelist;
+		newpage->objects = page->objects;
+		newpage->mapping = page->mapping;
+		SetPagePrivate(newpage);
+
+		if (class->huge) {
+			unsigned long handle = page_private(page);
+			unsigned long obj
+				= (unsigned long)location_to_obj(newpage, 0);
+
+			if (handle != 0) {
+				void *addr, *newaddr;
+
+				/* The page is allocated.  */
+				handle = handle & ~OBJ_ALLOCATED_TAG;
+				record_obj(handle, obj);
+				addr = kmap_atomic(page);
+				newaddr = kmap_atomic(newpage);
+				memcpy(newaddr, addr, class->size);
+				kunmap_atomic(newaddr);
+				kunmap_atomic(addr);
+			} else
+				first_page->freelist = (void *)obj;
+			set_page_private(newpage, handle);
+		} else {
+			struct page *head_extra
+				= (struct page *)page_private(page);
+
+			if (head_extra) {
+				struct page *nextp;
+
+				head_extra->first_page = newpage;
+				list_for_each_entry(nextp, &head_extra->zs_lru,
+						    zs_lru)
+					nextp->first_page = newpage;
+			}
+			set_page_private(newpage, (unsigned long)head_extra);
+		}
+		replace_zspage_first_page(class, page, newpage);
+		first_page = newpage;
+	} else {
+		void *addr, *newaddr;
+
+		newpage->first_page = page->first_page;
+		newpage->index = page->index;
+
+		if ((struct page *)page_private(first_page) == page)
+			set_page_private(first_page, (unsigned long)newpage);
+		list_replace(&page->zs_lru, &newpage->zs_lru);
+
+		if (page->index > 0) {
+			addr = kmap_atomic(page);
+			newaddr = kmap_atomic(newpage);
+			memcpy(newaddr, addr, page->index);
+			kunmap_atomic(newaddr);
+			kunmap_atomic(addr);
+		}
+	}
+	if (is_last_page(page))	/* last page */
+		SetPagePrivate2(newpage);
+
+	if (!class->huge) {
+		zspage_loop(class, first_page, page, newpage,
+			    zspage_migratepage_obj_callback);
+
+		/* Update first_page->freelist if needed.  */
+		obj_to_location((unsigned long)first_page->freelist,
+				&tmp_page, &tmp_idx);
+		if (tmp_page == page)
+			first_page->freelist = location_to_obj(newpage,
+							       tmp_idx);
+	}
+
+	get_page(newpage);
+	SetPageMovable(newpage);
+
+	reset_page(page);
+	put_page(page);
+
+	spin_unlock(&class->lock);
+	write_unlock(&zs_tag_rwlock);
+	return MIGRATEPAGE_SUCCESS;
+}
+
 /*
  * Allocate a zspage for the given size class
  */
@@ -948,11 +1292,11 @@ static struct page *alloc_zspage(struct size_class *class, gfp_t flags)
 	/*
 	 * Allocate individual pages and link them together as:
 	 * 1. first page->private = first sub-page
-	 * 2. all sub-pages are linked together using page->lru
+	 * 2. all sub-pages are linked together using page->zs_lru
 	 * 3. each sub-page is linked to the first page using page->first_page
 	 *
 	 * For each size class, First/Head pages are linked together using
-	 * page->lru. Also, we set PG_private to identify the first page
+	 * page->zs_lru. Also, we set PG_private to identify the first page
 	 * (i.e. no other sub-page has this flag set) and PG_private_2 to
 	 * identify the last page.
 	 */
@@ -965,6 +1309,11 @@ static struct page *alloc_zspage(struct size_class *class, gfp_t flags)
 			goto cleanup;
 
 		INIT_LIST_HEAD(&page->lru);
+		INIT_LIST_HEAD(&page->zs_lru);
+		page->isolate = zs_isolatepage;
+		page->put = zs_putpage;
+		page->migrate = zs_migratepage;
+		page->zs_class = class;
 		if (i == 0) {	/* first page */
 			SetPagePrivate(page);
 			set_page_private(page, 0);
@@ -973,10 +1322,12 @@ static struct page *alloc_zspage(struct size_class *class, gfp_t flags)
 		}
 		if (i == 1)
 			set_page_private(first_page, (unsigned long)page);
-		if (i >= 1)
+		if (i >= 1) {
 			page->first_page = first_page;
+			page->mapping = (void *)1;
+		}
 		if (i >= 2)
-			list_add(&page->lru, &prev_page->lru);
+			list_add(&page->zs_lru, &prev_page->zs_lru);
 		if (i == class->pages_per_zspage - 1)	/* last page */
 			SetPagePrivate2(page);
 		prev_page = page;
@@ -1267,6 +1618,7 @@ void *zs_map_object(struct zs_pool *pool, unsigned long handle,
 	BUG_ON(in_interrupt());
 
 	/* From now on, migration cannot move the object */
+	read_lock(&zs_tag_rwlock);
 	pin_tag(handle);
 
 	obj = handle_to_obj(handle);
@@ -1330,6 +1682,7 @@ void zs_unmap_object(struct zs_pool *pool, unsigned long handle)
 	}
 	put_cpu_var(zs_map_area);
 	unpin_tag(handle);
+	read_unlock(&zs_tag_rwlock);
 }
 EXPORT_SYMBOL_GPL(zs_unmap_object);
 
@@ -1365,6 +1718,21 @@ static unsigned long obj_malloc(struct page *first_page,
 }
 
 
+static void set_zspage_movable(struct size_class *class, struct page *page)
+{
+	struct page *head_extra = (struct page *)page_private(page);
+	struct page *nextp;
+
+	BUG_ON(!is_first_page(page));
+
+	SetPageMovable(page);
+	if (!class->huge && head_extra) {
+		SetPageMovable(head_extra);
+		list_for_each_entry(nextp, &head_extra->zs_lru, zs_lru)
+			SetPageMovable(nextp);
+	}
+}
+
 /**
  * zs_malloc - Allocate block of given size from pool.
  * @pool: pool to allocate from
@@ -1407,6 +1775,7 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size)
 					&pool->pages_allocated);
 
 		spin_lock(&class->lock);
+		set_zspage_movable(class, first_page);
 		zs_stat_inc(class, OBJ_ALLOCATED, get_maxobj_per_zspage(
 				class->size, class->pages_per_zspage));
 	}
@@ -1464,6 +1833,7 @@ void zs_free(struct zs_pool *pool, unsigned long handle)
 	if (unlikely(!handle))
 		return;
 
+	read_lock(&zs_tag_rwlock);
 	pin_tag(handle);
 	obj = handle_to_obj(handle);
 	obj_to_location(obj, &f_page, &f_objidx);
@@ -1484,6 +1854,7 @@ void zs_free(struct zs_pool *pool, unsigned long handle)
 	}
 	spin_unlock(&class->lock);
 	unpin_tag(handle);
+	read_unlock(&zs_tag_rwlock);
 
 	free_handle(pool, handle);
 }
@@ -1931,6 +2302,8 @@ struct zs_pool *zs_create_pool(char *name, gfp_t flags)
 			get_maxobj_per_zspage(size, pages_per_zspage) == 1)
 			class->huge = true;
 		spin_lock_init(&class->lock);
+		atomic_set(&class->count, 0);
+		get_class(class);
 		pool->size_class[i] = class;
 
 		prev_class = class;
@@ -1978,7 +2351,9 @@ void zs_destroy_pool(struct zs_pool *pool)
 					class->size, fg);
 			}
 		}
-		kfree(class);
+		write_lock(&zs_class_rwlock);
+		put_class(class);
+		write_unlock(&zs_class_rwlock);
 	}
 
 	destroy_handle_cache(pool);
@@ -1995,6 +2370,9 @@ static int __init zs_init(void)
 	if (ret)
 		goto notifier_fail;
 
+	rwlock_init(&zs_class_rwlock);
+	rwlock_init(&zs_tag_rwlock);
+
 	init_zs_size_classes();
 
 #ifdef CONFIG_ZPOOL
-- 
1.9.1
