The patch titled Subject: zsmalloc: migrate tail pages in zspage has been added to the -mm tree. Its filename is zsmalloc-migrate-tail-pages-in-zspage.patch This patch should soon appear at http://ozlabs.org/~akpm/mmots/broken-out/zsmalloc-migrate-tail-pages-in-zspage.patch and later at http://ozlabs.org/~akpm/mmotm/broken-out/zsmalloc-migrate-tail-pages-in-zspage.patch Before you just go and hit "reply", please: a) Consider who else should be cc'ed b) Prefer to cc a suitable mailing list as well c) Ideally: find the original patch on the mailing list and do a reply-to-all to that, adding suitable additional cc's *** Remember to use Documentation/SubmitChecklist when testing your code *** The -mm tree is included into linux-next and is updated there every 3-4 working days ------------------------------------------------------ From: Minchan Kim <minchan@xxxxxxxxxx> Subject: zsmalloc: migrate tail pages in zspage This patch enables tail page migration of zspage. At this point, I tested zsmalloc for regressions with a micro-benchmark which does zs_malloc/map/unmap/zs_free for all size classes on every CPU (my system has 12) for 20 sec. It shows a 1% regression, which is really small when we consider the benefit of this feature and real workload overhead (i.e., most overhead comes from compression). 
Signed-off-by: Minchan Kim <minchan@xxxxxxxxxx> Cc: Vlastimil Babka <vbabka@xxxxxxx> Cc: Joonsoo Kim <iamjoonsoo.kim@xxxxxxx> Cc: Konstantin Khlebnikov <koct9i@xxxxxxxxx> Cc: Rafael Aquini <aquini@xxxxxxxxxx> Cc: Russ Knize <rknize@xxxxxxxxxxxx> Cc: Mel Gorman <mgorman@xxxxxxx> Cc: Hugh Dickins <hughd@xxxxxxxxxx> Cc: Sergey Senozhatsky <sergey.senozhatsky@xxxxxxxxx> Cc: Rik van Riel <riel@xxxxxxxxxx> Cc: Gioh Kim <gi-oh.kim@xxxxxxxxxxxxxxxx> Cc: Sangseok Lee <sangseok.lee@xxxxxxx> Cc: Chan Gyun Jeong <chan.jeong@xxxxxxx> Cc: Al Viro <viro@xxxxxxxxxxxxxxxxxx> Cc: YiPing Xu <xuyiping@xxxxxxxxxxxxx> Cc: Minchan Kim <minchan@xxxxxxxxxx> Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx> --- mm/zsmalloc.c | 129 ++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 114 insertions(+), 15 deletions(-) diff -puN mm/zsmalloc.c~zsmalloc-migrate-tail-pages-in-zspage mm/zsmalloc.c --- a/mm/zsmalloc.c~zsmalloc-migrate-tail-pages-in-zspage +++ a/mm/zsmalloc.c @@ -551,6 +551,19 @@ static void set_zspage_mapping(struct pa m->class = class_idx; } +static bool check_isolated_page(struct page *first_page) +{ + struct page *cursor; + + for (cursor = first_page; cursor != NULL; cursor = + get_next_page(cursor)) { + if (PageIsolated(cursor)) + return true; + } + + return false; +} + /* * zsmalloc divides the pool into various size classes where each * class maintains a list of zspages where each zspage is divided @@ -1052,6 +1065,44 @@ void lock_zspage(struct page *first_page } while ((cursor = get_next_page(cursor)) != NULL); } +int trylock_zspage(struct page *first_page, struct page *locked_page) +{ + struct page *cursor, *fail; + + VM_BUG_ON_PAGE(!is_first_page(first_page), first_page); + + for (cursor = first_page; cursor != NULL; cursor = + get_next_page(cursor)) { + if (cursor != locked_page) { + if (!trylock_page(cursor)) { + fail = cursor; + goto unlock; + } + } + } + + return 1; +unlock: + for (cursor = first_page; cursor != fail; cursor = + 
get_next_page(cursor)) { + if (cursor != locked_page) + unlock_page(cursor); + } + + return 0; +} + +void unlock_zspage(struct page *first_page, struct page *locked_page) +{ + struct page *cursor = first_page; + + for (; cursor != NULL; cursor = get_next_page(cursor)) { + VM_BUG_ON_PAGE(!PageLocked(cursor), cursor); + if (cursor != locked_page) + unlock_page(cursor); + } +} + static void free_zspage(struct zs_pool *pool, struct page *first_page) { struct page *nextp, *tmp; @@ -1090,15 +1141,16 @@ static void init_zspage(struct size_clas first_page->freelist = NULL; INIT_LIST_HEAD(&first_page->lru); set_zspage_inuse(first_page, 0); - BUG_ON(!trylock_page(first_page)); - __SetPageMovable(first_page, mapping); - unlock_page(first_page); while (page) { struct page *next_page; struct link_free *link; void *vaddr; + BUG_ON(!trylock_page(page)); + __SetPageMovable(page, mapping); + unlock_page(page); + vaddr = kmap_atomic(page); link = (struct link_free *)vaddr + off / sizeof(*link); @@ -1848,6 +1900,7 @@ static enum fullness_group putback_zspag VM_BUG_ON_PAGE(!list_empty(&first_page->lru), first_page); VM_BUG_ON_PAGE(ZsPageIsolate(first_page), first_page); + VM_BUG_ON_PAGE(check_isolated_page(first_page), first_page); fullness = get_fullness_group(class, first_page); insert_zspage(class, fullness, first_page); @@ -1954,6 +2007,12 @@ static struct page *isolate_source_page( if (!page) continue; + /* To prevent race between object and page migration */ + if (!trylock_zspage(page, NULL)) { + page = NULL; + continue; + } + remove_zspage(class, i, page); inuse = get_zspage_inuse(page); @@ -1962,6 +2021,7 @@ static struct page *isolate_source_page( if (inuse != freezed) { unfreeze_zspage(class, page, freezed); putback_zspage(class, page); + unlock_zspage(page, NULL); page = NULL; continue; } @@ -1993,6 +2053,12 @@ static struct page *isolate_target_page( if (!page) continue; + /* To prevent race between object and page migration */ + if (!trylock_zspage(page, NULL)) { + page = 
NULL; + continue; + } + remove_zspage(class, i, page); inuse = get_zspage_inuse(page); @@ -2001,6 +2067,7 @@ static struct page *isolate_target_page( if (inuse != freezed) { unfreeze_zspage(class, page, freezed); putback_zspage(class, page); + unlock_zspage(page, NULL); page = NULL; continue; } @@ -2074,11 +2141,13 @@ static void __zs_compact(struct zs_pool putback_zspage(class, dst_page); unfreeze_zspage(class, dst_page, class->objs_per_zspage); + unlock_zspage(dst_page, NULL); spin_unlock(&class->lock); dst_page = NULL; } if (zspage_empty(class, src_page)) { + unlock_zspage(src_page, NULL); free_zspage(pool, src_page); spin_lock(&class->lock); zs_stat_dec(class, OBJ_ALLOCATED, @@ -2101,12 +2170,14 @@ static void __zs_compact(struct zs_pool putback_zspage(class, src_page); unfreeze_zspage(class, src_page, class->objs_per_zspage); + unlock_zspage(src_page, NULL); } if (dst_page) { putback_zspage(class, dst_page); unfreeze_zspage(class, dst_page, class->objs_per_zspage); + unlock_zspage(dst_page, NULL); } spin_unlock(&class->lock); @@ -2209,10 +2280,11 @@ bool zs_page_isolate(struct page *page, VM_BUG_ON_PAGE(!PageLocked(page), page); VM_BUG_ON_PAGE(PageIsolated(page), page); /* - * In this implementation, it allows only first page migration. + * first_page will not be destroyed by PG_lock of @page but it could + * be migrated out. For prohibiting it, zs_page_migrate calls + * trylock_zspage so it closes the race. */ - VM_BUG_ON_PAGE(!is_first_page(page), page); - first_page = page; + first_page = get_first_page(page); /* * Without class lock, fullness is meaningless while constant @@ -2226,9 +2298,18 @@ bool zs_page_isolate(struct page *page, if (!spin_trylock(&class->lock)) return false; + if (check_isolated_page(first_page)) + goto skip_isolate; + + /* + * If this is first time isolation for zspage, isolate zspage from + * size_class to prevent further allocations from the zspage. 
+ */ get_zspage_mapping(first_page, &class_idx, &fullness); remove_zspage(class, fullness, first_page); SetZsPageIsolate(first_page); + +skip_isolate: SetPageIsolated(page); spin_unlock(&class->lock); @@ -2251,7 +2332,7 @@ int zs_page_migrate(struct address_space VM_BUG_ON_PAGE(!PageMovable(page), page); VM_BUG_ON_PAGE(!PageIsolated(page), page); - first_page = page; + first_page = get_first_page(page); get_zspage_mapping(first_page, &class_idx, &fullness); pool = page->mapping->private_data; class = pool->size_class[class_idx]; @@ -2266,6 +2347,13 @@ int zs_page_migrate(struct address_space if (get_zspage_inuse(first_page) == 0) goto out_class_unlock; + /* + * It prevents first_page migration during tail page operation for + * get_first_page's stability. + */ + if (!trylock_zspage(first_page, page)) + goto out_class_unlock; + freezed = freeze_zspage(class, first_page); if (freezed != get_zspage_inuse(first_page)) goto out_unfreeze; @@ -2304,21 +2392,26 @@ int zs_page_migrate(struct address_space kunmap_atomic(addr); replace_sub_page(class, first_page, newpage, page); - first_page = newpage; + first_page = get_first_page(newpage); get_page(newpage); VM_BUG_ON_PAGE(get_fullness_group(class, first_page) == ZS_EMPTY, first_page); - ClearZsPageIsolate(first_page); - putback_zspage(class, first_page); + if (!check_isolated_page(first_page)) { + INIT_LIST_HEAD(&first_page->lru); + ClearZsPageIsolate(first_page); + putback_zspage(class, first_page); + } + /* Migration complete. 
Free old page */ ClearPageIsolated(page); reset_page(page); put_page(page); ret = MIGRATEPAGE_SUCCESS; - + page = newpage; out_unfreeze: unfreeze_zspage(class, first_page, freezed); + unlock_zspage(first_page, page); out_class_unlock: spin_unlock(&class->lock); @@ -2336,7 +2429,7 @@ void zs_page_putback(struct page *page) VM_BUG_ON_PAGE(!PageMovable(page), page); VM_BUG_ON_PAGE(!PageIsolated(page), page); - first_page = page; + first_page = get_first_page(page); get_zspage_mapping(first_page, &class_idx, &fullness); pool = page->mapping->private_data; class = pool->size_class[class_idx]; @@ -2346,11 +2439,17 @@ void zs_page_putback(struct page *page) * in zs_free will wait the page lock of @page without * destroying of zspage. */ - INIT_LIST_HEAD(&first_page->lru); spin_lock(&class->lock); ClearPageIsolated(page); - ClearZsPageIsolate(first_page); - putback_zspage(class, first_page); + /* + * putback zspage to right list if this is last isolated page + * putback in the zspage. + */ + if (!check_isolated_page(first_page)) { + INIT_LIST_HEAD(&first_page->lru); + ClearZsPageIsolate(first_page); + putback_zspage(class, first_page); + } spin_unlock(&class->lock); } _ Patches currently in -mm which might be from minchan@xxxxxxxxxx are zsmalloc-use-first_page-rather-than-page.patch zsmalloc-clean-up-many-bug_on.patch zsmalloc-reordering-function-parameter.patch zsmalloc-remove-unused-pool-param-in-obj_free.patch mm-use-put_page-to-free-page-instead-of-putback_lru_page.patch mm-compaction-support-non-lru-movable-page-migration.patch mm-add-non-lru-movable-page-support-document.patch mm-balloon-use-general-movable-page-feature-into-balloon.patch zsmalloc-keep-max_object-in-size_class.patch zsmalloc-squeeze-inuse-into-page-mapping.patch zsmalloc-remove-page_mapcount_reset.patch zsmalloc-squeeze-freelist-into-page-mapping.patch zsmalloc-move-struct-zs_meta-from-mapping-to-freelist.patch zsmalloc-factor-page-chain-functionality-out.patch 
zsmalloc-separate-free_zspage-from-putback_zspage.patch zsmalloc-zs_compact-refactoring.patch zsmalloc-migrate-head-page-of-zspage.patch zsmalloc-use-single-linked-list-for-page-chain.patch zsmalloc-migrate-tail-pages-in-zspage.patch zram-use-__gfp_movable-for-memory-allocation.patch -- To unsubscribe from this list: send the line "unsubscribe mm-commits" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html