From: Zi Yan <ziy@xxxxxxxxxx>

exchange_pages_concur() unmaps two lists of pages, then exchanges them in
exchange_page_lists_mthread(), and finally remaps both lists of pages.

Signed-off-by: Zi Yan <ziy@xxxxxxxxxx>
---
 include/linux/exchange.h |   2 +
 mm/exchange.c            | 397 ++++++++++++++++++++++++++++++++++++++++++++++
 mm/exchange_page.c       |   1 -
 3 files changed, 399 insertions(+), 1 deletion(-)

diff --git a/include/linux/exchange.h b/include/linux/exchange.h
index 778068e..20d2184 100644
--- a/include/linux/exchange.h
+++ b/include/linux/exchange.h
@@ -20,4 +20,6 @@ struct exchange_page_info {
 int exchange_pages(struct list_head *exchange_list,
 			enum migrate_mode mode, int reason);
+int exchange_pages_concur(struct list_head *exchange_list,
+			enum migrate_mode mode, int reason);
 
 #endif /* _LINUX_EXCHANGE_H */
diff --git a/mm/exchange.c b/mm/exchange.c
index ce2c899..bbada58 100644
--- a/mm/exchange.c
+++ b/mm/exchange.c
@@ -600,3 +600,400 @@ int exchange_pages(struct list_head *exchange_list,
 	}
 	return failed;
 }
+
+
+static int unmap_pair_pages_concur(struct exchange_page_info *one_pair,
+				int force, enum migrate_mode mode)
+{
+	int rc = -EAGAIN;
+	struct anon_vma *anon_vma_from_page = NULL, *anon_vma_to_page = NULL;
+	struct page *from_page = one_pair->from_page;
+	struct page *to_page = one_pair->to_page;
+
+	/* from_page lock down */
+	if (!trylock_page(from_page)) {
+		if (!force || ((mode & MIGRATE_MODE_MASK) == MIGRATE_ASYNC))
+			goto out;
+
+		lock_page(from_page);
+	}
+
+	BUG_ON(PageWriteback(from_page));
+
+	/*
+	 * By try_to_unmap(), page->mapcount goes down to 0 here. In this case,
+	 * we cannot notice that anon_vma is freed while we migrates a page.
+	 * This get_anon_vma() delays freeing anon_vma pointer until the end
+	 * of migration. File cache pages are no problem because of page_lock()
+	 * File Caches may use write_page() or lock_page() in migration, then,
+	 * just care Anon page here.
+	 *
+	 * Only page_get_anon_vma() understands the subtleties of
+	 * getting a hold on an anon_vma from outside one of its mms.
+	 * But if we cannot get anon_vma, then we won't need it anyway,
+	 * because that implies that the anon page is no longer mapped
+	 * (and cannot be remapped so long as we hold the page lock).
+	 */
+	if (PageAnon(from_page) && !PageKsm(from_page))
+		one_pair->from_anon_vma = anon_vma_from_page
+					= page_get_anon_vma(from_page);
+
+	/* to_page lock down */
+	if (!trylock_page(to_page)) {
+		if (!force || ((mode & MIGRATE_MODE_MASK) == MIGRATE_ASYNC))
+			goto out_unlock;
+
+		lock_page(to_page);
+	}
+
+	BUG_ON(PageWriteback(to_page));
+
+	/*
+	 * By try_to_unmap(), page->mapcount goes down to 0 here. In this case,
+	 * we cannot notice that anon_vma is freed while we migrates a page.
+	 * This get_anon_vma() delays freeing anon_vma pointer until the end
+	 * of migration. File cache pages are no problem because of page_lock()
+	 * File Caches may use write_page() or lock_page() in migration, then,
+	 * just care Anon page here.
+	 *
+	 * Only page_get_anon_vma() understands the subtleties of
+	 * getting a hold on an anon_vma from outside one of its mms.
+	 * But if we cannot get anon_vma, then we won't need it anyway,
+	 * because that implies that the anon page is no longer mapped
+	 * (and cannot be remapped so long as we hold the page lock).
+	 */
+	if (PageAnon(to_page) && !PageKsm(to_page))
+		one_pair->to_anon_vma = anon_vma_to_page = page_get_anon_vma(to_page);
+
+	/*
+	 * Corner case handling:
+	 * 1. When a new swap-cache page is read into, it is added to the LRU
+	 * and treated as swapcache but it has no rmap yet.
+	 * Calling try_to_unmap() against a page->mapping==NULL page will
+	 * trigger a BUG. So handle it here.
+	 * 2. An orphaned page (see truncate_complete_page) might have
+	 * fs-private metadata. The page can be picked up due to memory
+	 * offlining. Everywhere else except page reclaim, the page is
+	 * invisible to the vm, so the page can not be migrated. So try to
+	 * free the metadata, so the page can be freed.
+	 */
+	if (!from_page->mapping) {
+		VM_BUG_ON_PAGE(PageAnon(from_page), from_page);
+		if (page_has_private(from_page)) {
+			try_to_free_buffers(from_page);
+			goto out_unlock_both;
+		}
+	} else if (page_mapped(from_page)) {
+		/* Establish migration ptes */
+		VM_BUG_ON_PAGE(PageAnon(from_page) && !PageKsm(from_page) &&
+			       !anon_vma_from_page, from_page);
+		try_to_unmap(from_page,
+			TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS);
+
+		one_pair->from_page_was_mapped = 1;
+	}
+
+	if (!to_page->mapping) {
+		VM_BUG_ON_PAGE(PageAnon(to_page), to_page);
+		if (page_has_private(to_page)) {
+			try_to_free_buffers(to_page);
+			goto out_unlock_both;
+		}
+	} else if (page_mapped(to_page)) {
+		/* Establish migration ptes */
+		VM_BUG_ON_PAGE(PageAnon(to_page) && !PageKsm(to_page) &&
+			       !anon_vma_to_page, to_page);
+		try_to_unmap(to_page,
+			TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS);
+
+		one_pair->to_page_was_mapped = 1;
+	}
+
+	return MIGRATEPAGE_SUCCESS;
+
+out_unlock_both:
+	if (anon_vma_to_page)
+		put_anon_vma(anon_vma_to_page);
+	unlock_page(to_page);
+out_unlock:
+	/* Drop an anon_vma reference if we took one */
+	if (anon_vma_from_page)
+		put_anon_vma(anon_vma_from_page);
+	unlock_page(from_page);
+out:
+
+	return rc;
+}
+
+static int exchange_page_mapping_concur(struct list_head *unmapped_list_ptr,
+					struct list_head *exchange_list_ptr,
+					enum migrate_mode mode)
+{
+	int rc = -EBUSY;
+	int nr_failed = 0;
+	struct address_space *to_page_mapping, *from_page_mapping;
+	struct exchange_page_info *one_pair, *one_pair2;
+
+	list_for_each_entry_safe(one_pair, one_pair2, unmapped_list_ptr, list) {
+		struct page *from_page = one_pair->from_page;
+		struct page *to_page = one_pair->to_page;
+
+		VM_BUG_ON_PAGE(!PageLocked(from_page), from_page);
+		VM_BUG_ON_PAGE(!PageLocked(to_page), to_page);
+
+		/* copy page->mapping not use page_mapping() */
+		to_page_mapping = page_mapping(to_page);
+		from_page_mapping = page_mapping(from_page);
+
+		BUG_ON(from_page_mapping);
+		BUG_ON(to_page_mapping);
+
+		BUG_ON(PageWriteback(from_page));
+		BUG_ON(PageWriteback(to_page));
+
+		/* actual page mapping exchange */
+		rc = exchange_page_move_mapping(to_page_mapping, from_page_mapping,
+					to_page, from_page, mode, 0, 0);
+
+		if (rc) {
+			if (one_pair->from_page_was_mapped)
+				remove_migration_ptes(from_page, from_page, false);
+			if (one_pair->to_page_was_mapped)
+				remove_migration_ptes(to_page, to_page, false);
+
+			if (one_pair->from_anon_vma)
+				put_anon_vma(one_pair->from_anon_vma);
+			unlock_page(from_page);
+
+			if (one_pair->to_anon_vma)
+				put_anon_vma(one_pair->to_anon_vma);
+			unlock_page(to_page);
+
+			mod_node_page_state(page_pgdat(from_page), NR_ISOLATED_ANON +
+				page_is_file_cache(from_page), -hpage_nr_pages(from_page));
+			putback_lru_page(from_page);
+
+			mod_node_page_state(page_pgdat(to_page), NR_ISOLATED_ANON +
+				page_is_file_cache(to_page), -hpage_nr_pages(to_page));
+			putback_lru_page(to_page);
+
+			one_pair->from_page = NULL;
+			one_pair->to_page = NULL;
+
+			list_move(&one_pair->list, exchange_list_ptr);
+			++nr_failed;
+		}
+	}
+
+	return nr_failed;
+}
+
+static int exchange_page_data_concur(struct list_head *unmapped_list_ptr,
+				enum migrate_mode mode)
+{
+	struct exchange_page_info *one_pair;
+	int num_pages = 0, idx = 0;
+	struct page **src_page_list = NULL, **dst_page_list = NULL;
+	unsigned long size = 0;
+	int rc = -EFAULT;
+
+	if (list_empty(unmapped_list_ptr))
+		return 0;
+
+	/* form page list */
+	list_for_each_entry(one_pair, unmapped_list_ptr, list) {
+		++num_pages;
+		size += PAGE_SIZE * hpage_nr_pages(one_pair->from_page);
+	}
+
+	src_page_list = kzalloc(sizeof(struct page *)*num_pages, GFP_KERNEL);
+	if (!src_page_list)
+		return -ENOMEM;
+	dst_page_list = kzalloc(sizeof(struct page *)*num_pages, GFP_KERNEL);
+	if (!dst_page_list) {
+		kfree(src_page_list);
+		return -ENOMEM;
+	}
+
+	list_for_each_entry(one_pair, unmapped_list_ptr, list) {
+		src_page_list[idx] = one_pair->from_page;
+		dst_page_list[idx] = one_pair->to_page;
+		++idx;
+	}
+
+	BUG_ON(idx != num_pages);
+
+
+	if (mode & MIGRATE_MT)
+		rc = exchange_page_lists_mthread(dst_page_list, src_page_list,
+				num_pages);
+
+	if (rc) {
+		list_for_each_entry(one_pair, unmapped_list_ptr, list) {
+			if (PageHuge(one_pair->from_page) ||
+				PageTransHuge(one_pair->from_page)) {
+				exchange_huge_page(one_pair->to_page, one_pair->from_page);
+			} else {
+				exchange_highpage(one_pair->to_page, one_pair->from_page);
+			}
+		}
+	}
+
+	kfree(src_page_list);
+	kfree(dst_page_list);
+
+	list_for_each_entry(one_pair, unmapped_list_ptr, list) {
+		exchange_page_flags(one_pair->to_page, one_pair->from_page);
+	}
+
+	return rc;
+}
+
+static int remove_migration_ptes_concur(struct list_head *unmapped_list_ptr)
+{
+	struct exchange_page_info *iterator;
+
+	list_for_each_entry(iterator, unmapped_list_ptr, list) {
+		remove_migration_ptes(iterator->from_page, iterator->to_page, false);
+		remove_migration_ptes(iterator->to_page, iterator->from_page, false);
+
+
+		if (iterator->from_anon_vma)
+			put_anon_vma(iterator->from_anon_vma);
+		unlock_page(iterator->from_page);
+
+
+		if (iterator->to_anon_vma)
+			put_anon_vma(iterator->to_anon_vma);
+		unlock_page(iterator->to_page);
+
+
+		putback_lru_page(iterator->from_page);
+		iterator->from_page = NULL;
+
+		putback_lru_page(iterator->to_page);
+		iterator->to_page = NULL;
+	}
+
+	return 0;
+}
+
+int exchange_pages_concur(struct list_head *exchange_list,
+		enum migrate_mode mode, int reason)
+{
+	struct exchange_page_info *one_pair, *one_pair2;
+	int pass = 0;
+	int retry = 1;
+	int nr_failed = 0;
+	int nr_succeeded = 0;
+	int rc = 0;
+	LIST_HEAD(serialized_list);
+	LIST_HEAD(unmapped_list);
+
+	for(pass = 0; pass < 1 && retry; pass++) {
+		retry = 0;
+
+		/* unmap and get new page for page_mapping(page) == NULL */
+		list_for_each_entry_safe(one_pair, one_pair2, exchange_list, list) {
+			struct page *from_page = one_pair->from_page;
+			struct page *to_page = one_pair->to_page;
+			cond_resched();
+
+			if (page_count(from_page) == 1) {
+				/* page was freed from under us. So we are done */
+				ClearPageActive(from_page);
+				ClearPageUnevictable(from_page);
+
+				put_page(from_page);
+				dec_node_page_state(from_page, NR_ISOLATED_ANON +
+						page_is_file_cache(from_page));
+
+				if (page_count(to_page) == 1) {
+					ClearPageActive(to_page);
+					ClearPageUnevictable(to_page);
+					put_page(to_page);
+				} else {
+					mod_node_page_state(page_pgdat(to_page), NR_ISOLATED_ANON +
						page_is_file_cache(to_page), -hpage_nr_pages(to_page));
+					putback_lru_page(to_page);
+				}
+				list_del(&one_pair->list);
+
+				continue;
+			}
+
+			if (page_count(to_page) == 1) {
+				/* page was freed from under us. So we are done */
+				ClearPageActive(to_page);
+				ClearPageUnevictable(to_page);
+
+				put_page(to_page);
+
+				dec_node_page_state(to_page, NR_ISOLATED_ANON +
+						page_is_file_cache(to_page));
+
+				mod_node_page_state(page_pgdat(from_page), NR_ISOLATED_ANON +
+					page_is_file_cache(from_page), -hpage_nr_pages(from_page));
+				putback_lru_page(from_page);
+
+				list_del(&one_pair->list);
+				continue;
+			}
+			/* We do not exchange huge pages and file-backed pages concurrently */
+			if (PageHuge(one_pair->from_page) || PageHuge(one_pair->to_page)) {
+				rc = -ENODEV;
+			}
+			else if ((page_mapping(one_pair->from_page) != NULL) ||
+				 (page_mapping(one_pair->to_page) != NULL)) {
+				rc = -ENODEV;
+			}
+			else
+				rc = unmap_pair_pages_concur(one_pair, 1, mode);
+
+			switch(rc) {
+			case -ENODEV:
+				list_move(&one_pair->list, &serialized_list);
+				break;
+			case -ENOMEM:
+				goto out;
+			case -EAGAIN:
+				retry++;
+				break;
+			case MIGRATEPAGE_SUCCESS:
+				list_move(&one_pair->list, &unmapped_list);
+				nr_succeeded++;
+				break;
+			default:
+				/*
+				 * Permanent failure (-EBUSY, -ENOSYS, etc.):
+				 * unlike -EAGAIN case, the failed page is
+				 * removed from migration page list and not
+				 * retried in the next outer loop.
+				 */
+				list_move(&one_pair->list, &serialized_list);
+				nr_failed++;
+				break;
+			}
+		}
+
+		/* move page->mapping to new page, only -EAGAIN could happen */
+		exchange_page_mapping_concur(&unmapped_list, exchange_list, mode);
+
+
+		/* copy pages in unmapped_list */
+		exchange_page_data_concur(&unmapped_list, mode);
+
+
+		/* remove migration pte, if old_page is NULL?, unlock old and new
+		 * pages, put anon_vma, put old and new pages */
+		remove_migration_ptes_concur(&unmapped_list);
+	}
+
+	nr_failed += retry;
+	rc = nr_failed;
+
+	exchange_pages(&serialized_list, mode, reason);
+out:
+	list_splice(&unmapped_list, exchange_list);
+	list_splice(&serialized_list, exchange_list);
+
+	return nr_failed ? -EFAULT : 0;
+}
diff --git a/mm/exchange_page.c b/mm/exchange_page.c
index 6054697..5dba0a6 100644
--- a/mm/exchange_page.c
+++ b/mm/exchange_page.c
@@ -126,7 +126,6 @@ int exchange_page_lists_mthread(struct page **to, struct page **from, int nr_pag
 	int to_node = page_to_nid(*to);
 	int i;
 	struct copy_page_info *work_items;
-	int nr_pages_per_page = hpage_nr_pages(*from);
 	const struct cpumask *per_node_cpumask = cpumask_of_node(to_node);
 	int cpu_id_list[32] = {0};
 	int cpu;
-- 
2.7.4