From: Zi Yan <ziy@xxxxxxxxxx>

Support exchanging an anonymous page with a file-backed page: from_page still has to be anonymous, while to_page may now be file-backed. This is only done for the basic exchange pages path, because we might need to lock multiple files when doing concurrent exchange pages, which could easily cause deadlocks.

Signed-off-by: Zi Yan <ziy@xxxxxxxxxx>
---
(An illustrative userspace sketch of the refcount-freeze ordering used by exchange_page_move_mapping() is appended after the patch.)

 mm/exchange.c | 284 ++++++++++++++++++++++++++++++++++++++++++++++------------
 mm/internal.h |   9 ++
 mm/migrate.c  |   6 +-
 3 files changed, 241 insertions(+), 58 deletions(-)

diff --git a/mm/exchange.c b/mm/exchange.c
index bbada58..555a72c 100644
--- a/mm/exchange.c
+++ b/mm/exchange.c
@@ -20,6 +20,8 @@
 #include <linux/memcontrol.h>
 #include <linux/balloon_compaction.h>
 #include <linux/buffer_head.h>
+#include <linux/fs.h> /* buffer_migrate_page */
+#include <linux/backing-dev.h>
 
 #include "internal.h"
 
@@ -147,8 +149,6 @@ static void exchange_page_flags(struct page *to_page, struct page *from_page)
 	from_page_flags.page_is_idle = page_is_idle(from_page);
 	clear_page_idle(from_page);
 	from_page_flags.page_swapcache = PageSwapCache(from_page);
-	from_page_flags.page_private = PagePrivate(from_page);
-	ClearPagePrivate(from_page);
 	from_page_flags.page_writeback = test_clear_page_writeback(from_page);
 
 
@@ -170,8 +170,6 @@ static void exchange_page_flags(struct page *to_page, struct page *from_page)
 	to_page_flags.page_is_idle = page_is_idle(to_page);
 	clear_page_idle(to_page);
 	to_page_flags.page_swapcache = PageSwapCache(to_page);
-	to_page_flags.page_private = PagePrivate(to_page);
-	ClearPagePrivate(to_page);
 	to_page_flags.page_writeback = test_clear_page_writeback(to_page);
 
 	/* set to_page */
@@ -268,18 +266,22 @@ static void exchange_page_flags(struct page *to_page, struct page *from_page)
 static int exchange_page_move_mapping(struct address_space *to_mapping,
 			struct address_space *from_mapping,
 			struct page *to_page, struct page *from_page,
+			struct buffer_head *to_head, struct buffer_head *from_head,
 			enum migrate_mode mode,
 			int to_extra_count, int from_extra_count)
 {
-	int to_expected_count = 1 + to_extra_count,
-		from_expected_count = 1 + from_extra_count;
-	unsigned long from_page_index = page_index(from_page),
-		to_page_index = page_index(to_page);
+	int to_expected_count = expected_page_refs(to_mapping, to_page) + to_extra_count,
+		from_expected_count = expected_page_refs(from_mapping, from_page) + from_extra_count;
+	unsigned long from_page_index = from_page->index;
+	unsigned long to_page_index = to_page->index;
 	int to_swapbacked = PageSwapBacked(to_page),
 		from_swapbacked = PageSwapBacked(from_page);
-	struct address_space *to_mapping_value = to_page->mapping,
-		*from_mapping_value = from_page->mapping;
+	struct address_space *to_mapping_value = to_page->mapping;
+	struct address_space *from_mapping_value = from_page->mapping;
+	VM_BUG_ON_PAGE(to_mapping != page_mapping(to_page), to_page);
+	VM_BUG_ON_PAGE(from_mapping != page_mapping(from_page), from_page);
+	VM_BUG_ON(PageCompound(from_page) != PageCompound(to_page));
 
 	if (!to_mapping) {
 		/* Anonymous page without mapping */
@@ -293,26 +295,125 @@ static int exchange_page_move_mapping(struct address_space *to_mapping,
 		return -EAGAIN;
 	}
 
-	/*
-	 * Now we know that no one else is looking at the page:
-	 * no turning back from here.
-	 */
-	/* from_page */
-	from_page->index = to_page_index;
-	from_page->mapping = to_mapping_value;
+	/* both are anonymous pages */
+	if (!from_mapping && !to_mapping) {
+		/* from_page */
+		from_page->index = to_page_index;
+		from_page->mapping = to_mapping_value;
+
+		ClearPageSwapBacked(from_page);
+		if (to_swapbacked)
+			SetPageSwapBacked(from_page);
+
+
+		/* to_page */
+		to_page->index = from_page_index;
+		to_page->mapping = from_mapping_value;
+
+		ClearPageSwapBacked(to_page);
+		if (from_swapbacked)
+			SetPageSwapBacked(to_page);
+	} else if (!from_mapping && to_mapping) {
+		/* from is anonymous, to is file-backed */
+		XA_STATE(to_xas, &to_mapping->i_pages, page_index(to_page));
+		struct zone *from_zone, *to_zone;
+		int dirty;
+
+		from_zone = page_zone(from_page);
+		to_zone = page_zone(to_page);
+
+		xas_lock_irq(&to_xas);
+
+		if (page_count(to_page) != to_expected_count ||
+			xas_load(&to_xas) != to_page) {
+			xas_unlock_irq(&to_xas);
+			return -EAGAIN;
+		}
+
+		if (!page_ref_freeze(to_page, to_expected_count)) {
+			xas_unlock_irq(&to_xas);
+			pr_debug("cannot freeze page count\n");
+			return -EAGAIN;
+		}
+
+		if (!page_ref_freeze(from_page, from_expected_count)) {
+			page_ref_unfreeze(to_page, to_expected_count);
+			xas_unlock_irq(&to_xas);
+
+			return -EAGAIN;
+		}
+		/*
+		 * Now we know that no one else is looking at the page:
+		 * no turning back from here.
+		 */
+		ClearPageSwapBacked(from_page);
+		ClearPageSwapBacked(to_page);
+
+		/* from_page */
+		from_page->index = to_page_index;
+		from_page->mapping = to_mapping_value;
+		/* to_page */
+		to_page->index = from_page_index;
+		to_page->mapping = from_mapping_value;
+
+		if (to_swapbacked)
+			__SetPageSwapBacked(from_page);
+		else
+			VM_BUG_ON_PAGE(PageSwapCache(to_page), to_page);
 
-	ClearPageSwapBacked(from_page);
-	if (to_swapbacked)
-		SetPageSwapBacked(from_page);
+		if (from_swapbacked)
+			__SetPageSwapBacked(to_page);
+		else
+			VM_BUG_ON_PAGE(PageSwapCache(from_page), from_page);
+		dirty = PageDirty(to_page);
 
-	/* to_page */
-	to_page->index = from_page_index;
-	to_page->mapping = from_mapping_value;
+		xas_store(&to_xas, from_page);
+		if (PageTransHuge(to_page)) {
+			int i;
+			for (i = 1; i < HPAGE_PMD_NR; i++) {
+				xas_next(&to_xas);
+				xas_store(&to_xas, from_page + i);
+			}
+		}
+
+		/* move cache reference */
+		page_ref_unfreeze(to_page, to_expected_count - hpage_nr_pages(to_page));
+		page_ref_unfreeze(from_page, from_expected_count + hpage_nr_pages(from_page));
+
+		xas_unlock(&to_xas);
+
+		/*
+		 * If moved to a different zone then also account
+		 * the page for that zone. Other VM counters will be
+		 * taken care of when we establish references to the
+		 * new page and drop references to the old page.
+		 *
+		 * Note that anonymous pages are accounted for
+		 * via NR_FILE_PAGES and NR_ANON_MAPPED if they
+		 * are mapped to swap space.
+		 */
+		if (to_zone != from_zone) {
+			__dec_node_state(to_zone->zone_pgdat, NR_FILE_PAGES);
+			__inc_node_state(from_zone->zone_pgdat, NR_FILE_PAGES);
+			if (PageSwapBacked(to_page) && !PageSwapCache(to_page)) {
+				__dec_node_state(to_zone->zone_pgdat, NR_SHMEM);
+				__inc_node_state(from_zone->zone_pgdat, NR_SHMEM);
+			}
+			if (dirty && mapping_cap_account_dirty(to_mapping)) {
+				__dec_node_state(to_zone->zone_pgdat, NR_FILE_DIRTY);
+				__dec_zone_state(to_zone, NR_ZONE_WRITE_PENDING);
+				__inc_node_state(from_zone->zone_pgdat, NR_FILE_DIRTY);
+				__inc_zone_state(from_zone, NR_ZONE_WRITE_PENDING);
+			}
+		}
+		local_irq_enable();
 
-	ClearPageSwapBacked(to_page);
-	if (from_swapbacked)
-		SetPageSwapBacked(to_page);
+	} else {
+		/* from is file-backed to is anonymous: fold this to the case above */
+		/* both are file-backed */
+		VM_BUG_ON(1);
+	}
 
 	return MIGRATEPAGE_SUCCESS;
 }
@@ -322,6 +423,7 @@ static int exchange_from_to_pages(struct page *to_page, struct page *from_page,
 {
 	int rc = -EBUSY;
 	struct address_space *to_page_mapping, *from_page_mapping;
+	struct buffer_head *to_head = NULL, *to_bh = NULL;
 
 	VM_BUG_ON_PAGE(!PageLocked(from_page), from_page);
 	VM_BUG_ON_PAGE(!PageLocked(to_page), to_page);
 
@@ -330,15 +432,71 @@ static int exchange_from_to_pages(struct page *to_page, struct page *from_page,
 	to_page_mapping = page_mapping(to_page);
 	from_page_mapping = page_mapping(from_page);
 
+	/* from_page has to be anonymous page */
 	BUG_ON(from_page_mapping);
-	BUG_ON(to_page_mapping);
-	BUG_ON(PageWriteback(from_page));
+	/* writeback has to finish */
 	BUG_ON(PageWriteback(to_page));
 
-	/* actual page mapping exchange */
-	rc = exchange_page_move_mapping(to_page_mapping, from_page_mapping,
-				to_page, from_page, mode, 0, 0);
+	/* to_page is anonymous */
+	if (!to_page_mapping) {
+exchange_mappings:
+		/* actual page mapping exchange */
+		rc = exchange_page_move_mapping(to_page_mapping, from_page_mapping,
+					to_page, from_page, NULL, NULL, mode, 0, 0);
+	} else {
+		if (to_page_mapping->a_ops->migratepage == buffer_migrate_page) {
+			if (!page_has_buffers(to_page))
+				goto exchange_mappings;
+
+			to_head = page_buffers(to_page);
+
+			rc = exchange_page_move_mapping(to_page_mapping,
+					from_page_mapping, to_page, from_page,
+					to_head, NULL, mode, 0, 0);
+
+			if (rc != MIGRATEPAGE_SUCCESS)
+				return rc;
+
+			/*
+			 * In the async case, migrate_page_move_mapping locked the buffers
+			 * with an IRQ-safe spinlock held. In the sync case, the buffers
+			 * need to be locked now
+			 */
+			if ((mode & MIGRATE_MODE_MASK) != MIGRATE_ASYNC)
+				BUG_ON(!buffer_migrate_lock_buffers(to_head, mode));
+
+			ClearPagePrivate(to_page);
+			set_page_private(from_page, page_private(to_page));
+			set_page_private(to_page, 0);
+			/* transfer private page count */
+			put_page(to_page);
+			get_page(from_page);
+
+			to_bh = to_head;
+			do {
+				set_bh_page(to_bh, from_page, bh_offset(to_bh));
+				to_bh = to_bh->b_this_page;
+
+			} while (to_bh != to_head);
+
+			SetPagePrivate(from_page);
+
+			to_bh = to_head;
+		} else if (!to_page_mapping->a_ops->migratepage) {
+			/* fallback_migrate_page */
+			if (PageDirty(to_page)) {
+				if ((mode & MIGRATE_MODE_MASK) != MIGRATE_SYNC)
+					return -EBUSY;
+				return writeout(to_page_mapping, to_page);
+			}
+			if (page_has_private(to_page) &&
+				!try_to_release_page(to_page, GFP_KERNEL))
+				return -EAGAIN;
+
+			goto exchange_mappings;
+		}
+	}
 	/* actual page data exchange */
 	if (rc != MIGRATEPAGE_SUCCESS)
 		return rc;
@@ -356,8 +514,28 @@ static int exchange_from_to_pages(struct page *to_page, struct page *from_page,
 		rc = 0;
 	}
 
+	/*
+	 * 1. buffer_migrate_page:
+	 *   private flag should be transferred from to_page to from_page
+	 *
+	 * 2. anon<->anon, fallback_migrate_page:
+	 *    both have none private flags or to_page's is cleared.
+	 * */
+	VM_BUG_ON(!((page_has_private(from_page) && !page_has_private(to_page)) ||
+				(!page_has_private(from_page) && !page_has_private(to_page))));
+
 	exchange_page_flags(to_page, from_page);
 
+	if (to_bh) {
+		VM_BUG_ON(to_bh != to_head);
+		do {
+			unlock_buffer(to_bh);
+			put_bh(to_bh);
+			to_bh = to_bh->b_this_page;
+
+		} while (to_bh != to_head);
+	}
+
 	return rc;
 }
 
@@ -369,34 +547,12 @@ static int unmap_and_exchange(struct page *from_page, struct page *to_page,
 	pgoff_t from_index, to_index;
 	struct anon_vma *from_anon_vma = NULL, *to_anon_vma = NULL;
 
-	/* from_page lock down */
 	if (!trylock_page(from_page)) {
 		if ((mode & MIGRATE_MODE_MASK) == MIGRATE_ASYNC)
 			goto out;
-
 		lock_page(from_page);
 	}
 
-	BUG_ON(PageWriteback(from_page));
-
-	/*
-	 * By try_to_unmap(), page->mapcount goes down to 0 here. In this case,
-	 * we cannot notice that anon_vma is freed while we migrates a page.
-	 * This get_anon_vma() delays freeing anon_vma pointer until the end
-	 * of migration. File cache pages are no problem because of page_lock()
-	 * File Caches may use write_page() or lock_page() in migration, then,
-	 * just care Anon page here.
-	 *
-	 * Only page_get_anon_vma() understands the subtleties of
-	 * getting a hold on an anon_vma from outside one of its mms.
-	 * But if we cannot get anon_vma, then we won't need it anyway,
-	 * because that implies that the anon page is no longer mapped
-	 * (and cannot be remapped so long as we hold the page lock).
-	 */
-	if (PageAnon(from_page) && !PageKsm(from_page))
-		from_anon_vma = page_get_anon_vma(from_page);
-
-	/* to_page lock down */
 	if (!trylock_page(to_page)) {
 		if ((mode & MIGRATE_MODE_MASK) == MIGRATE_ASYNC)
 			goto out_unlock;
@@ -404,7 +560,22 @@ static int unmap_and_exchange(struct page *from_page, struct page *to_page,
 		lock_page(to_page);
 	}
 
-	BUG_ON(PageWriteback(to_page));
+	/* from_page is supposed to be an anonymous page */
+	VM_BUG_ON_PAGE(PageWriteback(from_page), from_page);
+
+	if (PageWriteback(to_page)) {
+		/*
+		 * Only in the case of a full synchronous migration is it
+		 * necessary to wait for PageWriteback. In the async case,
+		 * the retry loop is too short and in the sync-light case,
+		 * the overhead of stalling is too much
+		 */
+		if ((mode & MIGRATE_MODE_MASK) != MIGRATE_SYNC) {
+			rc = -EBUSY;
+			goto out_unlock;
+		}
+		wait_on_page_writeback(to_page);
+	}
 
 	/*
 	 * By try_to_unmap(), page->mapcount goes down to 0 here. In this case,
 	 * we cannot notice that anon_vma is freed while we migrates a page.
 	 * This get_anon_vma() delays freeing anon_vma pointer until the end
 	 * of migration. File cache pages are no problem because of page_lock()
 	 * File Caches may use write_page() or lock_page() in migration, then,
 	 * just care Anon page here.
 	 *
 	 * Only page_get_anon_vma() understands the subtleties of
 	 * getting a hold on an anon_vma from outside one of its mms.
 	 * But if we cannot get anon_vma, then we won't need it anyway,
 	 * because that implies that the anon page is no longer mapped
 	 * (and cannot be remapped so long as we hold the page lock).
 	 */
+	if (PageAnon(from_page) && !PageKsm(from_page))
+		from_anon_vma = page_get_anon_vma(from_page);
+
 	if (PageAnon(to_page) && !PageKsm(to_page))
 		to_anon_vma = page_get_anon_vma(to_page);
 
@@ -753,7 +927,7 @@ static int exchange_page_mapping_concur(struct list_head *unmapped_list_ptr,
 
 		/* actual page mapping exchange */
 		rc = exchange_page_move_mapping(to_page_mapping, from_page_mapping,
-				to_page, from_page, mode, 0, 0);
+				to_page, from_page, NULL, NULL, mode, 0, 0);
 
 		if (rc) {
 			if (one_pair->from_page_was_mapped)
diff --git a/mm/internal.h b/mm/internal.h
index a039459..cf63bf6 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -566,4 +566,13 @@ extern int exchange_page_mthread(struct page *to, struct page *from,
 extern int exchange_page_lists_mthread(struct page **to, struct page **from,
 		int nr_pages);
 
+
+extern int exchange_two_pages(struct page *page1, struct page *page2);
+
+bool buffer_migrate_lock_buffers(struct buffer_head *head,
+			enum migrate_mode mode);
+int writeout(struct address_space *mapping, struct page *page);
+int expected_page_refs(struct address_space *mapping, struct page *page);
+
+
 #endif /* __MM_INTERNAL_H */
diff --git a/mm/migrate.c b/mm/migrate.c
index ad02797..a0ca817 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -385,7 +385,7 @@ void pmd_migration_entry_wait(struct mm_struct *mm, pmd_t *pmd)
 }
 #endif
 
-static int expected_page_refs(struct address_space *mapping, struct page *page)
+int expected_page_refs(struct address_space *mapping, struct page *page)
 {
 	int expected_count = 1;
 
@@ -732,7 +732,7 @@ EXPORT_SYMBOL(migrate_page);
 
 #ifdef CONFIG_BLOCK
 /* Returns true if all buffers are successfully locked */
-static bool buffer_migrate_lock_buffers(struct buffer_head *head,
+bool buffer_migrate_lock_buffers(struct buffer_head *head,
 							enum migrate_mode mode)
 {
 	struct buffer_head *bh = head;
 
@@ -880,7 +880,7 @@ int buffer_migrate_page_norefs(struct address_space *mapping,
 /*
  * Writeback a page to clean the dirty state
  */
-static int writeout(struct address_space *mapping, struct page *page)
+int writeout(struct address_space *mapping, struct page *page)
 {
 	struct writeback_control wbc = {
 		.sync_mode = WB_SYNC_NONE,
-- 
2.7.4
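
For illustration only (not part of the patch): the anon<->file branch added to exchange_page_move_mapping() above freezes both pages' reference counts before swapping their ->mapping/->index fields and the page cache slots. The minimal userspace sketch below mimics that freeze-swap-unfreeze ordering; struct fake_page, freeze_refs(), unfreeze_refs() and exchange_metadata() are invented names for the analogy, not kernel APIs, and the real code does this under the file mapping's XArray lock with page_ref_freeze()/page_ref_unfreeze().

/*
 * Userspace analogy of the refcount-freeze ordering -- NOT kernel code.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct fake_page {
	atomic_int refcount;	/* plays the role of page->_refcount */
	void *mapping;		/* anon_vma or address_space */
	unsigned long index;	/* offset within the mapping */
};

/* Drop the count to 0, but only if it is exactly what we expect. */
static bool freeze_refs(struct fake_page *p, int expected)
{
	int old = expected;

	return atomic_compare_exchange_strong(&p->refcount, &old, 0);
}

static void unfreeze_refs(struct fake_page *p, int count)
{
	atomic_store(&p->refcount, count);
}

/*
 * Same ordering as the anon<->file branch: freeze both pages, swap their
 * identities, then publish the new reference counts.
 */
static int exchange_metadata(struct fake_page *a, struct fake_page *b,
			     int a_expected, int b_expected)
{
	void *mapping;
	unsigned long index;

	if (!freeze_refs(a, a_expected))
		return -1;			/* -EAGAIN in the kernel */
	if (!freeze_refs(b, b_expected)) {
		unfreeze_refs(a, a_expected);	/* undo and let the caller retry */
		return -1;
	}

	/* No one else can take a reference now: swap the identities. */
	mapping = a->mapping;
	index = a->index;
	a->mapping = b->mapping;
	a->index = b->index;
	b->mapping = mapping;
	b->index = index;

	unfreeze_refs(a, a_expected);
	unfreeze_refs(b, b_expected);
	return 0;
}

int main(void)
{
	struct fake_page anon = { .mapping = NULL, .index = 3 };
	/* A file-backed page carries one extra reference for the page cache. */
	struct fake_page file = { .mapping = (void *)0x1, .index = 7 };

	atomic_init(&anon.refcount, 1);
	atomic_init(&file.refcount, 2);

	if (exchange_metadata(&anon, &file, 1, 2) == 0)
		printf("anon.index=%lu file.index=%lu\n", anon.index, file.index);
	return 0;
}

In the patch itself, the unfreeze values are additionally adjusted by hpage_nr_pages() (to_expected_count - hpage_nr_pages(to_page) and from_expected_count + hpage_nr_pages(from_page)) so that the page cache reference moves to from_page together with the XArray slots.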