Migration Cache 2/5 - Add checks for swap vs migration cache This patch adds the necessary checks for whether a page that appears to be in the swap cache is really in the migration cache. Most of these checks are hidden behind the normal swap interfaces, and are, thus, limited to the swap sources. However, a couple of them spill over into mm/memory.c and mm/vmscan.c. Signed-off-by: Lee Schermerhorn <lee.schermerhorn@xxxxxx> include/linux/mm.h | 19 ++++++++++++++++--- include/linux/swap.h | 46 ++++++++++++++++++++++++++++++++++++++++++++-- mm/memory.c | 6 +++--- mm/shmem.c | 2 +- mm/swap_state.c | 50 +++++++++++++++++++++++++++++++++++++++++++++----- mm/swapfile.c | 32 +++++++++++++++++++++++++++++++- mm/vmscan.c | 18 +++++++++++++++--- 7 files changed, 155 insertions(+), 18 deletions(-) Index: linux-2.6.36-mmotm-101103-1217/mm/swap_state.c =================================================================== --- linux-2.6.36-mmotm-101103-1217.orig/mm/swap_state.c +++ linux-2.6.36-mmotm-101103-1217/mm/swap_state.c @@ -298,13 +298,22 @@ int add_to_migration_cache(struct page * page_cache_get(page); set_page_private(page, entry.val); SetPageSwapCache(page); - SetPageUptodate(page); /* like add_to_swap() */ + SetPageUptodate(page); /* like __add_to_swap() */ } spin_unlock_irq(&migration_space.tree_lock); radix_tree_preload_end(); } return error; } +#else + +/* + * should never be called. + */ +static int add_to_migration_cache(struct page *page, int gfp_mask) +{ + return 0; +} #endif /* CONFIG_MIGRATION_CACHE */ @@ -425,13 +434,15 @@ void __delete_from_swap_cache(struct pag } /** - * add_to_swap - allocate swap space for a page + * __add_to_swap - allocate swap space for a page * @page: page we want to move to swap + * @migration: add to migration cache instead * - * Allocate swap space for the page and add the page to the - * swap cache. Caller needs to hold the page lock. 
+ * Allocate a swap space page or migration cache index for page + * and add the page to the appropriate cache. + * Caller needs to hold the page lock. */ -int add_to_swap(struct page *page) +int __add_to_swap(struct page * page, int migration) { swp_entry_t entry; int err; @@ -439,6 +450,10 @@ int add_to_swap(struct page *page) VM_BUG_ON(!PageLocked(page)); VM_BUG_ON(!PageUptodate(page)); + if (MIGRATION_CACHE && migration) + return (!add_to_migration_cache(page, + __GFP_HIGH|__GFP_NOMEMALLOC|__GFP_NOWARN)); + entry = get_swap_page(); if (!entry.val) return 0; @@ -481,6 +496,9 @@ void delete_from_swap_cache(struct page swp_entry_t entry; entry.val = page_private(page); +//TODO: this needed? + if (is_migration_cache(entry)) + return; spin_lock_irq(&swapper_space.tree_lock); __delete_from_swap_cache(page); @@ -547,6 +565,9 @@ struct page * lookup_swap_cache(swp_entr { struct page *page; + if (is_migration_cache(entry)) + return lookup_migration_cache(entry); + page = find_get_page(&swapper_space, entry.val); if (page) @@ -570,6 +591,16 @@ struct page *read_swap_cache_async(swp_e struct page *found_page, *new_page = NULL; int err; + if (is_migration_cache(entry)) { + /* + * If we get here, we probably have a race between 2 + * tasks sharing the mm_struct & page_table. Just + * return NULL and let do_swap_page() deal with it. + */ + mpol_cond_put(pol); /* drop incoming ref */ + return NULL; + } + do { /* * First check the swap cache. Since this is normally @@ -674,6 +705,15 @@ struct page *swapin_readahead(swp_entry_ pol = mpol_cond_copy(&mpol, pol); /* + * Return page from migration cache in case do_swap_page() or + * shmem_swapin() [can it?] hands us one of these. + */ + if (is_migration_cache(entry)) { + mpol_cond_put(pol); + return lookup_migration_cache(entry); + } + + /* * Get starting offset for readaround, and number of pages to read. * Adjust starting address by readbehind (for NUMA interleave case)? 
* No, it's very unlikely that swap layout would follow vma layout, Index: linux-2.6.36-mmotm-101103-1217/mm/swapfile.c =================================================================== --- linux-2.6.36-mmotm-101103-1217.orig/mm/swapfile.c +++ linux-2.6.36-mmotm-101103-1217/mm/swapfile.c @@ -602,15 +602,21 @@ static unsigned char swap_entry_free(str * Caller has made sure that the swapdevice corresponding to entry * is still around or has not been recycled. */ -void swap_free(swp_entry_t entry) +int swap_free(swp_entry_t entry) { struct swap_info_struct *p; + if (is_migration_cache(entry)) { + migration_remove_reference_entry(entry); + return 0; /* don't "try_to_free" mig cache entries */ + } + p = swap_info_get(entry); if (p) { swap_entry_free(p, entry, 1); spin_unlock(&swap_lock); } + return 1; /* OK to try to free */ } /* @@ -621,6 +627,11 @@ void swapcache_free(swp_entry_t entry, s struct swap_info_struct *p; unsigned char count; + if (is_migration_cache(entry)) { + migration_remove_reference_entry(entry); + return; + } + p = swap_info_get(entry); if (p) { count = swap_entry_free(p, entry, SWAP_HAS_CACHE); @@ -642,6 +653,10 @@ static inline int page_swapcount(struct swp_entry_t entry; entry.val = page_private(page); + + if (is_migration_cache(entry)) + return migration_ref_count(entry); + p = swap_info_get(entry); if (p) { count = swap_count(p->swap_map[swp_offset(entry)]); @@ -684,6 +699,8 @@ int try_to_free_swap(struct page *page) if (!PageSwapCache(page)) return 0; + if (page_in_migration_cache(page)) + return 0; if (PageWriteback(page)) return 0; if (page_swapcount(page)) @@ -721,6 +738,11 @@ int free_swap_and_cache(swp_entry_t entr struct swap_info_struct *p; struct page *page = NULL; + if (is_migration_cache(entry)) { + migration_remove_reference_entry(entry); + return 1; + } + if (non_swap_entry(entry)) return 1; @@ -1898,11 +1920,16 @@ SYSCALL_DEFINE2(swapon, const char __use break; } error = -EPERM; + + /* + * MIGRATION entries stolen from top of 
swapfiles space + */ if (type >= MAX_SWAPFILES) { spin_unlock(&swap_lock); kfree(p); goto out; } + if (type >= nr_swapfiles) { p->type = type; swap_info[type] = p; @@ -2289,6 +2316,9 @@ int swap_duplicate(swp_entry_t entry) { int err = 0; + if (is_migration_cache(entry)) + return migration_duplicate(entry); + while (!err && __swap_duplicate(entry, 1) == -ENOMEM) err = add_swap_count_continuation(entry, GFP_ATOMIC); return err; Index: linux-2.6.36-mmotm-101103-1217/include/linux/mm.h =================================================================== --- linux-2.6.36-mmotm-101103-1217.orig/include/linux/mm.h +++ linux-2.6.36-mmotm-101103-1217/include/linux/mm.h @@ -630,6 +630,14 @@ void page_address_init(void); #define page_address_init() do { } while(0) #endif +#ifdef CONFIG_MIGRATION_CACHE +//TODO: can I make this 'static inline' here? header dependencies? +extern int page_in_migration_cache(struct page *); +extern struct address_space migration_space; +#else +#define page_in_migration_cache(p) (0) +#endif + /* * On an anonymous page mapped into a user virtual memory area, * page->mapping points to its anon_vma, not to a struct address_space; @@ -656,9 +664,14 @@ static inline struct address_space *page struct address_space *mapping = page->mapping; VM_BUG_ON(PageSlab(page)); - if (unlikely(PageSwapCache(page))) - mapping = &swapper_space; - else if (unlikely((unsigned long)mapping & PAGE_MAPPING_ANON)) + if (unlikely(PageSwapCache(page))) { +#ifdef CONFIG_MIGRATION_CACHE + if (page_in_migration_cache(page)) + mapping = &migration_space; + else +#endif + mapping = &swapper_space; + } else if (unlikely((unsigned long)mapping & PAGE_MAPPING_ANON)) mapping = NULL; return mapping; } Index: linux-2.6.36-mmotm-101103-1217/mm/vmscan.c =================================================================== --- linux-2.6.36-mmotm-101103-1217.orig/mm/vmscan.c +++ linux-2.6.36-mmotm-101103-1217/mm/vmscan.c @@ -756,11 +756,23 @@ static unsigned long shrink_page_list(st * 
Anonymous process memory has backing store? * Try to allocate it some swap space here. */ - if (PageAnon(page) && !PageSwapCache(page)) { + if (PageAnon(page) && (!PageSwapCache(page) || + page_in_migration_cache(page))) { if (!(sc->gfp_mask & __GFP_IO)) goto keep_locked; - if (!add_to_swap(page)) - goto activate_locked; + if (!check_add_to_swap(page)) { + if (PageSwapCache(page)) { + /* + * move from mig cache failed + */ + goto keep_locked; + } else { + /* + * add to swap cache failed + */ + goto activate_locked; + } + } may_enter_fs = 1; } Index: linux-2.6.36-mmotm-101103-1217/include/linux/swap.h =================================================================== --- linux-2.6.36-mmotm-101103-1217.orig/include/linux/swap.h +++ linux-2.6.36-mmotm-101103-1217/include/linux/swap.h @@ -319,7 +319,7 @@ extern void end_swap_bio_read(struct bio extern struct address_space swapper_space; #define total_swapcache_pages swapper_space.nrpages extern void show_swap_cache_info(void); -extern int add_to_swap(struct page *); +extern int __add_to_swap(struct page *, int); extern int add_to_swap_cache(struct page *, swp_entry_t, gfp_t); extern void __delete_from_swap_cache(struct page *); extern void delete_from_swap_cache(struct page *); @@ -342,7 +342,7 @@ extern int add_swap_count_continuation(s extern void swap_shmem_alloc(swp_entry_t); extern int swap_duplicate(swp_entry_t); extern int swapcache_prepare(swp_entry_t); -extern void swap_free(swp_entry_t); +extern int swap_free(swp_entry_t); extern void swapcache_free(swp_entry_t, struct page *page); extern int free_swap_and_cache(swp_entry_t); extern int swap_type_of(dev_t, sector_t, struct block_device **); @@ -396,6 +396,7 @@ static inline void mem_cgroup_uncharge_s /* * The Migration Cache: a pseudo-swap cache for anon pages */ +#define MIGRATION_CACHE 1 extern int page_in_migration_cache(struct page *); extern void migration_remove_reference_page(struct page *); extern void migration_remove_reference_entry(swp_entry_t); 
@@ -404,6 +405,46 @@ extern int migration_add_reference_page extern int migration_ref_count(swp_entry_t); extern void __migration_remove_reference(struct page *, swp_entry_t); extern struct page *lookup_migration_cache(swp_entry_t); + +#ifdef PAGE_FLAGS_PRIVATE /* only where this is defined */ +/** + * check_add_to_swap() -- add page to swap or migration cache if needed + * @page: page to check/add + * + * For vmscan:shrink_page_list(): + * if @page in migration cache, fail -- until "move to swap" available. + * if @page already in swap cache -- OK to swap out. + * else try to add @page to swap cache + */ +static inline int check_add_to_swap(struct page * page) +{ + if (PageSwapCache(page)) { + if (page_in_migration_cache(page)) + return 0; /* Fail -- TODO: move to swap */ + else + return 1; /* already in swap cache */ + } + + return __add_to_swap(page, 0); /* normal swap cache */ +} +#endif + +#else /* [CONFIG_SWAP &&] !CONFIG_MIGRATION_CACHE */ + +#ifdef PAGE_FLAGS_PRIVATE /* only where this is defined */ +/* + * version of check_add_to_swap for !CONFIG_MIGRATION_CACHE + * Try to add @page to swap cache if not already there. 
+ */ +static inline int check_add_to_swap(struct page * page) +{ + if (PageSwapCache(page)) + return 1; + + return __add_to_swap(page, 0); /* normal swap cache */ +} +#endif + #endif /* CONFIG_MIGRATION_CACHE */ #else /* CONFIG_SWAP */ @@ -533,6 +574,7 @@ mem_cgroup_count_swap_user(swp_entry_t e #endif /* CONFIG_SWAP */ #ifndef CONFIG_MIGRATION_CACHE +#define MIGRATION_CACHE 0 static inline void migration_remove_reference_page(struct page *p) { } static inline void migration_remove_reference_entry(swp_entry_t e) { } static inline int migration_duplicate(swp_entry_t e) { return 0; } Index: linux-2.6.36-mmotm-101103-1217/mm/shmem.c =================================================================== --- linux-2.6.36-mmotm-101103-1217.orig/mm/shmem.c +++ linux-2.6.36-mmotm-101103-1217/mm/shmem.c @@ -1337,12 +1337,12 @@ repeat: delete_from_swap_cache(swappage); spin_unlock(&info->lock); copy_highpage(filepage, swappage); + swap_free(swap); /* free while locked */ unlock_page(swappage); page_cache_release(swappage); flush_dcache_page(filepage); SetPageUptodate(filepage); set_page_dirty(filepage); - swap_free(swap); } else if (!(error = add_to_page_cache_locked(swappage, mapping, idx, GFP_NOWAIT))) { info->flags |= SHMEM_PAGEIN; Index: linux-2.6.36-mmotm-101103-1217/mm/memory.c =================================================================== --- linux-2.6.36-mmotm-101103-1217.orig/mm/memory.c +++ linux-2.6.36-mmotm-101103-1217/mm/memory.c @@ -2639,7 +2639,7 @@ static int do_swap_page(struct mm_struct goto out; entry = pte_to_swp_entry(orig_pte); - if (unlikely(non_swap_entry(entry))) { + if (unlikely(non_swap_entry(entry) && !is_migration_cache(entry))) { if (is_migration_entry(entry)) { migration_entry_wait(mm, pmd, address); } else if (is_hwpoison_entry(entry)) { @@ -2783,8 +2783,8 @@ static int do_swap_page(struct mm_struct /* It's better to call commit-charge after rmap is established */ mem_cgroup_commit_charge_swapin(page, ptr); - swap_free(entry); - if 
(vm_swap_full() || (vma->vm_flags & VM_LOCKED) || PageMlocked(page)) + if (swap_free(entry) && + (vm_swap_full() || (vma->vm_flags & VM_LOCKED) || PageMlocked(page))) try_to_free_swap(page); unlock_page(page); if (swapcache) { -- To unsubscribe from this list: send the line "unsubscribe linux-numa" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html