New patch below with handle all the pages after splitted. --- include/linux/huge_mm.h | 2 ++ mm/shmem.c | 79 ++++++++++++++++++++++++++++++++++++------------- 2 files changed, 61 insertions(+), 20 deletions(-) diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 65f90db..58b0208 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -64,6 +64,7 @@ extern pmd_t *page_check_address_pmd(struct page *page, #define HPAGE_PMD_SHIFT PMD_SHIFT #define HPAGE_PMD_SIZE ((1UL) << HPAGE_PMD_SHIFT) #define HPAGE_PMD_MASK (~(HPAGE_PMD_SIZE - 1)) +#define HPAGE_NR_PAGES HPAGE_PMD_NR extern bool is_vma_temporary_stack(struct vm_area_struct *vma); @@ -207,6 +208,7 @@ extern int do_huge_pmd_numa_page(struct mm_struct *mm, struct vm_area_struct *vm #define THP_READ_ALLOC_FAILED ({ BUILD_BUG(); 0; }) #define hpage_nr_pages(x) 1 +#define HPAGE_NR_PAGES 1 #define transparent_hugepage_enabled(__vma) 0 #define transparent_hugepage_defrag(__vma) 0 diff --git a/mm/shmem.c b/mm/shmem.c index 5bde8d0..b80ace7 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -862,14 +862,16 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc) struct shmem_inode_info *info; struct address_space *mapping; struct inode *inode; - swp_entry_t swap; + swp_entry_t swap[HPAGE_NR_PAGES]; pgoff_t index; + int nr = 1; + int i; BUG_ON(!PageLocked(page)); mapping = page->mapping; - index = page->index; inode = mapping->host; info = SHMEM_I(inode); + if (info->flags & VM_LOCKED) goto redirty; if (!total_swap_pages) @@ -887,6 +889,7 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc) goto redirty; } + index = page->index; /* * This is somewhat ridiculous, but without plumbing a SWAP_MAP_FALLOC * value into swapfile.c, the only way we can correctly account for a @@ -906,21 +909,35 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc) if (shmem_falloc && index >= shmem_falloc->start && index < shmem_falloc->next) - 
shmem_falloc->nr_unswapped++; + shmem_falloc->nr_unswapped += + hpagecache_nr_pages(page); else shmem_falloc = NULL; spin_unlock(&inode->i_lock); if (shmem_falloc) goto redirty; } - clear_highpage(page); + clear_pagecache_page(page); flush_dcache_page(page); SetPageUptodate(page); } - swap = get_swap_page(); - if (!swap.val) - goto redirty; + /* We can only have nr correct after huge page splitted, + * otherwise, it will fail the redirty logic + */ + nr = hpagecache_nr_pages(page); + /* We have to break the huge page at this point, + * since we have no idea how to swap a huge page. + */ + if (PageTransHugeCache(page)) + split_huge_page(compound_trans_head(page)); + + /* Pre-allocate all the swap pages */ + for (i = 0; i < nr; i++) { + swap[i] = get_swap_page(); + if (!swap[i].val) + goto undo_alloc_swap; + } /* * Add inode to shmem_unuse()'s list of swapped-out inodes, @@ -934,25 +951,47 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc) if (list_empty(&info->swaplist)) list_add_tail(&info->swaplist, &shmem_swaplist); - if (add_to_swap_cache(page, swap, GFP_ATOMIC) == 0) { - swap_shmem_alloc(swap); - shmem_delete_from_page_cache(page, swp_to_radix_entry(swap)); + for (i = 0; i < nr; i++) { + if (add_to_swap_cache(page + i, swap[i], GFP_ATOMIC)) + goto undo_add_to_swap_cache; + } - spin_lock(&info->lock); - info->swapped++; - shmem_recalc_inode(inode); - spin_unlock(&info->lock); + /* We make sure everything is correct before moving further */ + for (i = 0; i < nr; i++) { + swap_shmem_alloc(swap[i]); + shmem_delete_from_page_cache(page + i, + swp_to_radix_entry(swap[i])); + } - mutex_unlock(&shmem_swaplist_mutex); - BUG_ON(page_mapped(page)); - swap_writepage(page, wbc); - return 0; + spin_lock(&info->lock); + info->swapped += nr; + shmem_recalc_inode(inode); + spin_unlock(&info->lock); + + mutex_unlock(&shmem_swaplist_mutex); + + for (i = 0; i < nr; i++) { + BUG_ON(page_mapped(page + i)); + swap_writepage(page + i, wbc); } + return 0; + 
+undo_add_to_swap_cache: + while (i) { + i--; + __delete_from_swap_cache(page + i); + } mutex_unlock(&shmem_swaplist_mutex); - swapcache_free(swap, NULL); + i = nr; +undo_alloc_swap: + while (i) { + i--; + swapcache_free(swap[i], NULL); + } redirty: - set_page_dirty(page); + for (i = 0; i < nr; i++) + set_page_dirty(page + i); if (wbc->for_reclaim) return AOP_WRITEPAGE_ACTIVATE; /* Return with page locked */ unlock_page(page); -- Best wishes, -- Ning Qu (曲宁) | Software Engineer | quning@xxxxxxxxxx | +1-408-418-6066 On Tue, Oct 15, 2013 at 12:00 PM, Ning Qu <quning@xxxxxxxxxx> wrote: > Let me take another look at that logic. Thanks! > Best wishes, > -- > Ning Qu (曲宁) | Software Engineer | quning@xxxxxxxxxx | +1-408-418-6066 > > > On Tue, Oct 15, 2013 at 3:33 AM, Kirill A. Shutemov > <kirill.shutemov@xxxxxxxxxxxxxxx> wrote: > > Ning Qu wrote: > >> in shmem_writepage, we have to split the huge page when moving pages > >> from page cache to swap because we don't support huge page in swap > >> yet. > >> > >> Signed-off-by: Ning Qu <quning@xxxxxxxxx> > >> --- > >> mm/shmem.c | 9 ++++++++- > >> 1 file changed, 8 insertions(+), 1 deletion(-) > >> > >> diff --git a/mm/shmem.c b/mm/shmem.c > >> index 8fe17dd..68a0e1d 100644 > >> --- a/mm/shmem.c > >> +++ b/mm/shmem.c > >> @@ -898,6 +898,13 @@ static int shmem_writepage(struct page *page, > struct writeback_control *wbc) > >> swp_entry_t swap; > >> pgoff_t index; > >> > >> + /* TODO: we have to break the huge page at this point, > >> + * since we have no idea how to recover a huge page from > >> + * swap. > >> + */ > >> + if (PageTransCompound(page)) > >> + split_huge_page(compound_trans_head(page)); > >> + > > > > After the split you handle here only first small page of the huge page. > > Is it what we want to do? Should we swap out all small pages of the huge > > page? > > > > -- > > Kirill A. Shutemov > > -- > To unsubscribe, send a message with 'unsubscribe linux-mm' in > the body to majordomo@xxxxxxxxx. 
For more info on Linux MM, > see: http://www.linux-mm.org/ . > Don't email: <a href="mailto:dont@xxxxxxxxx"> email@xxxxxxxxx </a> >
New patch below, which handles all the pages after the huge page is split.
Best wishes,
--
Ning Qu (曲宁) | Software Engineer | quning@xxxxxxxxxx | +1-408-418-6066
Ning Qu (曲宁) | Software Engineer | quning@xxxxxxxxxx | +1-408-418-6066
On Tue, Oct 15, 2013 at 12:00 PM, Ning Qu <quning@xxxxxxxxxx> wrote:
Let me take another look at that logic. Thanks!
Best wishes,
--
Ning Qu (曲宁) | Software Engineer | quning@xxxxxxxxxx | +1-408-418-6066
Don't email: <a href="mailto:dont@xxxxxxxxx"> email@xxxxxxxxx </a>
On Tue, Oct 15, 2013 at 3:33 AM, Kirill A. Shutemov
<kirill.shutemov@xxxxxxxxxxxxxxx> wrote:
> Ning Qu wrote:
>> in shmem_writepage, we have to split the huge page when moving pages
>> from page cache to swap because we don't support huge page in swap
>> yet.
>>
>> Signed-off-by: Ning Qu <quning@xxxxxxxxx>
>> ---
>> mm/shmem.c | 9 ++++++++-
>> 1 file changed, 8 insertions(+), 1 deletion(-)
>>
>> diff --git a/mm/shmem.c b/mm/shmem.c
>> index 8fe17dd..68a0e1d 100644
>> --- a/mm/shmem.c
>> +++ b/mm/shmem.c
>> @@ -898,6 +898,13 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
>> swp_entry_t swap;
>> pgoff_t index;
>>
>> + /* TODO: we have to break the huge page at this point,
>> + * since we have no idea how to recover a huge page from
>> + * swap.
>> + */
>> + if (PageTransCompound(page))
>> + split_huge_page(compound_trans_head(page));
>> +
>
> After the split you handle here only first small page of the huge page.
> Is it what we want to do? Should we swap out all small pages of the huge
> page?
>
> --
> Kirill A. Shutemov
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@xxxxxxxxx. For more info on Linux MM,
see: http://www.linux-mm.org/ .