On 04/23/2015 11:03 PM, Kirill A. Shutemov wrote:
We need to prepare kernel to allow transhuge pages to be mapped with ptes too. We need to handle FOLL_SPLIT in follow_page_pte(). Also we use split_huge_page() directly instead of split_huge_page_pmd(). split_huge_page_pmd() will gone.
You still call split_huge_page_pmd() for the is_huge_zero_page(page) case. Also, of the code around split_huge_page() you basically took from split_huge_page_pmd() and open-coded into follow_page_mask(), you didn't include the mmu notifier calls. Why are they needed in split_huge_page_pmd() but not here?
Signed-off-by: Kirill A. Shutemov <kirill.shutemov@xxxxxxxxxxxxxxx> Tested-by: Sasha Levin <sasha.levin@xxxxxxxxxx> --- mm/gup.c | 67 +++++++++++++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 49 insertions(+), 18 deletions(-) diff --git a/mm/gup.c b/mm/gup.c index 203781fa96a5..ebdb39b3e820 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -79,6 +79,19 @@ retry: page = pte_page(pte); } + if (flags & FOLL_SPLIT && PageTransCompound(page)) { + int ret; + get_page(page); + pte_unmap_unlock(ptep, ptl); + lock_page(page); + ret = split_huge_page(page); + unlock_page(page); + put_page(page); + if (ret) + return ERR_PTR(ret); + goto retry; + } + if (flags & FOLL_GET) get_page_foll(page); if (flags & FOLL_TOUCH) { @@ -186,27 +199,45 @@ struct page *follow_page_mask(struct vm_area_struct *vma, } if ((flags & FOLL_NUMA) && pmd_protnone(*pmd)) return no_page_table(vma, flags); - if (pmd_trans_huge(*pmd)) { - if (flags & FOLL_SPLIT) { + if (likely(!pmd_trans_huge(*pmd))) + return follow_page_pte(vma, address, pmd, flags); + + ptl = pmd_lock(mm, pmd); + if (unlikely(!pmd_trans_huge(*pmd))) { + spin_unlock(ptl); + return follow_page_pte(vma, address, pmd, flags); + } + + if (unlikely(pmd_trans_splitting(*pmd))) { + spin_unlock(ptl); + wait_split_huge_page(vma->anon_vma, pmd); + return follow_page_pte(vma, address, pmd, flags); + } + + if (flags & FOLL_SPLIT) { + int ret; + page = pmd_page(*pmd); + if (is_huge_zero_page(page)) { + spin_unlock(ptl); + ret = 0; split_huge_page_pmd(vma, address, pmd); - return follow_page_pte(vma, address, pmd, flags); - } - ptl = pmd_lock(mm, pmd); - if (likely(pmd_trans_huge(*pmd))) { - if (unlikely(pmd_trans_splitting(*pmd))) { - spin_unlock(ptl); - wait_split_huge_page(vma->anon_vma, pmd); - } else { - page = follow_trans_huge_pmd(vma, address, - pmd, flags); - spin_unlock(ptl); - *page_mask = HPAGE_PMD_NR - 1; - return page; - } - } else + } else { + get_page(page); spin_unlock(ptl); + lock_page(page); + ret = split_huge_page(page); + unlock_page(page); + put_page(page); + } + + return ret ? ERR_PTR(ret) : + follow_page_pte(vma, address, pmd, flags); } - return follow_page_pte(vma, address, pmd, flags); + + page = follow_trans_huge_pmd(vma, address, pmd, flags); + spin_unlock(ptl); + *page_mask = HPAGE_PMD_NR - 1; + return page; } static int get_gate_page(struct mm_struct *mm, unsigned long address,
-- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@xxxxxxxxx. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@xxxxxxxxx"> email@xxxxxxxxx </a>