On Mon, Feb 23, 2015 at 05:21:31PM +0100, Vlastimil Babka wrote: > On 02/12/2015 05:18 PM, Kirill A. Shutemov wrote: > > We're going to allow mapping of individual 4k pages of THP compound > > page. It means we cannot rely on PageTransHuge() check to decide whether to map a > > small page or THP. > > > > The patch adds new argument to rmap function to indicate whether we want > > to map whole compound page or only the small page. > > > > Signed-off-by: Kirill A. Shutemov <kirill.shutemov@xxxxxxxxxxxxxxx> > > --- > > include/linux/rmap.h | 14 +++++++++++--- > > kernel/events/uprobes.c | 4 ++-- > > mm/huge_memory.c | 16 ++++++++-------- > > mm/hugetlb.c | 4 ++-- > > mm/ksm.c | 4 ++-- > > mm/memory.c | 14 +++++++------- > > mm/migrate.c | 8 ++++---- > > mm/rmap.c | 43 +++++++++++++++++++++++++++---------------- > > mm/swapfile.c | 4 ++-- > > 9 files changed, 65 insertions(+), 46 deletions(-) > > > > diff --git a/include/linux/rmap.h b/include/linux/rmap.h > > index c4088feac1fc..3bf73620b672 100644 > > --- a/include/linux/rmap.h > > +++ b/include/linux/rmap.h > > @@ -168,16 +168,24 @@ static inline void anon_vma_merge(struct vm_area_struct *vma, > > > > struct anon_vma *page_get_anon_vma(struct page *page); > > > > +/* flags for do_page_add_anon_rmap() */ > > +enum { > > + RMAP_EXCLUSIVE = 1, > > + RMAP_COMPOUND = 2, > > +}; > > + > > /* > > * rmap interfaces called when adding or removing pte of page > > */ > > void page_move_anon_rmap(struct page *, struct vm_area_struct *, unsigned long); > > -void page_add_anon_rmap(struct page *, struct vm_area_struct *, unsigned long); > > +void page_add_anon_rmap(struct page *, struct vm_area_struct *, > > + unsigned long, bool); > > void do_page_add_anon_rmap(struct page *, struct vm_area_struct *, > > unsigned long, int); > > -void page_add_new_anon_rmap(struct page *, struct vm_area_struct *, unsigned long); > > +void page_add_new_anon_rmap(struct page *, struct vm_area_struct *, > > + unsigned long, bool); > > void 
page_add_file_rmap(struct page *); > > -void page_remove_rmap(struct page *); > > +void page_remove_rmap(struct page *, bool); > > > > void hugepage_add_anon_rmap(struct page *, struct vm_area_struct *, > > unsigned long); > > diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c > > index cb346f26a22d..5523daf59953 100644 > > --- a/kernel/events/uprobes.c > > +++ b/kernel/events/uprobes.c > > @@ -183,7 +183,7 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr, > > goto unlock; > > > > get_page(kpage); > > - page_add_new_anon_rmap(kpage, vma, addr); > > + page_add_new_anon_rmap(kpage, vma, addr, false); > > mem_cgroup_commit_charge(kpage, memcg, false); > > lru_cache_add_active_or_unevictable(kpage, vma); > > > > @@ -196,7 +196,7 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr, > > ptep_clear_flush_notify(vma, addr, ptep); > > set_pte_at_notify(mm, addr, ptep, mk_pte(kpage, vma->vm_page_prot)); > > > > - page_remove_rmap(page); > > + page_remove_rmap(page, false); > > if (!page_mapped(page)) > > try_to_free_swap(page); > > pte_unmap_unlock(ptep, ptl); > > diff --git a/mm/huge_memory.c b/mm/huge_memory.c > > index 5f4c97e1a6da..36637a80669e 100644 > > --- a/mm/huge_memory.c > > +++ b/mm/huge_memory.c > > @@ -743,7 +743,7 @@ static int __do_huge_pmd_anonymous_page(struct mm_struct *mm, > > pmd_t entry; > > entry = mk_huge_pmd(page, vma->vm_page_prot); > > entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); > > - page_add_new_anon_rmap(page, vma, haddr); > > + page_add_new_anon_rmap(page, vma, haddr, true); > > mem_cgroup_commit_charge(page, memcg, false); > > lru_cache_add_active_or_unevictable(page, vma); > > pgtable_trans_huge_deposit(mm, pmd, pgtable); > > @@ -1034,7 +1034,7 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm, > > entry = maybe_mkwrite(pte_mkdirty(entry), vma); > > memcg = (void *)page_private(pages[i]); > > set_page_private(pages[i], 0); > > - 
page_add_new_anon_rmap(pages[i], vma, haddr); > > + page_add_new_anon_rmap(pages[i], vma, haddr, false); > > mem_cgroup_commit_charge(pages[i], memcg, false); > > lru_cache_add_active_or_unevictable(pages[i], vma); > > pte = pte_offset_map(&_pmd, haddr); > > @@ -1046,7 +1046,7 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm, > > > > smp_wmb(); /* make pte visible before pmd */ > > pmd_populate(mm, pmd, pgtable); > > - page_remove_rmap(page); > > + page_remove_rmap(page, true); > > spin_unlock(ptl); > > > > mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); > > @@ -1168,7 +1168,7 @@ alloc: > > entry = mk_huge_pmd(new_page, vma->vm_page_prot); > > entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); > > pmdp_clear_flush_notify(vma, haddr, pmd); > > - page_add_new_anon_rmap(new_page, vma, haddr); > > + page_add_new_anon_rmap(new_page, vma, haddr, true); > > mem_cgroup_commit_charge(new_page, memcg, false); > > lru_cache_add_active_or_unevictable(new_page, vma); > > set_pmd_at(mm, haddr, pmd, entry); > > @@ -1178,7 +1178,7 @@ alloc: > > put_huge_zero_page(); > > } else { > > VM_BUG_ON_PAGE(!PageHead(page), page); > > - page_remove_rmap(page); > > + page_remove_rmap(page, true); > > put_page(page); > > } > > ret |= VM_FAULT_WRITE; > > @@ -1431,7 +1431,7 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, > > put_huge_zero_page(); > > } else { > > page = pmd_page(orig_pmd); > > - page_remove_rmap(page); > > + page_remove_rmap(page, true); > > VM_BUG_ON_PAGE(page_mapcount(page) < 0, page); > > add_mm_counter(tlb->mm, MM_ANONPAGES, -HPAGE_PMD_NR); > > VM_BUG_ON_PAGE(!PageHead(page), page); > > @@ -2368,7 +2368,7 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page, > > * superfluous. 
> > */ > > pte_clear(vma->vm_mm, address, _pte); > > - page_remove_rmap(src_page); > > + page_remove_rmap(src_page, false); > > spin_unlock(ptl); > > free_page_and_swap_cache(src_page); > > } > > @@ -2658,7 +2658,7 @@ static void collapse_huge_page(struct mm_struct *mm, > > > > spin_lock(pmd_ptl); > > BUG_ON(!pmd_none(*pmd)); > > - page_add_new_anon_rmap(new_page, vma, address); > > + page_add_new_anon_rmap(new_page, vma, address, true); > > mem_cgroup_commit_charge(new_page, memcg, false); > > lru_cache_add_active_or_unevictable(new_page, vma); > > pgtable_trans_huge_deposit(mm, pmd, pgtable); > > diff --git a/mm/hugetlb.c b/mm/hugetlb.c > > index 0a9ac6c26832..ebb7329301c4 100644 > > --- a/mm/hugetlb.c > > +++ b/mm/hugetlb.c > > @@ -2688,7 +2688,7 @@ again: > > if (huge_pte_dirty(pte)) > > set_page_dirty(page); > > > > - page_remove_rmap(page); > > + page_remove_rmap(page, true); > > force_flush = !__tlb_remove_page(tlb, page); > > if (force_flush) { > > address += sz; > > @@ -2908,7 +2908,7 @@ retry_avoidcopy: > > mmu_notifier_invalidate_range(mm, mmun_start, mmun_end); > > set_huge_pte_at(mm, address, ptep, > > make_huge_pte(vma, new_page, 1)); > > - page_remove_rmap(old_page); > > + page_remove_rmap(old_page, true); > > hugepage_add_new_anon_rmap(new_page, vma, address); > > /* Make the old page be freed below */ > > new_page = old_page; > > diff --git a/mm/ksm.c b/mm/ksm.c > > index 4162dce2eb44..92182eeba87d 100644 > > --- a/mm/ksm.c > > +++ b/mm/ksm.c > > @@ -957,13 +957,13 @@ static int replace_page(struct vm_area_struct *vma, struct page *page, > > } > > > > get_page(kpage); > > - page_add_anon_rmap(kpage, vma, addr); > > + page_add_anon_rmap(kpage, vma, addr, false); > > > > flush_cache_page(vma, addr, pte_pfn(*ptep)); > > ptep_clear_flush_notify(vma, addr, ptep); > > set_pte_at_notify(mm, addr, ptep, mk_pte(kpage, vma->vm_page_prot)); > > > > - page_remove_rmap(page); > > + page_remove_rmap(page, false); > > if (!page_mapped(page)) > > 
try_to_free_swap(page); > > put_page(page); > > diff --git a/mm/memory.c b/mm/memory.c > > index 8ae52c918415..5529627d2cd6 100644 > > --- a/mm/memory.c > > +++ b/mm/memory.c > > @@ -1125,7 +1125,7 @@ again: > > mark_page_accessed(page); > > rss[MM_FILEPAGES]--; > > } > > - page_remove_rmap(page); > > + page_remove_rmap(page, false); > > if (unlikely(page_mapcount(page) < 0)) > > print_bad_pte(vma, addr, ptent, page); > > if (unlikely(!__tlb_remove_page(tlb, page))) { > > @@ -2189,7 +2189,7 @@ gotten: > > * thread doing COW. > > */ > > ptep_clear_flush_notify(vma, address, page_table); > > - page_add_new_anon_rmap(new_page, vma, address); > > + page_add_new_anon_rmap(new_page, vma, address, false); > > mem_cgroup_commit_charge(new_page, memcg, false); > > lru_cache_add_active_or_unevictable(new_page, vma); > > /* > > @@ -2222,7 +2222,7 @@ gotten: > > * mapcount is visible. So transitively, TLBs to > > * old page will be flushed before it can be reused. > > */ > > - page_remove_rmap(old_page); > > + page_remove_rmap(old_page, false); > > } > > > > /* Free the old page.. 
*/ > > @@ -2465,7 +2465,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, > > pte = maybe_mkwrite(pte_mkdirty(pte), vma); > > flags &= ~FAULT_FLAG_WRITE; > > ret |= VM_FAULT_WRITE; > > - exclusive = 1; > > + exclusive = RMAP_EXCLUSIVE; > > } > > flush_icache_page(vma, page); > > if (pte_swp_soft_dirty(orig_pte)) > > @@ -2475,7 +2475,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma, > > do_page_add_anon_rmap(page, vma, address, exclusive); > > mem_cgroup_commit_charge(page, memcg, true); > > } else { /* ksm created a completely new copy */ > > - page_add_new_anon_rmap(page, vma, address); > > + page_add_new_anon_rmap(page, vma, address, false); > > mem_cgroup_commit_charge(page, memcg, false); > > lru_cache_add_active_or_unevictable(page, vma); > > } > > @@ -2613,7 +2613,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, > > goto release; > > > > inc_mm_counter_fast(mm, MM_ANONPAGES); > > - page_add_new_anon_rmap(page, vma, address); > > + page_add_new_anon_rmap(page, vma, address, false); > > mem_cgroup_commit_charge(page, memcg, false); > > lru_cache_add_active_or_unevictable(page, vma); > > setpte: > > @@ -2701,7 +2701,7 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address, > > entry = maybe_mkwrite(pte_mkdirty(entry), vma); > > if (anon) { > > inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES); > > - page_add_new_anon_rmap(page, vma, address); > > + page_add_new_anon_rmap(page, vma, address, false); > > } else { > > inc_mm_counter_fast(vma->vm_mm, MM_FILEPAGES); > > page_add_file_rmap(page); > > diff --git a/mm/migrate.c b/mm/migrate.c > > index 85e042686031..0d2b3110277a 100644 > > --- a/mm/migrate.c > > +++ b/mm/migrate.c > > @@ -166,7 +166,7 @@ static int remove_migration_pte(struct page *new, struct vm_area_struct *vma, > > else > > page_dup_rmap(new); > > } else if (PageAnon(new)) > > - page_add_anon_rmap(new, vma, addr); > > + page_add_anon_rmap(new, vma, 
addr, false); > > else > > page_add_file_rmap(new); > > > > @@ -1803,7 +1803,7 @@ fail_putback: > > * guarantee the copy is visible before the pagetable update. > > */ > > flush_cache_range(vma, mmun_start, mmun_end); > > - page_add_anon_rmap(new_page, vma, mmun_start); > > + page_add_anon_rmap(new_page, vma, mmun_start, true); > > pmdp_clear_flush_notify(vma, mmun_start, pmd); > > set_pmd_at(mm, mmun_start, pmd, entry); > > flush_tlb_range(vma, mmun_start, mmun_end); > > @@ -1814,13 +1814,13 @@ fail_putback: > > flush_tlb_range(vma, mmun_start, mmun_end); > > mmu_notifier_invalidate_range(mm, mmun_start, mmun_end); > > update_mmu_cache_pmd(vma, address, &entry); > > - page_remove_rmap(new_page); > > + page_remove_rmap(new_page, true); > > goto fail_putback; > > } > > > > mem_cgroup_migrate(page, new_page, false); > > > > - page_remove_rmap(page); > > + page_remove_rmap(page, true); > > > > spin_unlock(ptl); > > mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end); > > diff --git a/mm/rmap.c b/mm/rmap.c > > index 47b3ba87c2dd..f67e83be75e4 100644 > > --- a/mm/rmap.c > > +++ b/mm/rmap.c > > @@ -1041,9 +1041,9 @@ static void __page_check_anon_rmap(struct page *page, > > * (but PageKsm is never downgraded to PageAnon). > > */ > > void page_add_anon_rmap(struct page *page, > > - struct vm_area_struct *vma, unsigned long address) > > + struct vm_area_struct *vma, unsigned long address, bool compound) > > { > > - do_page_add_anon_rmap(page, vma, address, 0); > > + do_page_add_anon_rmap(page, vma, address, compound ? RMAP_COMPOUND : 0); > > } > > > > /* > > @@ -1052,21 +1052,24 @@ void page_add_anon_rmap(struct page *page, > > * Everybody else should continue to use page_add_anon_rmap above. 
> > */ > > void do_page_add_anon_rmap(struct page *page, > > - struct vm_area_struct *vma, unsigned long address, int exclusive) > > + struct vm_area_struct *vma, unsigned long address, int flags) > > { > > int first = atomic_inc_and_test(&page->_mapcount); > > if (first) { > > + bool compound = flags & RMAP_COMPOUND; > > + int nr = compound ? hpage_nr_pages(page) : 1; > > hpage_nr_pages(page) is: > > static inline int hpage_nr_pages(struct page *page) > { > if (unlikely(PageTransHuge(page))) > return HPAGE_PMD_NR; > return 1; > } > > and later... > > > /* > > * We use the irq-unsafe __{inc|mod}_zone_page_stat because > > * these counters are not modified in interrupt context, and > > * pte lock(a spinlock) is held, which implies preemption > > * disabled. > > */ > > - if (PageTransHuge(page)) > > + if (compound) { > > + VM_BUG_ON_PAGE(!PageTransHuge(page), page); > > this means that we could assume that > (compound == true) => (PageTransHuge(page) == true) > > and simplify above to: > > int nr = compound ? HPAGE_PMD_NR : 1; > > Right? No. HPAGE_PMD_NR is defined based on HPAGE_PMD_SHIFT which is BUILD_BUG() without CONFIG_TRANSPARENT_HUGEPAGE. We will get compiler error without the helper. -- Kirill A. Shutemov -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@xxxxxxxxx. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@xxxxxxxxx"> email@xxxxxxxxx </a>