In most cases, we don't expect any write access to a COW-ed PTE table.
To catch unexpected modifications, add a new check to the page table
check. However, there are still some valid reasons to modify a COW-ed
PTE table, so also add enable/disable functions for the check.

Signed-off-by: Chih-En Lin <shiyn.lin@xxxxxxxxx>
---
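[ Usage sketch, not part of the patch: a path that has a legitimate
  reason to write through a still-shared, COW-ed PTE table is expected
  to bracket the modification with the disable/enable helpers, the same
  way copy_cow_pte_range() does below. The function and values here are
  hypothetical, for illustration only.

	/* Deliberately modify one entry of a COW-ed PTE table. */
	static void modify_cowed_pte(struct mm_struct *mm, unsigned long addr,
				     pte_t *ptep, pte_t newpte)
	{
		/* Mute the modification check for this PTE table. */
		check_cowed_pte_table_disable(ptep);
		/* On x86, set_pte_at() now calls cowed_pte_table_check_modify(). */
		set_pte_at(mm, addr, ptep, newpte);
		/* Re-arm the check once the intended write is done. */
		check_cowed_pte_table_enable(ptep);
	}
]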
 arch/x86/include/asm/pgtable.h   |  1 +
 include/linux/page_table_check.h | 62 ++++++++++++++++++++++++++++++++
 mm/memory.c                      |  4 +++
 mm/page_table_check.c            | 58 ++++++++++++++++++++++++++++++
 4 files changed, 125 insertions(+)

diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 7425f32e5293..6b323c672e36 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -1022,6 +1022,7 @@ static inline pud_t native_local_pudp_get_and_clear(pud_t *pudp)
 static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
 			      pte_t *ptep, pte_t pte)
 {
+	cowed_pte_table_check_modify(mm, addr, ptep, pte);
 	page_table_check_pte_set(mm, addr, ptep, pte);
 	set_pte(ptep, pte);
 }
diff --git a/include/linux/page_table_check.h b/include/linux/page_table_check.h
index 01e16c7696ec..4a54dc454281 100644
--- a/include/linux/page_table_check.h
+++ b/include/linux/page_table_check.h
@@ -113,6 +113,54 @@ static inline void page_table_check_pte_clear_range(struct mm_struct *mm,
 	__page_table_check_pte_clear_range(mm, addr, pmd);
 }
 
+#ifdef CONFIG_COW_PTE
+void __check_cowed_pte_table_enable(pte_t *ptep);
+void __check_cowed_pte_table_disable(pte_t *ptep);
+void __cowed_pte_table_check_modify(struct mm_struct *mm, unsigned long addr,
+				    pte_t *ptep, pte_t pte);
+
+static inline void check_cowed_pte_table_enable(pte_t *ptep)
+{
+	if (static_branch_likely(&page_table_check_disabled))
+		return;
+
+	__check_cowed_pte_table_enable(ptep);
+}
+
+static inline void check_cowed_pte_table_disable(pte_t *ptep)
+{
+	if (static_branch_likely(&page_table_check_disabled))
+		return;
+
+	__check_cowed_pte_table_disable(ptep);
+}
+
+static inline void cowed_pte_table_check_modify(struct mm_struct *mm,
+						unsigned long addr,
+						pte_t *ptep, pte_t pte)
+{
+	if (static_branch_likely(&page_table_check_disabled))
+		return;
+
+	__cowed_pte_table_check_modify(mm, addr, ptep, pte);
+}
+#else
+static inline void check_cowed_pte_table_enable(pte_t *ptep)
+{
+}
+
+static inline void check_cowed_pte_table_disable(pte_t *ptep)
+{
+}
+
+static inline void cowed_pte_table_check_modify(struct mm_struct *mm,
+						unsigned long addr,
+						pte_t *ptep, pte_t pte)
+{
+}
+#endif /* CONFIG_COW_PTE */
+
+
 #else
 
 static inline void page_table_check_alloc(struct page *page, unsigned int order)
@@ -162,5 +210,19 @@ static inline void page_table_check_pte_clear_range(struct mm_struct *mm,
 {
 }
 
+static inline void check_cowed_pte_table_enable(pte_t *ptep)
+{
+}
+
+static inline void check_cowed_pte_table_disable(pte_t *ptep)
+{
+}
+
+static inline void cowed_pte_table_check_modify(struct mm_struct *mm,
+						unsigned long addr,
+						pte_t *ptep, pte_t pte)
+{
+}
+
 #endif /* CONFIG_PAGE_TABLE_CHECK */
 #endif /* __LINUX_PAGE_TABLE_CHECK_H */
diff --git a/mm/memory.c b/mm/memory.c
index 7908e20f802a..e62487413038 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1202,10 +1202,12 @@ copy_cow_pte_range(struct vm_area_struct *dst_vma,
 			 * Although, parent's PTE is COW-ed, we should
 			 * still need to handle all the swap stuffs.
 			 */
+			check_cowed_pte_table_disable(src_pte);
 			ret = copy_nonpresent_pte(dst_mm, src_mm,
 						  src_pte, src_pte,
 						  curr, curr,
 						  addr, rss);
+			check_cowed_pte_table_enable(src_pte);
 			if (ret == -EIO) {
 				entry = pte_to_swp_entry(*src_pte);
 				break;
@@ -1223,8 +1225,10 @@ copy_cow_pte_range(struct vm_area_struct *dst_vma,
 			 * copy_present_pte() will determine the mapped page
 			 * should be COW mapping or not.
 			 */
+			check_cowed_pte_table_disable(src_pte);
 			ret = copy_present_pte(curr, curr, src_pte, src_pte,
 					       addr, rss, NULL);
+			check_cowed_pte_table_enable(src_pte);
 			/*
 			 * If we need a pre-allocated page for this pte,
 			 * drop the lock, recover all the entries, fall
diff --git a/mm/page_table_check.c b/mm/page_table_check.c
index 25d8610c0042..5175c7476508 100644
--- a/mm/page_table_check.c
+++ b/mm/page_table_check.c
@@ -14,6 +14,9 @@ struct page_table_check {
 	atomic_t anon_map_count;
 	atomic_t file_map_count;
+#ifdef CONFIG_COW_PTE
+	atomic_t check_cowed_pte;
+#endif
 };
 
 static bool __page_table_check_enabled __initdata =
@@ -248,3 +251,58 @@ void __page_table_check_pte_clear_range(struct mm_struct *mm,
 		pte_unmap(ptep - PTRS_PER_PTE);
 	}
 }
+
+#ifdef CONFIG_COW_PTE
+void __check_cowed_pte_table_enable(pte_t *ptep)
+{
+	struct page *page = pte_page(*ptep);
+	struct page_ext *page_ext = page_ext_get(page);
+	struct page_table_check *ptc = get_page_table_check(page_ext);
+
+	atomic_set(&ptc->check_cowed_pte, 1);
+	page_ext_put(page_ext);
+}
+
+void __check_cowed_pte_table_disable(pte_t *ptep)
+{
+	struct page *page = pte_page(*ptep);
+	struct page_ext *page_ext = page_ext_get(page);
+	struct page_table_check *ptc = get_page_table_check(page_ext);
+
+	atomic_set(&ptc->check_cowed_pte, 0);
+	page_ext_put(page_ext);
+}
+
+static int check_cowed_pte_table(pte_t *ptep)
+{
+	struct page *page = pte_page(*ptep);
+	struct page_ext *page_ext = page_ext_get(page);
+	struct page_table_check *ptc = get_page_table_check(page_ext);
+	int check = 0;
+
+	check = atomic_read(&ptc->check_cowed_pte);
+	page_ext_put(page_ext);
+
+	return check;
+}
+
+void __cowed_pte_table_check_modify(struct mm_struct *mm, unsigned long addr,
+				    pte_t *ptep, pte_t pte)
+{
+	pgd_t *pgd;
+	p4d_t *p4d;
+	pud_t *pud;
+	pmd_t *pmd;
+
+	if (!test_bit(MMF_COW_PTE, &mm->flags) || !check_cowed_pte_table(ptep))
+		return;
+
+	pgd = pgd_offset(mm, addr);
+	p4d = p4d_offset(pgd, addr);
+	pud = pud_offset(p4d, addr);
+	pmd = pmd_offset(pud, addr);
+
+	if (!pmd_none(*pmd) && !pmd_write(*pmd) && cow_pte_count(pmd) > 1)
+		BUG_ON(!pte_same(*ptep, pte));
+}
+#endif
-- 
2.34.1