This is a note to let you know that I've just added the patch titled mm, page_vma_mapped: Drop faulty pointer arithmetics in check_pte() to the 4.14-stable tree which can be found at: http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary The filename of the patch is: mm-page_vma_mapped-drop-faulty-pointer-arithmetics-in-check_pte.patch and it can be found in the queue-4.14 subdirectory. If you, or anyone else, feels it should not be added to the stable tree, please let <stable@xxxxxxxxxxxxxxx> know about it. >From 0d665e7b109d512b7cae3ccef6e8654714887844 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" <kirill.shutemov@xxxxxxxxxxxxxxx> Date: Fri, 19 Jan 2018 15:49:24 +0300 Subject: mm, page_vma_mapped: Drop faulty pointer arithmetics in check_pte() From: Kirill A. Shutemov <kirill.shutemov@xxxxxxxxxxxxxxx> commit 0d665e7b109d512b7cae3ccef6e8654714887844 upstream. Tetsuo reported random crashes under memory pressure on 32-bit x86 system and tracked down to change that introduced page_vma_mapped_walk(). The root cause of the issue is the faulty pointer math in check_pte(). As ->pte may point to an arbitrary page we have to check that they are belong to the section before doing math. Otherwise it may lead to weird results. It wasn't noticed until now as mem_map[] is virtually contiguous on flatmem or vmemmap sparsemem. Pointer arithmetic just works against all 'struct page' pointers. But with classic sparsemem, it doesn't because each section memap is allocated separately and so consecutive pfns crossing two sections might have struct pages at completely unrelated addresses. Let's restructure code a bit and replace pointer arithmetic with operations on pfns. Signed-off-by: Kirill A. Shutemov <kirill.shutemov@xxxxxxxxxxxxxxx> Reported-and-tested-by: Tetsuo Handa <penguin-kernel@xxxxxxxxxxxxxxxxxxx> Acked-by: Michal Hocko <mhocko@xxxxxxxx> Fixes: ace71a19cec5 ("mm: introduce page_vma_mapped_walk()") Signed-off-by: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx> Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx> --- include/linux/swapops.h | 21 ++++++++++++++++ mm/page_vma_mapped.c | 63 ++++++++++++++++++++++++++++-------------------- 2 files changed, 59 insertions(+), 25 deletions(-) --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -124,6 +124,11 @@ static inline bool is_write_device_priva return unlikely(swp_type(entry) == SWP_DEVICE_WRITE); } +static inline unsigned long device_private_entry_to_pfn(swp_entry_t entry) +{ + return swp_offset(entry); +} + static inline struct page *device_private_entry_to_page(swp_entry_t entry) { return pfn_to_page(swp_offset(entry)); @@ -154,6 +159,11 @@ static inline bool is_write_device_priva return false; } +static inline unsigned long device_private_entry_to_pfn(swp_entry_t entry) +{ + return 0; +} + static inline struct page *device_private_entry_to_page(swp_entry_t entry) { return NULL; @@ -189,6 +199,11 @@ static inline int is_write_migration_ent return unlikely(swp_type(entry) == SWP_MIGRATION_WRITE); } +static inline unsigned long migration_entry_to_pfn(swp_entry_t entry) +{ + return swp_offset(entry); +} + static inline struct page *migration_entry_to_page(swp_entry_t entry) { struct page *p = pfn_to_page(swp_offset(entry)); @@ -218,6 +233,12 @@ static inline int is_migration_entry(swp { return 0; } + +static inline unsigned long migration_entry_to_pfn(swp_entry_t entry) +{ + return 0; +} + static inline struct page *migration_entry_to_page(swp_entry_t entry) { return NULL; --- a/mm/page_vma_mapped.c +++ b/mm/page_vma_mapped.c @@ -30,10 +30,29 @@ static bool map_pte(struct page_vma_mapp return true; } +/** + * check_pte - check if @pvmw->page is mapped at the @pvmw->pte + * + * page_vma_mapped_walk() found a place where @pvmw->page is *potentially* + * mapped. check_pte() has to validate this. + * + * @pvmw->pte may point to empty PTE, swap PTE or PTE pointing to arbitrary + * page. + * + * If PVMW_MIGRATION flag is set, returns true if @pvmw->pte contains migration + * entry that points to @pvmw->page or any subpage in case of THP. + * + * If PVMW_MIGRATION flag is not set, returns true if @pvmw->pte points to + * @pvmw->page or any subpage in case of THP. + * + * Otherwise, return false. + * + */ static bool check_pte(struct page_vma_mapped_walk *pvmw) { + unsigned long pfn; + if (pvmw->flags & PVMW_MIGRATION) { -#ifdef CONFIG_MIGRATION swp_entry_t entry; if (!is_swap_pte(*pvmw->pte)) return false; @@ -41,37 +60,31 @@ static bool check_pte(struct page_vma_ma if (!is_migration_entry(entry)) return false; - if (migration_entry_to_page(entry) - pvmw->page >= - hpage_nr_pages(pvmw->page)) { - return false; - } - if (migration_entry_to_page(entry) < pvmw->page) - return false; -#else - WARN_ON_ONCE(1); -#endif - } else { - if (is_swap_pte(*pvmw->pte)) { - swp_entry_t entry; - entry = pte_to_swp_entry(*pvmw->pte); - if (is_device_private_entry(entry) && - device_private_entry_to_page(entry) == pvmw->page) - return true; - } + pfn = migration_entry_to_pfn(entry); + } else if (is_swap_pte(*pvmw->pte)) { + swp_entry_t entry; - if (!pte_present(*pvmw->pte)) + /* Handle un-addressable ZONE_DEVICE memory */ + entry = pte_to_swp_entry(*pvmw->pte); + if (!is_device_private_entry(entry)) return false; - /* THP can be referenced by any subpage */ - if (pte_page(*pvmw->pte) - pvmw->page >= - hpage_nr_pages(pvmw->page)) { - return false; - } - if (pte_page(*pvmw->pte) < pvmw->page) + pfn = device_private_entry_to_pfn(entry); + } else { + if (!pte_present(*pvmw->pte)) return false; + + pfn = pte_pfn(*pvmw->pte); } + if (pfn < page_to_pfn(pvmw->page)) + return false; + + /* THP can be referenced by any subpage */ + if (pfn - page_to_pfn(pvmw->page) >= hpage_nr_pages(pvmw->page)) + return false; + return true; } Patches currently in stable-queue which might be from kirill.shutemov@xxxxxxxxxxxxxxx are queue-4.14/mm-page_vma_mapped-drop-faulty-pointer-arithmetics-in-check_pte.patch