The user PTE page table page may be freed when the last percpu_ref is
dropped, so we need to try to get its percpu_ref before accessing the
PTE page to prevent it from being freed during the access.

This patch adds pte_tryget() and {__,}pte_put() to help us get and put
the percpu_ref of user PTE page table pages.

Signed-off-by: Qi Zheng <zhengqi.arch@xxxxxxxxxxxxx>
---
 include/linux/pte_ref.h | 23 ++++++++++++++++
 mm/pte_ref.c            | 58 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 81 insertions(+)

diff --git a/include/linux/pte_ref.h b/include/linux/pte_ref.h
index d3963a151ca5..bfe620038699 100644
--- a/include/linux/pte_ref.h
+++ b/include/linux/pte_ref.h
@@ -12,6 +12,10 @@
 
 bool pte_ref_init(pgtable_t pte);
 void pte_ref_free(pgtable_t pte);
+void free_user_pte(struct mm_struct *mm, pmd_t *pmd, unsigned long addr);
+bool pte_tryget(struct mm_struct *mm, pmd_t *pmd, unsigned long addr);
+void __pte_put(pgtable_t page);
+void pte_put(pte_t *ptep);
 
 #else /* !CONFIG_FREE_USER_PTE */
 
@@ -24,6 +28,25 @@ static inline void pte_ref_free(pgtable_t pte)
 {
 }
 
+static inline void free_user_pte(struct mm_struct *mm, pmd_t *pmd,
+				 unsigned long addr)
+{
+}
+
+static inline bool pte_tryget(struct mm_struct *mm, pmd_t *pmd,
+			      unsigned long addr)
+{
+	return true;
+}
+
+static inline void __pte_put(pgtable_t page)
+{
+}
+
+static inline void pte_put(pte_t *ptep)
+{
+}
+
 #endif /* CONFIG_FREE_USER_PTE */
 
 #endif /* _LINUX_PTE_REF_H */
diff --git a/mm/pte_ref.c b/mm/pte_ref.c
index 52e31be00de4..5b382445561e 100644
--- a/mm/pte_ref.c
+++ b/mm/pte_ref.c
@@ -44,4 +44,62 @@ void pte_ref_free(pgtable_t pte)
 	kfree(ref);
 }
 
+void free_user_pte(struct mm_struct *mm, pmd_t *pmd, unsigned long addr) {}
+
+/**
+ * pte_tryget - try to get the pte_ref of the user PTE page table page
+ * @mm: pointer to the target address space
+ * @pmd: pointer to a PMD
+ * @addr: virtual address associated with pmd
+ *
+ * Return: true if getting the pte_ref succeeded, false otherwise.
+ *
+ * Before accessing the user PTE page table, we need to hold a refcount to
+ * protect against the concurrent release of the PTE page table.
+ * But we will fail in the following cases:
+ *	- The content mapped in @pmd is not a PTE page
+ *	- The pte_ref is zero, it may be reclaimed
+ */
+bool pte_tryget(struct mm_struct *mm, pmd_t *pmd, unsigned long addr)
+{
+	bool retval = true;
+	pmd_t pmdval;
+	pgtable_t pte;
+
+	rcu_read_lock();
+	pmdval = READ_ONCE(*pmd);
+	pte = pmd_pgtable(pmdval);
+	if (unlikely(pmd_none(pmdval) || pmd_leaf(pmdval))) {
+		retval = false;
+	} else if (!percpu_ref_tryget(pte->pte_ref)) {
+		rcu_read_unlock();
+		/*
+		 * Also do free_user_pte() here to prevent missed reclaim due
+		 * to a race condition.
+		 */
+		free_user_pte(mm, pmd, addr & PMD_MASK);
+		return false;
+	}
+	rcu_read_unlock();
+
+	return retval;
+}
+
+void __pte_put(pgtable_t page)
+{
+	percpu_ref_put(page->pte_ref);
+}
+
+void pte_put(pte_t *ptep)
+{
+	pgtable_t page;
+
+	if (pte_huge(*ptep))
+		return;
+
+	page = pte_to_page(ptep);
+	__pte_put(page);
+}
+EXPORT_SYMBOL(pte_put);
+
 #endif /* CONFIG_FREE_USER_PTE */
-- 
2.20.1
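
For reference, the intended calling pattern looks roughly like the sketch
below. example_pte_access() is a made-up helper used only for illustration;
apart from pte_tryget(), pte_put(), pte_offset_map() and pte_unmap(), nothing
in it is taken from this patch or from an existing call site.

/*
 * Illustrative only: a hypothetical caller that accesses the PTE entries
 * under @pmd. The reference taken by pte_tryget() keeps the user PTE page
 * table page alive for the duration of the access.
 */
static void example_pte_access(struct mm_struct *mm, pmd_t *pmd,
			       unsigned long addr)
{
	pte_t *ptep;

	/*
	 * Fails if @pmd does not map a PTE page (none or leaf), or if the
	 * page's pte_ref has already dropped to zero and the page is being
	 * reclaimed.
	 */
	if (!pte_tryget(mm, pmd, addr))
		return;

	ptep = pte_offset_map(pmd, addr);

	/* ... the PTE page cannot be freed while the reference is held ... */

	pte_put(ptep);		/* pairs with the pte_tryget() above */
	pte_unmap(ptep);
}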