The recently introduced _PAGE_COW should be used instead of the HW Dirty bit whenever a PTE is Write=0, in order to not inadvertently create shadow stack PTEs. Update pte_mk*() helpers to do this, and apply the same changes to pmd and pud. Reviewed-by: Kees Cook <keescook@xxxxxxxxxxxx> Tested-by: Pengfei Xu <pengfei.xu@xxxxxxxxx> Tested-by: John Allen <john.allen@xxxxxxx> Co-developed-by: Yu-cheng Yu <yu-cheng.yu@xxxxxxxxx> Signed-off-by: Yu-cheng Yu <yu-cheng.yu@xxxxxxxxx> Signed-off-by: Rick Edgecombe <rick.p.edgecombe@xxxxxxxxx> --- v4: - Break part patch for better bisectability arch/x86/include/asm/pgtable.h | 125 ++++++++++++++++++++++++++++----- 1 file changed, 107 insertions(+), 18 deletions(-) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index c5047eb5f406..e96558abc8ec 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -124,9 +124,17 @@ extern pmdval_t early_pmd_flags; * The following only work if pte_present() is true. * Undefined behaviour if not.. */ -static inline int pte_dirty(pte_t pte) +static inline bool pte_dirty(pte_t pte) { - return pte_flags(pte) & _PAGE_DIRTY; + return pte_flags(pte) & _PAGE_DIRTY_BITS; +} + +static inline bool pte_shstk(pte_t pte) +{ + if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK)) + return false; + + return (pte_flags(pte) & (_PAGE_RW | _PAGE_DIRTY)) == _PAGE_DIRTY; } static inline int pte_young(pte_t pte) @@ -134,9 +142,18 @@ static inline int pte_young(pte_t pte) return pte_flags(pte) & _PAGE_ACCESSED; } -static inline int pmd_dirty(pmd_t pmd) +static inline bool pmd_dirty(pmd_t pmd) { - return pmd_flags(pmd) & _PAGE_DIRTY; + return pmd_flags(pmd) & _PAGE_DIRTY_BITS; +} + +static inline bool pmd_shstk(pmd_t pmd) +{ + if (!cpu_feature_enabled(X86_FEATURE_USER_SHSTK)) + return false; + + return (pmd_flags(pmd) & (_PAGE_RW | _PAGE_DIRTY | _PAGE_PSE)) == + (_PAGE_DIRTY | _PAGE_PSE); } #define pmd_young pmd_young @@ -145,9 +162,9 @@ static inline int pmd_young(pmd_t pmd) return pmd_flags(pmd) & _PAGE_ACCESSED; } -static inline int pud_dirty(pud_t pud) +static inline bool pud_dirty(pud_t pud) { - return pud_flags(pud) & _PAGE_DIRTY; + return pud_flags(pud) & _PAGE_DIRTY_BITS; } static inline int pud_young(pud_t pud) @@ -157,13 +174,21 @@ static inline int pud_young(pud_t pud) static inline int pte_write(pte_t pte) { - return pte_flags(pte) & _PAGE_RW; + /* + * Shadow stack pages are logically writable, but do not have + * _PAGE_RW. Check for them separately from _PAGE_RW itself. + */ + return (pte_flags(pte) & _PAGE_RW) || pte_shstk(pte); } #define pmd_write pmd_write static inline int pmd_write(pmd_t pmd) { - return pmd_flags(pmd) & _PAGE_RW; + /* + * Shadow stack pages are logically writable, but do not have + * _PAGE_RW. Check for them separately from _PAGE_RW itself. + */ + return (pmd_flags(pmd) & _PAGE_RW) || pmd_shstk(pmd); } #define pud_write pud_write @@ -374,7 +399,7 @@ static inline pte_t pte_clear_uffd_wp(pte_t pte) static inline pte_t pte_mkclean(pte_t pte) { - return pte_clear_flags(pte, _PAGE_DIRTY); + return pte_clear_flags(pte, _PAGE_DIRTY_BITS); } static inline pte_t pte_mkold(pte_t pte) @@ -384,7 +409,16 @@ static inline pte_t pte_mkold(pte_t pte) static inline pte_t pte_wrprotect(pte_t pte) { - return pte_clear_flags(pte, _PAGE_RW); + pte = pte_clear_flags(pte, _PAGE_RW); + + /* + * Blindly clearing _PAGE_RW might accidentally create + * a shadow stack PTE (Write=0,Dirty=1). Move the hardware + * dirty value to the software bit. + */ + if (pte_dirty(pte)) + pte = pte_mkcow(pte); + return pte; } static inline pte_t pte_mkexec(pte_t pte) @@ -396,6 +430,10 @@ static inline pte_t __pte_mkdirty(pte_t pte, bool soft) { pteval_t dirty = _PAGE_DIRTY; + /* Avoid creating Dirty=1,Write=0 PTEs */ + if (cpu_feature_enabled(X86_FEATURE_USER_SHSTK) && !pte_write(pte)) + dirty = _PAGE_COW; + if (soft) dirty |= _PAGE_SOFT_DIRTY; @@ -407,6 +445,12 @@ static inline pte_t pte_mkdirty(pte_t pte) return __pte_mkdirty(pte, true); } +static inline pte_t pte_mkwrite_shstk(pte_t pte) +{ + /* pte_clear_cow() also sets Dirty=1 */ + return pte_clear_cow(pte); +} + static inline pte_t pte_mkyoung(pte_t pte) { return pte_set_flags(pte, _PAGE_ACCESSED); @@ -414,7 +458,12 @@ static inline pte_t pte_mkyoung(pte_t pte) static inline pte_t pte_mkwrite(pte_t pte) { - return pte_set_flags(pte, _PAGE_RW); + pte = pte_set_flags(pte, _PAGE_RW); + + if (pte_dirty(pte)) + pte = pte_clear_cow(pte); + + return pte; } static inline pte_t pte_mkhuge(pte_t pte) @@ -505,18 +554,30 @@ static inline pmd_t pmd_mkold(pmd_t pmd) static inline pmd_t pmd_mkclean(pmd_t pmd) { - return pmd_clear_flags(pmd, _PAGE_DIRTY); + return pmd_clear_flags(pmd, _PAGE_DIRTY_BITS); } static inline pmd_t pmd_wrprotect(pmd_t pmd) { - return pmd_clear_flags(pmd, _PAGE_RW); + pmd = pmd_clear_flags(pmd, _PAGE_RW); + /* + * Blindly clearing _PAGE_RW might accidentally create + * a shadow stack PMD (RW=0, Dirty=1). Move the hardware + * dirty value to the software bit. + */ + if (pmd_dirty(pmd)) + pmd = pmd_mkcow(pmd); + return pmd; } static inline pmd_t __pmd_mkdirty(pmd_t pmd, bool soft) { pmdval_t dirty = _PAGE_DIRTY; + /* Avoid creating (HW)Dirty=1, Write=0 PMDs */ + if (cpu_feature_enabled(X86_FEATURE_USER_SHSTK) && !pmd_write(pmd)) + dirty = _PAGE_COW; + if (soft) dirty |= _PAGE_SOFT_DIRTY; @@ -528,6 +589,11 @@ static inline pmd_t pmd_mkdirty(pmd_t pmd) return __pmd_mkdirty(pmd, true); } +static inline pmd_t pmd_mkwrite_shstk(pmd_t pmd) +{ + return pmd_clear_cow(pmd); +} + static inline pmd_t pmd_mkdevmap(pmd_t pmd) { return pmd_set_flags(pmd, _PAGE_DEVMAP); @@ -545,7 +611,11 @@ static inline pmd_t pmd_mkyoung(pmd_t pmd) static inline pmd_t pmd_mkwrite(pmd_t pmd) { - return pmd_set_flags(pmd, _PAGE_RW); + pmd = pmd_set_flags(pmd, _PAGE_RW); + + if (pmd_dirty(pmd)) + pmd = pmd_clear_cow(pmd); + return pmd; } static inline pud_t pud_set_flags(pud_t pud, pudval_t set) @@ -589,17 +659,32 @@ static inline pud_t pud_mkold(pud_t pud) static inline pud_t pud_mkclean(pud_t pud) { - return pud_clear_flags(pud, _PAGE_DIRTY); + return pud_clear_flags(pud, _PAGE_DIRTY_BITS); } static inline pud_t pud_wrprotect(pud_t pud) { - return pud_clear_flags(pud, _PAGE_RW); + pud = pud_clear_flags(pud, _PAGE_RW); + + /* + * Blindly clearing _PAGE_RW might accidentally create + * a shadow stack PUD (RW=0, Dirty=1). Move the hardware + * dirty value to the software bit. + */ + if (pud_dirty(pud)) + pud = pud_mkcow(pud); + return pud; } static inline pud_t pud_mkdirty(pud_t pud) { - return pud_set_flags(pud, _PAGE_DIRTY | _PAGE_SOFT_DIRTY); + pudval_t dirty = _PAGE_DIRTY; + + /* Avoid creating (HW)Dirty=1, Write=0 PUDs */ + if (cpu_feature_enabled(X86_FEATURE_USER_SHSTK) && !pud_write(pud)) + dirty = _PAGE_COW; + + return pud_set_flags(pud, dirty | _PAGE_SOFT_DIRTY); } static inline pud_t pud_mkdevmap(pud_t pud) @@ -619,7 +704,11 @@ static inline pud_t pud_mkyoung(pud_t pud) static inline pud_t pud_mkwrite(pud_t pud) { - return pud_set_flags(pud, _PAGE_RW); + pud = pud_set_flags(pud, _PAGE_RW); + + if (pud_dirty(pud)) + pud = pud_clear_cow(pud); + return pud; } #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY -- 2.17.1