On Tue, Dec 29, 2020 at 01:30:35PM -0800, Yu-cheng Yu wrote: > @@ -182,6 +182,12 @@ static inline int pud_young(pud_t pud) > > static inline int pte_write(pte_t pte) > { > + /* > + * If _PAGE_DIRTY is set, the PTE must either have _PAGE_RW or be > + * a shadow stack PTE, which is logically writable. > + */ > + if (cpu_feature_enabled(X86_FEATURE_SHSTK)) > + return pte_flags(pte) & (_PAGE_RW | _PAGE_DIRTY); > return pte_flags(pte) & _PAGE_RW; if (cpu_feature_enabled(X86_FEATURE_SHSTK)) return pte_flags(pte) & (_PAGE_RW | _PAGE_DIRTY); else return pte_flags(pte) & _PAGE_RW; The else makes it ballanced and easier to read. > @@ -333,7 +339,7 @@ static inline pte_t pte_clear_uffd_wp(pte_t pte) > > static inline pte_t pte_mkclean(pte_t pte) > { > - return pte_clear_flags(pte, _PAGE_DIRTY); > + return pte_clear_flags(pte, _PAGE_DIRTY_BITS); > } > > static inline pte_t pte_mkold(pte_t pte) > @@ -343,6 +349,16 @@ static inline pte_t pte_mkold(pte_t pte) > > static inline pte_t pte_wrprotect(pte_t pte) > { > + /* > + * Blindly clearing _PAGE_RW might accidentally create > + * a shadow stack PTE (RW=0, Dirty=1). Move the hardware > + * dirty value to the software bit. > + */ > + if (cpu_feature_enabled(X86_FEATURE_SHSTK)) { > + pte.pte |= (pte.pte & _PAGE_DIRTY) >> _PAGE_BIT_DIRTY << _PAGE_BIT_COW; Why the unreadable shifting when you can simply do: if (pte.pte & _PAGE_DIRTY) pte.pte |= _PAGE_COW; ? > @@ -434,16 +469,40 @@ static inline pmd_t pmd_mkold(pmd_t pmd) > > static inline pmd_t pmd_mkclean(pmd_t pmd) > { > - return pmd_clear_flags(pmd, _PAGE_DIRTY); > + return pmd_clear_flags(pmd, _PAGE_DIRTY_BITS); > } > > static inline pmd_t pmd_wrprotect(pmd_t pmd) > { > + /* > + * Blindly clearing _PAGE_RW might accidentally create > + * a shadow stack PMD (RW=0, Dirty=1). Move the hardware > + * dirty value to the software bit. > + */ > + if (cpu_feature_enabled(X86_FEATURE_SHSTK)) { > + pmdval_t v = native_pmd_val(pmd); > + > + v |= (v & _PAGE_DIRTY) >> _PAGE_BIT_DIRTY << _PAGE_BIT_COW; As above. > @@ -488,17 +554,35 @@ static inline pud_t pud_mkold(pud_t pud) > > static inline pud_t pud_mkclean(pud_t pud) > { > - return pud_clear_flags(pud, _PAGE_DIRTY); > + return pud_clear_flags(pud, _PAGE_DIRTY_BITS); > } > > static inline pud_t pud_wrprotect(pud_t pud) > { > + /* > + * Blindly clearing _PAGE_RW might accidentally create > + * a shadow stack PUD (RW=0, Dirty=1). Move the hardware > + * dirty value to the software bit. > + */ > + if (cpu_feature_enabled(X86_FEATURE_SHSTK)) { > + pudval_t v = native_pud_val(pud); > + > + v |= (v & _PAGE_DIRTY) >> _PAGE_BIT_DIRTY << _PAGE_BIT_COW; Ditto. > @@ -1131,6 +1222,12 @@ extern int pmdp_clear_flush_young(struct vm_area_struct *vma, > #define pmd_write pmd_write > static inline int pmd_write(pmd_t pmd) > { > + /* > + * If _PAGE_DIRTY is set, then the PMD must either have _PAGE_RW or > + * be a shadow stack PMD, which is logically writable. > + */ > + if (cpu_feature_enabled(X86_FEATURE_SHSTK)) > + return pmd_flags(pmd) & (_PAGE_RW | _PAGE_DIRTY); else > return pmd_flags(pmd) & _PAGE_RW; > } > -- Regards/Gruss, Boris. https://people.kernel.org/tglx/notes-about-netiquette