On Wed, May 12, 2021 at 11:09 PM Peter Collingbourne <pcc@xxxxxxxxxx> wrote:
>
> Currently, on an anonymous page fault, the kernel allocates a zeroed
> page and maps it in user space. If the mapping is tagged (PROT_MTE),
> set_pte_at() additionally clears the tags. It is, however, more
> efficient to clear the tags at the same time as zeroing the data on
> allocation. To avoid clearing the tags on any page (which may not be
> mapped as tagged), only do this if the vma flags contain VM_MTE. This
> requires introducing a new GFP flag that is used to determine whether
> to clear the tags.
>
> The DC GZVA instruction with a 0 top byte (and 0 tag) requires
> top-byte-ignore. Set the TCR_EL1.{TBI1,TBID1} bits irrespective of
> whether KASAN_HW is enabled.
>
> Signed-off-by: Peter Collingbourne <pcc@xxxxxxxxxx>
> Co-developed-by: Catalin Marinas <catalin.marinas@xxxxxxx>
> Signed-off-by: Catalin Marinas <catalin.marinas@xxxxxxx>
> Link: https://linux-review.googlesource.com/id/Id46dc94e30fe11474f7e54f5d65e7658dbdddb26
> Reviewed-by: Catalin Marinas <catalin.marinas@xxxxxxx>
> ---
> v2:
> - remove want_zero_tags_on_free()
>
>  arch/arm64/include/asm/mte.h  |  4 ++++
>  arch/arm64/include/asm/page.h |  9 +++++++--
>  arch/arm64/lib/mte.S          | 20 ++++++++++++++++++++
>  arch/arm64/mm/fault.c         | 25 +++++++++++++++++++++++++
>  arch/arm64/mm/proc.S          | 10 +++++++---
>  include/linux/gfp.h           |  9 +++++++--
>  include/linux/highmem.h       |  8 ++++++++
>  mm/kasan/hw_tags.c            |  9 ++++++++-
>  mm/page_alloc.c               | 13 ++++++++++---
>  9 files changed, 96 insertions(+), 11 deletions(-)
>
> diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h
> index bc88a1ced0d7..67bf259ae768 100644
> --- a/arch/arm64/include/asm/mte.h
> +++ b/arch/arm64/include/asm/mte.h
> @@ -37,6 +37,7 @@ void mte_free_tag_storage(char *storage);
>  /* track which pages have valid allocation tags */
>  #define PG_mte_tagged  PG_arch_2
>
> +void mte_zero_clear_page_tags(void *addr);
>  void mte_sync_tags(pte_t *ptep, pte_t pte);
>  void mte_copy_page_tags(void *kto, const void *kfrom);
>  void mte_thread_init_user(void);
> @@ -53,6 +54,9 @@ int mte_ptrace_copy_tags(struct task_struct *child, long request,
>  /* unused if !CONFIG_ARM64_MTE, silence the compiler */
>  #define PG_mte_tagged  0
>
> +static inline void mte_zero_clear_page_tags(void *addr)
> +{
> +}
>  static inline void mte_sync_tags(pte_t *ptep, pte_t pte)
>  {
>  }
> diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h
> index 012cffc574e8..448e14071d13 100644
> --- a/arch/arm64/include/asm/page.h
> +++ b/arch/arm64/include/asm/page.h
> @@ -13,6 +13,7 @@
>  #ifndef __ASSEMBLY__
>
>  #include <linux/personality.h> /* for READ_IMPLIES_EXEC */
> +#include <linux/types.h> /* for gfp_t */
>  #include <asm/pgtable-types.h>
>
>  struct page;
> @@ -28,10 +29,14 @@ void copy_user_highpage(struct page *to, struct page *from,
>  void copy_highpage(struct page *to, struct page *from);
>  #define __HAVE_ARCH_COPY_HIGHPAGE
>
> -#define __alloc_zeroed_user_highpage(movableflags, vma, vaddr) \
> -       alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | movableflags, vma, vaddr)
> +struct page *__alloc_zeroed_user_highpage(gfp_t movableflags,
> +                                          struct vm_area_struct *vma,
> +                                          unsigned long vaddr);
>  #define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE
>
> +void tag_clear_highpage(struct page *to);
> +#define __HAVE_ARCH_TAG_CLEAR_HIGHPAGE
> +
>  #define clear_user_page(page, vaddr, pg)       clear_page(page)
>  #define copy_user_page(to, from, vaddr, pg)    copy_page(to, from)
>
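
As context for the page.h change above (not part of the patch): the
generic __alloc_zeroed_user_highpage() fallback that this arch override
replaces zeroes the page only after allocating it, roughly as in the
sketch below (paraphrased from the include/linux/highmem.h of this era,
not quoted verbatim). The arm64 override can instead pass __GFP_ZERO
plus, for VM_MTE vmas, __GFP_ZEROTAGS, so the allocator zeroes data and
tags in a single pass.

        /* Approximate shape of the generic fallback. */
        static inline struct page *
        __alloc_zeroed_user_highpage(gfp_t movableflags,
                                     struct vm_area_struct *vma,
                                     unsigned long vaddr)
        {
                struct page *page = alloc_page_vma(GFP_HIGHUSER | movableflags,
                                                   vma, vaddr);

                if (page)
                        clear_user_highpage(page, vaddr); /* zero after alloc */

                return page;
        }
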
> diff --git a/arch/arm64/lib/mte.S b/arch/arm64/lib/mte.S
> index 351537c12f36..e83643b3995f 100644
> --- a/arch/arm64/lib/mte.S
> +++ b/arch/arm64/lib/mte.S
> @@ -36,6 +36,26 @@ SYM_FUNC_START(mte_clear_page_tags)
>         ret
>  SYM_FUNC_END(mte_clear_page_tags)
>
> +/*
> + * Zero the page and tags at the same time
> + *
> + * Parameters:
> + *     x0 - address to the beginning of the page
> + */
> +SYM_FUNC_START(mte_zero_clear_page_tags)
> +       mrs     x1, dczid_el0
> +       and     w1, w1, #0xf
> +       mov     x2, #4
> +       lsl     x1, x2, x1
> +       and     x0, x0, #(1 << MTE_TAG_SHIFT) - 1 // clear the tag
> +
> +1:     dc      gzva, x0
> +       add     x0, x0, x1
> +       tst     x0, #(PAGE_SIZE - 1)
> +       b.ne    1b
> +       ret
> +SYM_FUNC_END(mte_zero_clear_page_tags)
> +
>  /*
>   * Copy the tags from the source page to the destination one
>   * x0 - address of the destination page
> diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
> index 871c82ab0a30..8127e0c0b8fb 100644
> --- a/arch/arm64/mm/fault.c
> +++ b/arch/arm64/mm/fault.c
> @@ -921,3 +921,28 @@ void do_debug_exception(unsigned long addr_if_watchpoint, unsigned int esr,
>         debug_exception_exit(regs);
>  }
>  NOKPROBE_SYMBOL(do_debug_exception);
> +
> +/*
> + * Used during anonymous page fault handling.
> + */
> +struct page *__alloc_zeroed_user_highpage(gfp_t flags,
> +                                          struct vm_area_struct *vma,
> +                                          unsigned long vaddr)
> +{
> +       /*
> +        * If the page is mapped with PROT_MTE, initialise the tags at the
> +        * point of allocation and page zeroing as this is usually faster than
> +        * separate DC ZVA and STGM.
> +        */
> +       if (vma->vm_flags & VM_MTE)
> +               flags |= __GFP_ZEROTAGS;
> +
> +       return alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO | flags, vma, vaddr);
> +}
> +
> +void tag_clear_highpage(struct page *page)
> +{
> +       mte_zero_clear_page_tags(page_address(page));
> +       page_kasan_tag_reset(page);
> +       set_bit(PG_mte_tagged, &page->flags);
> +}
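
To make the DC GZVA loop in mte.S above easier to follow, here is an
equivalent C sketch (illustrative only, not part of the patch;
dczid_el0_read() and dc_gzva() are hypothetical stand-ins for the MRS
and DC GZVA instructions, and MTE_TAG_SHIFT is 56 on arm64):

        static void mte_zero_clear_page_tags_sketch(void *addr)
        {
                /* DC (G)ZVA block size in bytes is 4 << DCZID_EL0.BS */
                unsigned long block = 4UL << (dczid_el0_read() & 0xf);
                /* Clear the top byte, including the tag, so the 0-tag
                 * DC GZVA works; this is why TBI1/TBID1 must be set. */
                unsigned long p = (unsigned long)addr &
                                  ((1UL << MTE_TAG_SHIFT) - 1);
                unsigned long end = p + PAGE_SIZE;

                /* Each iteration zeroes one block of data together with
                 * its allocation tags. */
                do {
                        dc_gzva(p);
                        p += block;
                } while (p < end);
        }
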
> diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S
> index 0a48191534ff..a27c77dbe91c 100644
> --- a/arch/arm64/mm/proc.S
> +++ b/arch/arm64/mm/proc.S
> @@ -46,9 +46,13 @@
>  #endif
>
>  #ifdef CONFIG_KASAN_HW_TAGS
> -#define TCR_KASAN_HW_FLAGS SYS_TCR_EL1_TCMA1 | TCR_TBI1 | TCR_TBID1
> +#define TCR_MTE_FLAGS SYS_TCR_EL1_TCMA1 | TCR_TBI1 | TCR_TBID1
>  #else
> -#define TCR_KASAN_HW_FLAGS 0
> +/*
> + * The mte_zero_clear_page_tags() implementation uses DC GZVA, which relies on
> + * TBI being enabled at EL1.
> + */
> +#define TCR_MTE_FLAGS TCR_TBI1 | TCR_TBID1
>  #endif
>
>  /*
> @@ -452,7 +456,7 @@ SYM_FUNC_START(__cpu_setup)
>         msr_s   SYS_TFSRE0_EL1, xzr
>
>         /* set the TCR_EL1 bits */
> -       mov_q   x10, TCR_KASAN_HW_FLAGS
> +       mov_q   x10, TCR_MTE_FLAGS
>         orr     tcr, tcr, x10
>  1:
>  #endif
> diff --git a/include/linux/gfp.h b/include/linux/gfp.h
> index 11da8af06704..68ba237365dc 100644
> --- a/include/linux/gfp.h
> +++ b/include/linux/gfp.h
> @@ -53,8 +53,9 @@ struct vm_area_struct;
>  #define ___GFP_HARDWALL         0x100000u
>  #define ___GFP_THISNODE         0x200000u
>  #define ___GFP_ACCOUNT          0x400000u
> +#define ___GFP_ZEROTAGS         0x800000u
>  #ifdef CONFIG_LOCKDEP
> -#define ___GFP_NOLOCKDEP        0x800000u
> +#define ___GFP_NOLOCKDEP        0x1000000u
>  #else
>  #define ___GFP_NOLOCKDEP        0
>  #endif
> @@ -229,16 +230,20 @@ struct vm_area_struct;
>   * %__GFP_COMP address compound page metadata.
>   *
>   * %__GFP_ZERO returns a zeroed page on success.
> + *
> + * %__GFP_ZEROTAGS returns a page with zeroed memory tags on success, if
> + * __GFP_ZERO is set.
>   */
>  #define __GFP_NOWARN    ((__force gfp_t)___GFP_NOWARN)
>  #define __GFP_COMP      ((__force gfp_t)___GFP_COMP)
>  #define __GFP_ZERO      ((__force gfp_t)___GFP_ZERO)
> +#define __GFP_ZEROTAGS  ((__force gfp_t)___GFP_ZEROTAGS)
>
>  /* Disable lockdep for GFP context tracking */
>  #define __GFP_NOLOCKDEP ((__force gfp_t)___GFP_NOLOCKDEP)
>
>  /* Room for N __GFP_FOO bits */
> -#define __GFP_BITS_SHIFT (23 + IS_ENABLED(CONFIG_LOCKDEP))
> +#define __GFP_BITS_SHIFT (24 + IS_ENABLED(CONFIG_LOCKDEP))
>  #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
>
>  /**
> diff --git a/include/linux/highmem.h b/include/linux/highmem.h
> index 832b49b50c7b..caaa62e1dd24 100644
> --- a/include/linux/highmem.h
> +++ b/include/linux/highmem.h
> @@ -204,6 +204,14 @@ static inline void clear_highpage(struct page *page)
>         kunmap_atomic(kaddr);
>  }
>
> +#ifndef __HAVE_ARCH_TAG_CLEAR_HIGHPAGE
> +
> +static inline void tag_clear_highpage(struct page *page)
> +{
> +}
> +
> +#endif
> +
>  /*
>   * If we pass in a base or tail page, we can zero up to PAGE_SIZE.
>   * If we pass in a head page, we can zero up to the size of the compound page.
> diff --git a/mm/kasan/hw_tags.c b/mm/kasan/hw_tags.c
> index 45e552cb9172..34362c8d0955 100644
> --- a/mm/kasan/hw_tags.c
> +++ b/mm/kasan/hw_tags.c
> @@ -242,7 +242,14 @@ void kasan_alloc_pages(struct page *page, unsigned int order, gfp_t flags)
>  {
>         bool init = !want_init_on_free() && want_init_on_alloc(flags);
>
> -       kasan_unpoison_pages(page, order, init);
> +       if (flags & __GFP_ZEROTAGS) {
> +               int i;
> +
> +               for (i = 0; i != 1 << order; ++i)
> +                       tag_clear_highpage(page + i);
> +       } else {
> +               kasan_unpoison_pages(page, order, init);
> +       }
>  }
>
>  void kasan_free_pages(struct page *page, unsigned int order)
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 6e82a7f6fd6f..24e6f668ef73 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -1219,10 +1219,16 @@ static int free_tail_pages_check(struct page *head_page, struct page *page)
>         return ret;
>  }
>
> -static void kernel_init_free_pages(struct page *page, int numpages)
> +static void kernel_init_free_pages(struct page *page, int numpages, bool zero_tags)
>  {
>         int i;
>
> +       if (zero_tags) {
> +               for (i = 0; i < numpages; i++)
> +                       tag_clear_highpage(page + i);
> +               return;
> +       }
> +
>         /* s390's use of memset() could override KASAN redzones. */
>         kasan_disable_current();
>         for (i = 0; i < numpages; i++) {
> @@ -1314,7 +1320,7 @@ static __always_inline bool free_pages_prepare(struct page *page,
>                 bool init = want_init_on_free();
>
>                 if (init)
> -                       kernel_init_free_pages(page, 1 << order);
> +                       kernel_init_free_pages(page, 1 << order, false);
>                 if (!skip_kasan_poison)
>                         kasan_poison_pages(page, order, init);
>         }
> @@ -2350,7 +2356,8 @@ inline void post_alloc_hook(struct page *page, unsigned int order,
>
>         kasan_unpoison_pages(page, order, init);
>         if (init)
> -               kernel_init_free_pages(page, 1 << order);
> +               kernel_init_free_pages(page, 1 << order,
> +                                      gfp_flags & __GFP_ZEROTAGS);
>  }
>
>  set_page_owner(page, order, gfp_flags);
> --
> 2.31.1.607.g51e8a6a459-goog
>

For KASAN parts:

Reviewed-by: Andrey Konovalov <andreyknvl@xxxxxxxxx>
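
For completeness, the new allocation path is only taken for anonymous
VM_MTE mappings, i.e. user-space mappings created with PROT_MTE, along
the lines of the sketch below (illustrative only; PROT_MTE comes from
the arm64 <asm/mman.h> and requires an MTE-capable CPU and kernel):

        #include <sys/mman.h>

        #ifndef PROT_MTE
        #define PROT_MTE 0x20   /* arm64-specific */
        #endif

        char *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE | PROT_MTE,
                       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        /* The first write faults in a page that the allocator zeroed
         * and tag-cleared in a single pass. */
        p[0] = 1;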