On 10/08/2022 20:30, Peter Collingbourne wrote: > From: Catalin Marinas <catalin.marinas@xxxxxxx> > > Currently the PG_mte_tagged page flag mostly means the page contains > valid tags and it should be set after the tags have been cleared or > restored. However, in mte_sync_tags() it is set before setting the tags > to avoid, in theory, a race with concurrent mprotect(PROT_MTE) for > shared pages. However, a concurrent mprotect(PROT_MTE) with a copy on > write in another thread can cause the new page to have stale tags. > Similarly, tag reading via ptrace() can read stale tags if the > PG_mte_tagged flag is set before actually clearing/restoring the tags. > > Fix the PG_mte_tagged semantics so that it is only set after the tags > have been cleared or restored. This is safe for swap restoring into a > MAP_SHARED or CoW page since the core code takes the page lock. Add two > functions to test and set the PG_mte_tagged flag with acquire and > release semantics. The downside is that concurrent mprotect(PROT_MTE) on > a MAP_SHARED page may cause tag loss. This is already the case for KVM > guests if a VMM changes the page protection while the guest triggers a > user_mem_abort(). 
> > Signed-off-by: Catalin Marinas <catalin.marinas@xxxxxxx> > Cc: Will Deacon <will@xxxxxxxxxx> > Cc: Marc Zyngier <maz@xxxxxxxxxx> > Cc: Steven Price <steven.price@xxxxxxx> > Cc: Peter Collingbourne <pcc@xxxxxxxxxx> Reviewed-by: Steven Price <steven.price@xxxxxxx> > --- > v3: > - fix build with CONFIG_ARM64_MTE disabled > > arch/arm64/include/asm/mte.h | 30 ++++++++++++++++++++++++++++++ > arch/arm64/include/asm/pgtable.h | 2 +- > arch/arm64/kernel/cpufeature.c | 4 +++- > arch/arm64/kernel/elfcore.c | 2 +- > arch/arm64/kernel/hibernate.c | 2 +- > arch/arm64/kernel/mte.c | 12 +++++++----- > arch/arm64/kvm/guest.c | 4 ++-- > arch/arm64/kvm/mmu.c | 4 ++-- > arch/arm64/mm/copypage.c | 4 ++-- > arch/arm64/mm/fault.c | 2 +- > arch/arm64/mm/mteswap.c | 2 +- > 11 files changed, 51 insertions(+), 17 deletions(-) > > diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h > index aa523591a44e..46618c575eac 100644 > --- a/arch/arm64/include/asm/mte.h > +++ b/arch/arm64/include/asm/mte.h > @@ -37,6 +37,29 @@ void mte_free_tag_storage(char *storage); > /* track which pages have valid allocation tags */ > #define PG_mte_tagged PG_arch_2 > > +static inline void set_page_mte_tagged(struct page *page) > +{ > + /* > + * Ensure that the tags written prior to this function are visible > + * before the page flags update. > + */ > + smp_wmb(); > + set_bit(PG_mte_tagged, &page->flags); > +} > + > +static inline bool page_mte_tagged(struct page *page) > +{ > + bool ret = test_bit(PG_mte_tagged, &page->flags); > + > + /* > + * If the page is tagged, ensure ordering with a likely subsequent > + * read of the tags. 
> + */ > + if (ret) > + smp_rmb(); > + return ret; > +} > + > void mte_zero_clear_page_tags(void *addr); > void mte_sync_tags(pte_t old_pte, pte_t pte); > void mte_copy_page_tags(void *kto, const void *kfrom); > @@ -54,6 +77,13 @@ size_t mte_probe_user_range(const char __user *uaddr, size_t size); > /* unused if !CONFIG_ARM64_MTE, silence the compiler */ > #define PG_mte_tagged 0 > > +static inline void set_page_mte_tagged(struct page *page) > +{ > +} > +static inline bool page_mte_tagged(struct page *page) > +{ > + return false; > +} > static inline void mte_zero_clear_page_tags(void *addr) > { > } > diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h > index b5df82aa99e6..82719fa42c0e 100644 > --- a/arch/arm64/include/asm/pgtable.h > +++ b/arch/arm64/include/asm/pgtable.h > @@ -1050,7 +1050,7 @@ static inline void arch_swap_invalidate_area(int type) > static inline void arch_swap_restore(swp_entry_t entry, struct folio *folio) > { > if (system_supports_mte() && mte_restore_tags(entry, &folio->page)) > - set_bit(PG_mte_tagged, &folio->flags); > + set_page_mte_tagged(&folio->page); > } > > #endif /* CONFIG_ARM64_MTE */ > diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c > index 907401e4fffb..562c301bbf15 100644 > --- a/arch/arm64/kernel/cpufeature.c > +++ b/arch/arm64/kernel/cpufeature.c > @@ -2037,8 +2037,10 @@ static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap) > * Clear the tags in the zero page. This needs to be done via the > * linear map which has the Tagged attribute. 
> */ > - if (!test_and_set_bit(PG_mte_tagged, &ZERO_PAGE(0)->flags)) > + if (!page_mte_tagged(ZERO_PAGE(0))) { > mte_clear_page_tags(lm_alias(empty_zero_page)); > + set_page_mte_tagged(ZERO_PAGE(0)); > + } > > kasan_init_hw_tags_cpu(); > } > diff --git a/arch/arm64/kernel/elfcore.c b/arch/arm64/kernel/elfcore.c > index 98d67444a5b6..f91bb1572d22 100644 > --- a/arch/arm64/kernel/elfcore.c > +++ b/arch/arm64/kernel/elfcore.c > @@ -47,7 +47,7 @@ static int mte_dump_tag_range(struct coredump_params *cprm, > * Pages mapped in user space as !pte_access_permitted() (e.g. > * PROT_EXEC only) may not have the PG_mte_tagged flag set. > */ > - if (!test_bit(PG_mte_tagged, &page->flags)) { > + if (!page_mte_tagged(page)) { > put_page(page); > dump_skip(cprm, MTE_PAGE_TAG_STORAGE); > continue; > diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c > index af5df48ba915..788597a6b6a2 100644 > --- a/arch/arm64/kernel/hibernate.c > +++ b/arch/arm64/kernel/hibernate.c > @@ -271,7 +271,7 @@ static int swsusp_mte_save_tags(void) > if (!page) > continue; > > - if (!test_bit(PG_mte_tagged, &page->flags)) > + if (!page_mte_tagged(page)) > continue; > > ret = save_tags(page, pfn); > diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c > index b2b730233274..2287316639f3 100644 > --- a/arch/arm64/kernel/mte.c > +++ b/arch/arm64/kernel/mte.c > @@ -41,14 +41,17 @@ static void mte_sync_page_tags(struct page *page, pte_t old_pte, > if (check_swap && is_swap_pte(old_pte)) { > swp_entry_t entry = pte_to_swp_entry(old_pte); > > - if (!non_swap_entry(entry) && mte_restore_tags(entry, page)) > + if (!non_swap_entry(entry) && mte_restore_tags(entry, page)) { > + set_page_mte_tagged(page); > return; > + } > } > > if (!pte_is_tagged) > return; > > mte_clear_page_tags(page_address(page)); > + set_page_mte_tagged(page); > } > > void mte_sync_tags(pte_t old_pte, pte_t pte) > @@ -64,7 +67,7 @@ void mte_sync_tags(pte_t old_pte, pte_t pte) > > /* if PG_mte_tagged is set, tags 
have already been initialised */ > for (i = 0; i < nr_pages; i++, page++) { > - if (!test_and_set_bit(PG_mte_tagged, &page->flags)) > + if (!page_mte_tagged(page)) > mte_sync_page_tags(page, old_pte, check_swap, > pte_is_tagged); > } > @@ -91,8 +94,7 @@ int memcmp_pages(struct page *page1, struct page *page2) > * pages is tagged, set_pte_at() may zero or change the tags of the > * other page via mte_sync_tags(). > */ > - if (test_bit(PG_mte_tagged, &page1->flags) || > - test_bit(PG_mte_tagged, &page2->flags)) > + if (page_mte_tagged(page1) || page_mte_tagged(page2)) > return addr1 != addr2; > > return ret; > @@ -398,7 +400,7 @@ static int __access_remote_tags(struct mm_struct *mm, unsigned long addr, > put_page(page); > break; > } > - WARN_ON_ONCE(!test_bit(PG_mte_tagged, &page->flags)); > + WARN_ON_ONCE(!page_mte_tagged(page)); > > /* limit access to the end of the page */ > offset = offset_in_page(addr); > diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c > index 8c607199cad1..3b04e69006b4 100644 > --- a/arch/arm64/kvm/guest.c > +++ b/arch/arm64/kvm/guest.c > @@ -1058,7 +1058,7 @@ long kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm, > maddr = page_address(page); > > if (!write) { > - if (test_bit(PG_mte_tagged, &page->flags)) > + if (page_mte_tagged(page)) > num_tags = mte_copy_tags_to_user(tags, maddr, > MTE_GRANULES_PER_PAGE); > else > @@ -1075,7 +1075,7 @@ long kvm_vm_ioctl_mte_copy_tags(struct kvm *kvm, > * completed fully > */ > if (num_tags == MTE_GRANULES_PER_PAGE) > - set_bit(PG_mte_tagged, &page->flags); > + set_page_mte_tagged(page); > > kvm_release_pfn_dirty(pfn); > } > diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c > index 87f1cd0df36e..c9012707f69c 100644 > --- a/arch/arm64/kvm/mmu.c > +++ b/arch/arm64/kvm/mmu.c > @@ -1075,9 +1075,9 @@ static int sanitise_mte_tags(struct kvm *kvm, kvm_pfn_t pfn, > return -EFAULT; > > for (i = 0; i < nr_pages; i++, page++) { > - if (!test_bit(PG_mte_tagged, &page->flags)) { > + if 
(!page_mte_tagged(page)) { > mte_clear_page_tags(page_address(page)); > - set_bit(PG_mte_tagged, &page->flags); > + set_page_mte_tagged(page); > } > } > > diff --git a/arch/arm64/mm/copypage.c b/arch/arm64/mm/copypage.c > index 24913271e898..4223389b6180 100644 > --- a/arch/arm64/mm/copypage.c > +++ b/arch/arm64/mm/copypage.c > @@ -21,9 +21,9 @@ void copy_highpage(struct page *to, struct page *from) > > copy_page(kto, kfrom); > > - if (system_supports_mte() && test_bit(PG_mte_tagged, &from->flags)) { > - set_bit(PG_mte_tagged, &to->flags); > + if (system_supports_mte() && page_mte_tagged(from)) { > mte_copy_page_tags(kto, kfrom); > + set_page_mte_tagged(to); > } > } > EXPORT_SYMBOL(copy_highpage); > diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c > index c33f1fad2745..d095bfa16771 100644 > --- a/arch/arm64/mm/fault.c > +++ b/arch/arm64/mm/fault.c > @@ -931,5 +931,5 @@ struct page *alloc_zeroed_user_highpage_movable(struct vm_area_struct *vma, > void tag_clear_highpage(struct page *page) > { > mte_zero_clear_page_tags(page_address(page)); > - set_bit(PG_mte_tagged, &page->flags); > + set_page_mte_tagged(page); > } > diff --git a/arch/arm64/mm/mteswap.c b/arch/arm64/mm/mteswap.c > index 4334dec93bd4..a78c1db23c68 100644 > --- a/arch/arm64/mm/mteswap.c > +++ b/arch/arm64/mm/mteswap.c > @@ -24,7 +24,7 @@ int mte_save_tags(struct page *page) > { > void *tag_storage, *ret; > > - if (!test_bit(PG_mte_tagged, &page->flags)) > + if (!page_mte_tagged(page)) > return 0; > > tag_storage = mte_allocate_tag_storage();