On Tue, Jun 1, 2021 at 10:51 PM Peter Collingbourne <pcc@xxxxxxxxxx> wrote:
>
> Poisoning freed pages protects against kernel use-after-free. The
> likelihood of such a bug involving kernel pages is significantly higher
> than that for user pages. At the same time, poisoning freed pages can
> impose a significant performance cost, which cannot always be justified
> for user pages given the lower probability of finding a bug. Therefore,
> disable freed user page poisoning when using HW tags. We identify
> "user" pages via the flag set GFP_HIGHUSER_MOVABLE, which indicates
> a strong likelihood of not being directly accessible to the kernel.
>
> Signed-off-by: Peter Collingbourne <pcc@xxxxxxxxxx>
> Link: https://linux-review.googlesource.com/id/I716846e2de8ef179f44e835770df7e6307be96c9
> ---
> v4:
> - move flag to GFP_HIGHUSER_MOVABLE
> - remove command line flag
>
> include/linux/gfp.h | 13 ++++++++++---
> include/linux/page-flags.h | 9 +++++++++
> include/trace/events/mmflags.h | 9 ++++++++-
> mm/kasan/hw_tags.c | 3 +++
> mm/page_alloc.c | 12 +++++++-----
> 5 files changed, 37 insertions(+), 9 deletions(-)
>
> diff --git a/include/linux/gfp.h b/include/linux/gfp.h
> index 68ba237365dc..e6102dfa4faa 100644
> --- a/include/linux/gfp.h
> +++ b/include/linux/gfp.h
> @@ -54,8 +54,9 @@ struct vm_area_struct;
> #define ___GFP_THISNODE 0x200000u
> #define ___GFP_ACCOUNT 0x400000u
> #define ___GFP_ZEROTAGS 0x800000u
> +#define ___GFP_SKIP_KASAN_POISON 0x1000000u
> #ifdef CONFIG_LOCKDEP
> -#define ___GFP_NOLOCKDEP 0x1000000u
> +#define ___GFP_NOLOCKDEP 0x2000000u
> #else
> #define ___GFP_NOLOCKDEP 0
> #endif
> @@ -233,17 +234,22 @@ struct vm_area_struct;
> *
> * %__GFP_ZEROTAGS returns a page with zeroed memory tags on success, if
> * __GFP_ZERO is set.
> + *
> + * %__GFP_SKIP_KASAN_POISON returns a page which does not need to be poisoned
> + * on deallocation. Typically used for userspace pages. Currently only has an
> + * effect in HW tags mode.
> */
> #define __GFP_NOWARN ((__force gfp_t)___GFP_NOWARN)
> #define __GFP_COMP ((__force gfp_t)___GFP_COMP)
> #define __GFP_ZERO ((__force gfp_t)___GFP_ZERO)
> #define __GFP_ZEROTAGS ((__force gfp_t)___GFP_ZEROTAGS)
> +#define __GFP_SKIP_KASAN_POISON ((__force gfp_t)___GFP_SKIP_KASAN_POISON)
>
> /* Disable lockdep for GFP context tracking */
> #define __GFP_NOLOCKDEP ((__force gfp_t)___GFP_NOLOCKDEP)
>
> /* Room for N __GFP_FOO bits */
> -#define __GFP_BITS_SHIFT (24 + IS_ENABLED(CONFIG_LOCKDEP))
> +#define __GFP_BITS_SHIFT (25 + IS_ENABLED(CONFIG_LOCKDEP))
> #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
>
> /**
> @@ -324,7 +330,8 @@ struct vm_area_struct;
> #define GFP_DMA __GFP_DMA
> #define GFP_DMA32 __GFP_DMA32
> #define GFP_HIGHUSER (GFP_USER | __GFP_HIGHMEM)
> -#define GFP_HIGHUSER_MOVABLE (GFP_HIGHUSER | __GFP_MOVABLE)
> +#define GFP_HIGHUSER_MOVABLE (GFP_HIGHUSER | __GFP_MOVABLE | \
> + __GFP_SKIP_KASAN_POISON)
> #define GFP_TRANSHUGE_LIGHT ((GFP_HIGHUSER_MOVABLE | __GFP_COMP | \
> __GFP_NOMEMALLOC | __GFP_NOWARN) & ~__GFP_RECLAIM)
> #define GFP_TRANSHUGE (GFP_TRANSHUGE_LIGHT | __GFP_DIRECT_RECLAIM)
> diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
> index 04a34c08e0a6..40e2c5000585 100644
> --- a/include/linux/page-flags.h
> +++ b/include/linux/page-flags.h
> @@ -137,6 +137,9 @@ enum pageflags {
> #endif
> #ifdef CONFIG_64BIT
> PG_arch_2,
> +#endif
> +#ifdef CONFIG_KASAN_HW_TAGS
> + PG_skip_kasan_poison,
> #endif
> __NR_PAGEFLAGS,
>
> @@ -443,6 +446,12 @@ TESTCLEARFLAG(Young, young, PF_ANY)
> PAGEFLAG(Idle, idle, PF_ANY)
> #endif
>
> +#ifdef CONFIG_KASAN_HW_TAGS
> +PAGEFLAG(SkipKASanPoison, skip_kasan_poison, PF_HEAD)
> +#else
> +PAGEFLAG_FALSE(SkipKASanPoison)
> +#endif
> +
> /*
> * PageReported() is used to track reported free pages within the Buddy
> * allocator.
> * We can use the non-atomic version of the test and set
> diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h
> index 629c7a0eaff2..390270e00a1d 100644
> --- a/include/trace/events/mmflags.h
> +++ b/include/trace/events/mmflags.h
> @@ -85,6 +85,12 @@
> #define IF_HAVE_PG_ARCH_2(flag,string)
> #endif
>
> +#ifdef CONFIG_KASAN_HW_TAGS
> +#define IF_HAVE_PG_SKIP_KASAN_POISON(flag,string) ,{1UL << flag, string}
> +#else
> +#define IF_HAVE_PG_SKIP_KASAN_POISON(flag,string)
> +#endif
> +
> #define __def_pageflag_names \
> {1UL << PG_locked, "locked" }, \
> {1UL << PG_waiters, "waiters" }, \
> @@ -112,7 +118,8 @@ IF_HAVE_PG_UNCACHED(PG_uncached, "uncached" ) \
> IF_HAVE_PG_HWPOISON(PG_hwpoison, "hwpoison" ) \
> IF_HAVE_PG_IDLE(PG_young, "young" ) \
> IF_HAVE_PG_IDLE(PG_idle, "idle" ) \
> -IF_HAVE_PG_ARCH_2(PG_arch_2, "arch_2" )
> +IF_HAVE_PG_ARCH_2(PG_arch_2, "arch_2" ) \
> +IF_HAVE_PG_SKIP_KASAN_POISON(PG_skip_kasan_poison, "skip_kasan_poison")
>
> #define show_page_flags(flags) \
> (flags) ? __print_flags(flags, "|", \
> diff --git a/mm/kasan/hw_tags.c b/mm/kasan/hw_tags.c
> index 41fd5326ee0a..ed5e5b833d61 100644
> --- a/mm/kasan/hw_tags.c
> +++ b/mm/kasan/hw_tags.c
> @@ -246,6 +246,9 @@ void kasan_alloc_pages(struct page *page, unsigned int order, gfp_t flags)
> */
> bool init = !want_init_on_free() && want_init_on_alloc(flags);
>
> + if (flags & __GFP_SKIP_KASAN_POISON)
> + SetPageSkipKASanPoison(page);
> +
> if (flags & __GFP_ZEROTAGS) {
> int i;
>
> diff --git a/mm/page_alloc.c b/mm/page_alloc.c
> index 13937e793fda..5ad76e540a22 100644
> --- a/mm/page_alloc.c
> +++ b/mm/page_alloc.c
> @@ -394,11 +394,12 @@ static DEFINE_STATIC_KEY_TRUE(deferred_pages);
> * on-demand allocation and then freed again before the deferred pages
> * initialization is done, but this is not likely to happen.
> */
> -static inline bool should_skip_kasan_poison(fpi_t fpi_flags)
> +static inline bool should_skip_kasan_poison(struct page *page, fpi_t fpi_flags)
> {
> return static_branch_unlikely(&deferred_pages) ||
> (!IS_ENABLED(CONFIG_KASAN_GENERIC) &&
> - (fpi_flags & FPI_SKIP_KASAN_POISON));
> + (fpi_flags & FPI_SKIP_KASAN_POISON)) ||
> + PageSkipKASanPoison(page);
> }
>
> /* Returns true if the struct page for the pfn is uninitialised */
> @@ -449,10 +450,11 @@ defer_init(int nid, unsigned long pfn, unsigned long end_pfn)
> return false;
> }
> #else
> -static inline bool should_skip_kasan_poison(fpi_t fpi_flags)
> +static inline bool should_skip_kasan_poison(struct page *page, fpi_t fpi_flags)
> {
> return (!IS_ENABLED(CONFIG_KASAN_GENERIC) &&
> - (fpi_flags & FPI_SKIP_KASAN_POISON));
> + (fpi_flags & FPI_SKIP_KASAN_POISON)) ||
> + PageSkipKASanPoison(page);
> }
>
> static inline bool early_page_uninitialised(unsigned long pfn)
> @@ -1244,7 +1246,7 @@ static __always_inline bool free_pages_prepare(struct page *page,
> unsigned int order, bool check_free, fpi_t fpi_flags)
> {
> int bad = 0;
> - bool skip_kasan_poison = should_skip_kasan_poison(fpi_flags);
> + bool skip_kasan_poison = should_skip_kasan_poison(page, fpi_flags);
>
> VM_BUG_ON_PAGE(PageTail(page), page);
>
> --
> 2.32.0.rc1.229.g3e70b5a671-goog
>

Reviewed-by: Andrey Konovalov <andreyknvl@xxxxxxxxx>