Do not worry too much about the implementation of this. I am trying to
implement this more neatly using the x86 PAT logic but haven't managed
to get it working in time for the RFC. To enable testing & review, I'm
very hastily throwing something together that basically works, based on
a simplified version of what was used for the latest RFC [0].

[0] https://lore.kernel.org/linux-mm/20250110-asi-rfc-v2-v2-0-8419288bc805@xxxxxxxxxx/

Signed-off-by: Brendan Jackman <jackmanb@xxxxxxxxxx>
---
 arch/x86/include/asm/asi.h |  3 ++
 arch/x86/mm/asi.c          | 77 ++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/asi.h        |  2 ++
 include/linux/vmalloc.h    |  4 +++
 mm/vmalloc.c               | 32 +++++++++++--------
 5 files changed, 105 insertions(+), 13 deletions(-)

diff --git a/arch/x86/include/asm/asi.h b/arch/x86/include/asm/asi.h
index b8f604df6a36508acbc10710f821d5f95e8cdceb..cf8be544de8b108190b765e3eb337089866207a2 100644
--- a/arch/x86/include/asm/asi.h
+++ b/arch/x86/include/asm/asi.h
@@ -23,6 +23,9 @@ static __always_inline pgd_t *asi_pgd(struct asi *asi)
 	return asi ? asi->pgd : NULL;
 }
 
+void asi_map(struct page *page, int numpages);
+void asi_unmap(struct page *page, int numpages);
+
 #endif /* CONFIG_MITIGATION_ADDRESS_SPACE_ISOLATION */
 
 #endif /* _ASM_X86_ASI_H */
diff --git a/arch/x86/mm/asi.c b/arch/x86/mm/asi.c
index e5a981a7b3192655cd981633514fbf945b92c9b6..570233224789631352891f47ac2f0453a7adc06e 100644
--- a/arch/x86/mm/asi.c
+++ b/arch/x86/mm/asi.c
@@ -1,8 +1,85 @@
 // SPDX-License-Identifier: GPL-2.0
+#include <linux/bug.h>
+#include <linux/gfp.h>
+#include <linux/mm.h>
+#include <linux/mm_types.h>
+#include <linux/pgtable.h>
+#include <linux/set_memory.h>
+#include <linux/vmalloc.h>
+
 #include <asm/asi.h>
+#include <asm/traps.h>
 
 static __aligned(PAGE_SIZE) pgd_t asi_global_nonsensitive_pgd[PTRS_PER_PGD];
 
 struct asi __asi_global_nonsensitive = {
 	.pgd = asi_global_nonsensitive_pgd,
 };
+
+/*
+ * Map the given pages into the ASI nonsensitive physmap. The source of the
+ * mapping is the regular unrestricted page tables. Only supports mapping at
+ * pageblock granularity. Does no synchronization.
+ */
+void asi_map(struct page *page, int numpages)
+{
+	unsigned long virt;
+	unsigned long start = (unsigned long)page_to_virt(page);
+	unsigned long end = start + PAGE_SIZE * numpages;
+	unsigned long page_size;
+
+	VM_BUG_ON(!IS_ALIGNED(page_to_pfn(page), pageblock_nr_pages));
+	VM_BUG_ON(!IS_ALIGNED(numpages, pageblock_nr_pages));
+
+	for (virt = start; virt < end; virt = ALIGN(virt + 1, page_size)) {
+		pte_t *pte, *pte_asi;
+		int level, level_asi;
+		pgd_t *pgd = pgd_offset_pgd(asi_global_nonsensitive_pgd, virt);
+
+		pte_asi = lookup_pgtable_in_pgd(pgd, virt, &level_asi);
+		page_size = page_level_size(level_asi);
+
+		pte = lookup_address(virt, &level);
+		if (!pte || pte_none(*pte))
+			continue;
+
+		/*
+		 * The physmap should already be set up by the PAT code, with
+		 * no pages smaller than 2M. This function should only be
+		 * called at pageblock granularity. Thus it should never be
+		 * required to break up pages here.
+		 */
+		if (WARN_ON_ONCE(!pte_asi) ||
+		    WARN_ON_ONCE(ALIGN_DOWN(virt, page_size) < virt) ||
+		    ALIGN(virt, page_size) > end)
+			continue;
+
+		/*
+		 * Existing mappings should already match the structure of the
+		 * unrestricted physmap.
+		 */
+		if (WARN_ON_ONCE(level != level_asi))
+			continue;
+
+		set_pte(pte_asi, *pte);
+	}
+}
+
+/*
+ * Unmap pages previously mapped via asi_map().
+ *
+ * Interrupts must be enabled as this does a TLB shootdown.
+ */
+void asi_unmap(struct page *page, int numpages)
+{
+	unsigned long start = (unsigned long)page_to_virt(page);
+	unsigned long end = start + (PAGE_SIZE * numpages);
+	pgtbl_mod_mask mask = 0;
+
+	VM_BUG_ON(!IS_ALIGNED(page_to_pfn(page), pageblock_nr_pages));
+	VM_BUG_ON(!IS_ALIGNED(numpages, pageblock_nr_pages));
+
+	vunmap_pgd_range(asi_pgd(ASI_GLOBAL_NONSENSITIVE), start, end, &mask);
+
+	flush_tlb_kernel_range(start, end);
+}
diff --git a/include/linux/asi.h b/include/linux/asi.h
index 2d3049d5fe423e139dcce8f3d68cdffcc0ec0bfe..ee9811f04a417556cf2e930644eaf05f3c9bfee3 100644
--- a/include/linux/asi.h
+++ b/include/linux/asi.h
@@ -13,6 +13,8 @@ struct asi {};
 
 static inline pgd_t *asi_pgd(struct asi *asi) { return NULL; }
 
+static inline void asi_map(struct page *page, int numpages) { }
+static inline void asi_unmap(struct page *page, int numpages) { }
 #endif /* CONFIG_MITIGATION_ADDRESS_SPACE_ISOLATION */
 
 #endif /* _INCLUDE_ASI_H */
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 31e9ffd936e39334ddaff910222d4751c18da5e7..c498ba127b4a511b5a6f10afa2aae535509fc153 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -11,6 +11,7 @@
 #include <asm/page.h>		/* pgprot_t */
 #include <linux/rbtree.h>
 #include <linux/overflow.h>
+#include <linux/pgtable.h>
 
 #include <asm/vmalloc.h>
 
@@ -324,4 +325,7 @@ bool vmalloc_dump_obj(void *object);
 static inline bool vmalloc_dump_obj(void *object) { return false; }
 #endif
 
+void vunmap_pgd_range(pgd_t *pgd_table, unsigned long addr, unsigned long end,
+		      pgtbl_mod_mask *mask);
+
 #endif /* _LINUX_VMALLOC_H */
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 61981ee1c9d2f769d4a06ab542fc84334c1b0cbd..ffeb823398809388c0599f51929a7f3506ed035f 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -427,6 +427,24 @@ static void vunmap_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end,
 	} while (p4d++, addr = next, addr != end);
 }
 
+void vunmap_pgd_range(pgd_t *pgd_table, unsigned long addr, unsigned long end,
+		      pgtbl_mod_mask *mask)
+{
+	unsigned long next;
+	pgd_t *pgd = pgd_offset_pgd(pgd_table, addr);
+
+	BUG_ON(addr >= end);
+
+	do {
+		next = pgd_addr_end(addr, end);
+		if (pgd_bad(*pgd))
+			*mask |= PGTBL_PGD_MODIFIED;
+		if (pgd_none_or_clear_bad(pgd))
+			continue;
+		vunmap_p4d_range(pgd, addr, next, mask);
+	} while (pgd++, addr = next, addr != end);
+}
+
 /*
  * vunmap_range_noflush is similar to vunmap_range, but does not
  * flush caches or TLBs.
@@ -441,21 +459,9 @@ static void vunmap_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end,
  */
 void __vunmap_range_noflush(unsigned long start, unsigned long end)
 {
-	unsigned long next;
-	pgd_t *pgd;
-	unsigned long addr = start;
 	pgtbl_mod_mask mask = 0;
 
-	BUG_ON(addr >= end);
-	pgd = pgd_offset_k(addr);
-	do {
-		next = pgd_addr_end(addr, end);
-		if (pgd_bad(*pgd))
-			mask |= PGTBL_PGD_MODIFIED;
-		if (pgd_none_or_clear_bad(pgd))
-			continue;
-		vunmap_p4d_range(pgd, addr, next, &mask);
-	} while (pgd++, addr = next, addr != end);
+	vunmap_pgd_range(init_mm.pgd, start, end, &mask);
 
 	if (mask & ARCH_PAGE_TABLE_SYNC_MASK)
 		arch_sync_kernel_mappings(start, end);
-- 
2.49.0.rc1.451.g8f38331e32-goog
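
For reviewers, a rough sketch of the intended usage of this API. The two
functions below are hypothetical (the real callers are presumably wired up
elsewhere in the series); they only illustrate the contract stated in the
patch: both entry points operate on whole pageblocks, and asi_unmap() must
run with IRQs enabled because it does a TLB shootdown.

	/* Hypothetical caller, for illustration only. */
	static void example_asi_share_pageblock(struct page *page)
	{
		/* Copy the unrestricted physmap PTEs into the ASI tables. */
		asi_map(page, pageblock_nr_pages);
	}

	static void example_asi_unshare_pageblock(struct page *page)
	{
		/*
		 * asi_unmap() tears the mapping back down and flushes the
		 * TLB, so it must not be called with IRQs disabled.
		 */
		lockdep_assert_irqs_enabled();
		asi_unmap(page, pageblock_nr_pages);
	}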