A new flag, VM_LOCAL_NONSENSITIVE, is added to designate locally
non-sensitive vmalloc/vmap areas. When using the __vmalloc /
__vmalloc_node APIs, if the corresponding GFP flag is specified, the VM
flag is automatically added. When using the __vmalloc_node_range API,
either flag can be specified independently. The VM flag will only map
the vmalloc area as non-sensitive, while the GFP flag will only map the
underlying direct map area as non-sensitive.

When using the __vmalloc_node_range API,
VMALLOC_LOCAL_NONSENSITIVE_START/END should be used instead of
VMALLOC_START/END. This is the range that will have different ASI page
tables for each process, thereby providing the local mapping.

A command line parameter vmalloc_local_nonsensitive_percent is added to
specify the approximate division between the per-process and global
vmalloc ranges. Note that regular/sensitive vmalloc/vmap allocations
are not restricted by this division and can go anywhere in the entire
vmalloc range; the division applies only to non-sensitive allocations.

Since no attempt is made to balance regular/sensitive allocations
across the division, it is possible that one of these ranges gets
filled up by regular allocations, leaving no room for the non-sensitive
allocations for which that range was designated. But since the vmalloc
range is fairly large, that will hopefully not be a problem in
practice. If that assumption turns out to be incorrect, a more
sophisticated scheme could be implemented.
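For illustration, allocating a locally non-sensitive buffer could look
roughly like the sketch below (example only, not part of this patch;
the __vmalloc_node_range arguments mirror the call site updated in
__vmalloc_node below):

	/* GFP flag only: the range and VM flag are derived automatically. */
	void *buf = __vmalloc(size, GFP_KERNEL | __GFP_LOCAL_NONSENSITIVE);

	/* Explicit range and VM flag via __vmalloc_node_range. */
	void *buf2 = __vmalloc_node_range(size, 1,
					  VMALLOC_LOCAL_NONSENSITIVE_START,
					  VMALLOC_LOCAL_NONSENSITIVE_END,
					  GFP_KERNEL | __GFP_LOCAL_NONSENSITIVE,
					  PAGE_KERNEL, VM_LOCAL_NONSENSITIVE,
					  NUMA_NO_NODE,
					  __builtin_return_address(0));

As for the command line parameter, booting with e.g.
vmalloc_local_nonsensitive_percent=25 places roughly the first quarter
of the vmalloc range in the per-process (local) region and the rest in
the global region, rounded to PGDIR_SIZE boundaries; the default is 50.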
Signed-off-by: Junaid Shahid <junaids@xxxxxxxxxx>
---
 arch/x86/include/asm/asi.h              |  2 +
 arch/x86/include/asm/page_64.h          |  2 +
 arch/x86/include/asm/pgtable_64_types.h |  7 ++-
 arch/x86/mm/asi.c                       | 57 ++++++++++++++++++
 include/asm-generic/asi.h               |  5 ++
 include/linux/vmalloc.h                 |  6 ++
 mm/vmalloc.c                            | 78 ++++++++++++++++++++-----
 7 files changed, 142 insertions(+), 15 deletions(-)

diff --git a/arch/x86/include/asm/asi.h b/arch/x86/include/asm/asi.h
index f11010c0334b..e3cbf6d8801e 100644
--- a/arch/x86/include/asm/asi.h
+++ b/arch/x86/include/asm/asi.h
@@ -46,6 +46,8 @@ DECLARE_PER_CPU_ALIGNED(struct asi_state, asi_cpu_state);
 
 extern pgd_t asi_global_nonsensitive_pgd[];
 
+void asi_vmalloc_init(void);
+
 int asi_init_mm_state(struct mm_struct *mm);
 void asi_free_mm_state(struct mm_struct *mm);
 
diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h
index 2845eca02552..b17574349572 100644
--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -18,6 +18,8 @@ extern unsigned long vmemmap_base;
 
 #ifdef CONFIG_ADDRESS_SPACE_ISOLATION
 
+extern unsigned long vmalloc_global_nonsensitive_start;
+extern unsigned long vmalloc_local_nonsensitive_end;
 extern unsigned long asi_local_map_base;
 DECLARE_STATIC_KEY_FALSE(asi_local_map_initialized);
 
diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h
index 0fc380ba25b8..06793f7ef1aa 100644
--- a/arch/x86/include/asm/pgtable_64_types.h
+++ b/arch/x86/include/asm/pgtable_64_types.h
@@ -142,8 +142,13 @@ extern unsigned int ptrs_per_p4d;
 #define VMALLOC_END		(VMALLOC_START + (VMALLOC_SIZE_TB << 40) - 1)
 
 #ifdef CONFIG_ADDRESS_SPACE_ISOLATION
-#define VMALLOC_GLOBAL_NONSENSITIVE_START	VMALLOC_START
+
+#define VMALLOC_LOCAL_NONSENSITIVE_START	VMALLOC_START
+#define VMALLOC_LOCAL_NONSENSITIVE_END		vmalloc_local_nonsensitive_end
+
+#define VMALLOC_GLOBAL_NONSENSITIVE_START	vmalloc_global_nonsensitive_start
 #define VMALLOC_GLOBAL_NONSENSITIVE_END		VMALLOC_END
+
 #endif
 
 #define MODULES_VADDR	(__START_KERNEL_map + KERNEL_IMAGE_SIZE)

diff --git a/arch/x86/mm/asi.c b/arch/x86/mm/asi.c
index 3ba0971a318d..91e5ff1224ff 100644
--- a/arch/x86/mm/asi.c
+++ b/arch/x86/mm/asi.c
@@ -3,6 +3,7 @@
 #include <linux/init.h>
 #include <linux/memblock.h>
 #include <linux/memcontrol.h>
+#include <linux/moduleparam.h>
 
 #include <asm/asi.h>
 #include <asm/pgalloc.h>
@@ -28,6 +29,17 @@ EXPORT_SYMBOL(asi_local_map_initialized);
 unsigned long asi_local_map_base __ro_after_init;
 EXPORT_SYMBOL(asi_local_map_base);
 
+unsigned long vmalloc_global_nonsensitive_start __ro_after_init;
+EXPORT_SYMBOL(vmalloc_global_nonsensitive_start);
+
+unsigned long vmalloc_local_nonsensitive_end __ro_after_init;
+EXPORT_SYMBOL(vmalloc_local_nonsensitive_end);
+
+/* Approximate percent only. Rounded to PGDIR_SIZE boundary. */
+static uint vmalloc_local_nonsensitive_percent __ro_after_init = 50;
+core_param(vmalloc_local_nonsensitive_percent,
+	   vmalloc_local_nonsensitive_percent, uint, 0444);
+
 int asi_register_class(const char *name, uint flags,
 		       const struct asi_hooks *ops)
 {
@@ -307,6 +319,10 @@ int asi_init(struct mm_struct *mm, int asi_index, struct asi **out_asi)
 	     i++)
 		set_pgd(asi->pgd + i, mm->asi[0].pgd[i]);
 
+	for (i = pgd_index(VMALLOC_LOCAL_NONSENSITIVE_START);
+	     i <= pgd_index(VMALLOC_LOCAL_NONSENSITIVE_END); i++)
+		set_pgd(asi->pgd + i, mm->asi[0].pgd[i]);
+
 	for (i = pgd_index(VMALLOC_GLOBAL_NONSENSITIVE_START);
 	     i < PTRS_PER_PGD; i++)
 		set_pgd(asi->pgd + i, asi_global_nonsensitive_pgd[i]);
@@ -432,6 +448,10 @@ void asi_free_mm_state(struct mm_struct *mm)
 			   pgd_index(ASI_LOCAL_MAP +
 				     PFN_PHYS(max_possible_pfn)) + 1);
 
+	asi_free_pgd_range(&mm->asi[0],
+			   pgd_index(VMALLOC_LOCAL_NONSENSITIVE_START),
+			   pgd_index(VMALLOC_LOCAL_NONSENSITIVE_END) + 1);
+
 	free_page((ulong)mm->asi[0].pgd);
 }
 
@@ -671,3 +691,40 @@ void asi_sync_mapping(struct asi *asi, void *start, size_t len)
 	for (; addr < end; addr = pgd_addr_end(addr, end))
 		asi_clone_pgd(asi->pgd, asi->mm->asi[0].pgd, addr);
 }
+
+void __init asi_vmalloc_init(void)
+{
+	uint start_index = pgd_index(VMALLOC_START);
+	uint end_index = pgd_index(VMALLOC_END);
+	uint global_start_index;
+
+	if (!boot_cpu_has(X86_FEATURE_ASI)) {
+		vmalloc_global_nonsensitive_start = VMALLOC_START;
+		vmalloc_local_nonsensitive_end = VMALLOC_END;
+		return;
+	}
+
+	if (vmalloc_local_nonsensitive_percent == 0) {
+		vmalloc_local_nonsensitive_percent = 1;
+		pr_warn("vmalloc_local_nonsensitive_percent must be non-zero");
+	}
+
+	if (vmalloc_local_nonsensitive_percent >= 100) {
+		vmalloc_local_nonsensitive_percent = 99;
+		pr_warn("vmalloc_local_nonsensitive_percent must be less than 100");
+	}
+
+	global_start_index = start_index + (end_index - start_index) *
+			     vmalloc_local_nonsensitive_percent / 100;
+	global_start_index = max(global_start_index, start_index + 1);
+
+	vmalloc_global_nonsensitive_start = -(PTRS_PER_PGD - global_start_index)
+					    * PGDIR_SIZE;
+	vmalloc_local_nonsensitive_end = vmalloc_global_nonsensitive_start - 1;
+
+	pr_debug("vmalloc_global_nonsensitive_start = %lx",
+		 vmalloc_global_nonsensitive_start);
+
+	VM_BUG_ON(vmalloc_local_nonsensitive_end >= VMALLOC_END);
+	VM_BUG_ON(vmalloc_global_nonsensitive_start <= VMALLOC_START);
+}
diff --git a/include/asm-generic/asi.h b/include/asm-generic/asi.h
index a1c8ebff70e8..7c50d8b64fa4 100644
--- a/include/asm-generic/asi.h
+++ b/include/asm-generic/asi.h
@@ -18,6 +18,9 @@
 #define VMALLOC_GLOBAL_NONSENSITIVE_START	VMALLOC_START
 #define VMALLOC_GLOBAL_NONSENSITIVE_END	VMALLOC_END
 
+#define VMALLOC_LOCAL_NONSENSITIVE_START	VMALLOC_START
+#define VMALLOC_LOCAL_NONSENSITIVE_END	VMALLOC_END
+
 #ifndef __ASSEMBLY__
 
 struct asi_hooks {};
 
@@ -36,6 +39,8 @@ static inline int asi_init_mm_state(struct mm_struct *mm) { return 0; }
 
 static inline void asi_free_mm_state(struct mm_struct *mm) { }
 
+static inline void asi_vmalloc_init(void) { }
+
 static inline int asi_init(struct mm_struct *mm, int asi_index,
 			   struct asi **out_asi)
 {
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 5f85690f27b6..2b4eafc21fa5 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -41,8 +41,10 @@ struct notifier_block;		/* in notifier.h */
 
 #ifdef CONFIG_ADDRESS_SPACE_ISOLATION
 #define VM_GLOBAL_NONSENSITIVE	0x00000800	/* Similar to __GFP_GLOBAL_NONSENSITIVE */
+#define VM_LOCAL_NONSENSITIVE	0x00001000	/* Similar to __GFP_LOCAL_NONSENSITIVE */
 #else
 #define VM_GLOBAL_NONSENSITIVE	0
+#define VM_LOCAL_NONSENSITIVE	0
 #endif
 
 /* bits [20..32] reserved for arch specific ioremap internals */
@@ -67,6 +69,10 @@ struct vm_struct {
 	unsigned int		nr_pages;
 	phys_addr_t		phys_addr;
 	const void		*caller;
+#ifdef CONFIG_ADDRESS_SPACE_ISOLATION
+	/* Valid if flags contain VM_*_NONSENSITIVE */
+	struct asi		*asi;
+#endif
 };
 
 struct vmap_area {
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index f13bfe7e896b..ea94d8a1e2e9 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -2391,18 +2391,25 @@ void __init vmalloc_init(void)
 	 */
 	vmap_init_free_space();
 	vmap_initialized = true;
+
+	asi_vmalloc_init();
 }
 
+#ifdef CONFIG_ADDRESS_SPACE_ISOLATION
+
 static int asi_map_vm_area(struct vm_struct *area)
 {
 	if (!static_asi_enabled())
 		return 0;
 
 	if (area->flags & VM_GLOBAL_NONSENSITIVE)
-		return asi_map(ASI_GLOBAL_NONSENSITIVE, area->addr,
-			       get_vm_area_size(area));
+		area->asi = ASI_GLOBAL_NONSENSITIVE;
+	else if (area->flags & VM_LOCAL_NONSENSITIVE)
+		area->asi = ASI_LOCAL_NONSENSITIVE;
+	else
+		return 0;
 
-	return 0;
+	return asi_map(area->asi, area->addr, get_vm_area_size(area));
 }
 
 static void asi_unmap_vm_area(struct vm_struct *area)
@@ -2415,11 +2422,17 @@ static void asi_unmap_vm_area(struct vm_struct *area)
 	 * the case when the existing flush from try_purge_vmap_area_lazy()
 	 * and/or vm_unmap_aliases() happens non-lazily.
	 */
-	if (area->flags & VM_GLOBAL_NONSENSITIVE)
-		asi_unmap(ASI_GLOBAL_NONSENSITIVE, area->addr,
-			  get_vm_area_size(area), true);
+	if (area->flags & (VM_GLOBAL_NONSENSITIVE | VM_LOCAL_NONSENSITIVE))
+		asi_unmap(area->asi, area->addr, get_vm_area_size(area), true);
 }
 
+#else
+
+static inline int asi_map_vm_area(struct vm_struct *area) { return 0; }
+static inline void asi_unmap_vm_area(struct vm_struct *area) { }
+
+#endif
+
 static inline void setup_vmalloc_vm_locked(struct vm_struct *vm,
 	struct vmap_area *va, unsigned long flags, const void *caller)
 {
@@ -2463,6 +2476,15 @@ static struct vm_struct *__get_vm_area_node(unsigned long size,
 	if (unlikely(!size))
 		return NULL;
 
+	if (static_asi_enabled()) {
+		VM_BUG_ON((flags & VM_LOCAL_NONSENSITIVE) &&
+			  !(start >= VMALLOC_LOCAL_NONSENSITIVE_START &&
+			    end <= VMALLOC_LOCAL_NONSENSITIVE_END));
+
+		VM_BUG_ON((flags & VM_GLOBAL_NONSENSITIVE) &&
+			  start < VMALLOC_GLOBAL_NONSENSITIVE_START);
+	}
+
 	if (flags & VM_IOREMAP)
 		align = 1ul << clamp_t(int, get_count_order_long(size),
 				       PAGE_SHIFT, IOREMAP_MAX_ORDER);
@@ -3073,8 +3095,22 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
 	if (WARN_ON_ONCE(!size))
 		return NULL;
 
-	if (static_asi_enabled() && (vm_flags & VM_GLOBAL_NONSENSITIVE))
-		gfp_mask |= __GFP_ZERO;
+	if (static_asi_enabled()) {
+		VM_BUG_ON((vm_flags & (VM_LOCAL_NONSENSITIVE |
+				       VM_GLOBAL_NONSENSITIVE)) ==
+			  (VM_LOCAL_NONSENSITIVE | VM_GLOBAL_NONSENSITIVE));
+
+		if ((vm_flags & VM_LOCAL_NONSENSITIVE) &&
+		    !mm_asi_enabled(current->mm)) {
+			vm_flags &= ~VM_LOCAL_NONSENSITIVE;
+
+			if (end == VMALLOC_LOCAL_NONSENSITIVE_END)
+				end = VMALLOC_END;
+		}
+
+		if (vm_flags & (VM_GLOBAL_NONSENSITIVE | VM_LOCAL_NONSENSITIVE))
+			gfp_mask |= __GFP_ZERO;
+	}
 
 	if ((size >> PAGE_SHIFT) > totalram_pages()) {
 		warn_alloc(gfp_mask, NULL,
@@ -3166,11 +3202,19 @@ void *__vmalloc_node(unsigned long size, unsigned long align,
 			    gfp_t gfp_mask, int node, const void *caller)
 {
 	ulong vm_flags = 0;
+	ulong start = VMALLOC_START, end = VMALLOC_END;
 
-	if (static_asi_enabled() && (gfp_mask & __GFP_GLOBAL_NONSENSITIVE))
-		vm_flags |= VM_GLOBAL_NONSENSITIVE;
+	if (static_asi_enabled()) {
+		if (gfp_mask & __GFP_GLOBAL_NONSENSITIVE) {
+			vm_flags |= VM_GLOBAL_NONSENSITIVE;
+			start = VMALLOC_GLOBAL_NONSENSITIVE_START;
+		} else if (gfp_mask & __GFP_LOCAL_NONSENSITIVE) {
+			vm_flags |= VM_LOCAL_NONSENSITIVE;
+			end = VMALLOC_LOCAL_NONSENSITIVE_END;
+		}
+	}
 
-	return __vmalloc_node_range(size, align, VMALLOC_START, VMALLOC_END,
+	return __vmalloc_node_range(size, align, start, end,
 				    gfp_mask, PAGE_KERNEL, vm_flags, node, caller);
 }
 /*
@@ -3678,9 +3722,15 @@ struct vm_struct **pcpu_get_vm_areas(const unsigned long *offsets,
 	/* verify parameters and allocate data structures */
 	BUG_ON(offset_in_page(align) || !is_power_of_2(align));
 
-	if (static_asi_enabled() && (flags & VM_GLOBAL_NONSENSITIVE)) {
-		vmalloc_start = VMALLOC_GLOBAL_NONSENSITIVE_START;
-		vmalloc_end = VMALLOC_GLOBAL_NONSENSITIVE_END;
+	if (static_asi_enabled()) {
+		VM_BUG_ON((flags & (VM_LOCAL_NONSENSITIVE |
+				    VM_GLOBAL_NONSENSITIVE)) ==
+			  (VM_LOCAL_NONSENSITIVE | VM_GLOBAL_NONSENSITIVE));
+
+		if (flags & VM_GLOBAL_NONSENSITIVE)
+			vmalloc_start = VMALLOC_GLOBAL_NONSENSITIVE_START;
+		else if (flags & VM_LOCAL_NONSENSITIVE)
+			vmalloc_end = VMALLOC_LOCAL_NONSENSITIVE_END;
 	}
 
 	vmalloc_start = ALIGN(vmalloc_start, align);
-- 
2.35.1.473.g83b2b277ed-goog