From: Ofir Weisse <oweisse@xxxxxxxxxx>

There are several areas in memory which we consider non-sensitive; these
areas should be mapped in every ASI domain. We map these areas in
asi_global_init(). We modified some of the linker scripts to ensure these
areas start and end on page boundaries.

The areas:
- _stext --> _etext
- __init_begin --> __init_end
- __start_rodata --> __end_rodata
- __start_once --> __end_once
- __start___ex_table --> __stop___ex_table
- __start_asi_nonsensitive --> __end_asi_nonsensitive
- __start_asi_nonsensitive_readmostly --> __end_asi_nonsensitive_readmostly
- __vvar_page --> + PAGE_SIZE
- APIC_BASE --> + PAGE_SIZE
- phys_base --> + PAGE_SIZE
- __start___tracepoints_ptrs --> __stop___tracepoints_ptrs
- __start___tracepoint_str --> __stop___tracepoint_str
- __per_cpu_asi_start --> __per_cpu_asi_end (percpu)
- irq_stack_backing_store --> + sizeof(irq_stack_backing_store) (percpu)

The PGD entries covering the following addresses are cloned, modeled after
KPTI:
- CPU_ENTRY_AREA_BASE
- ESPFIX_BASE_ADDR

Signed-off-by: Ofir Weisse <oweisse@xxxxxxxxxx>
---
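For reference, a minimal sketch (not part of the patch) of what each of
these mappings boils down to once the linker script guarantees page
alignment. asi_map_section() is a hypothetical helper shown only to
illustrate the asi_map() call; the patch itself open-codes this via the
MAP_SECTION() macro in asi_global_init() below:

#include <linux/init.h>
#include <linux/mm.h>			/* PAGE_ALIGNED()                     */
#include <asm/asi.h>			/* asi_map(), ASI_GLOBAL_NONSENSITIVE */
#include <asm-generic/sections.h>	/* _stext, _etext, ...                */

/* Illustrative only: map one linker-delimited, page-aligned region into
 * the ASI global non-sensitive address space. */
static int __init asi_map_section(void *start, void *end)
{
	unsigned long s = (unsigned long)start;
	unsigned long e = (unsigned long)end;

	/* The linker-script changes below are meant to guarantee this. */
	WARN_ON(!PAGE_ALIGNED(s) || !PAGE_ALIGNED(e));

	return asi_map(ASI_GLOBAL_NONSENSITIVE, (void *)s, e - s);
}

/* e.g.: err = asi_map_section(_stext, _etext); */
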
 arch/x86/kernel/head_64.S         | 12 +++++
 arch/x86/kernel/vmlinux.lds.S     |  2 +-
 arch/x86/mm/asi.c                 | 82 +++++++++++++++++++++++++++++++
 include/asm-generic/vmlinux.lds.h | 13 +++--
 4 files changed, 105 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index d8b3ebd2bb85..3d3874661895 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -574,9 +574,21 @@ SYM_DATA_LOCAL(early_gdt_descr_base, .quad INIT_PER_CPU_VAR(gdt_page))
 
 	.align 16
 /* This must match the first entry in level2_kernel_pgt */
+
+#ifdef CONFIG_ADDRESS_SPACE_ISOLATION
+/* TODO: Find a way to mark .section for phys_base */
+/* Ideally, we want to map phys_base in .data..asi_non_sensitive. That doesn't
+ * seem to work properly. For now, we just make sure phys_base is in its own
+ * page. */
+	.align PAGE_SIZE
+#endif
 SYM_DATA(phys_base, .quad 0x0)
 EXPORT_SYMBOL(phys_base)
 
+#ifdef CONFIG_ADDRESS_SPACE_ISOLATION
+	.align PAGE_SIZE
+#endif
+
 #include "../../x86/xen/xen-head.S"
 
 	__PAGE_ALIGNED_BSS
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 3d6dc12d198f..2b3668291785 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -148,8 +148,8 @@ SECTIONS
 	} :text =0xcccc
 
 	/* End of text section, which should occupy whole number of pages */
-	_etext = .;
 	. = ALIGN(PAGE_SIZE);
+	_etext = .;
 
 	X86_ALIGN_RODATA_BEGIN
 	RO_DATA(PAGE_SIZE)
diff --git a/arch/x86/mm/asi.c b/arch/x86/mm/asi.c
index 04628949e89d..7f2aa1823736 100644
--- a/arch/x86/mm/asi.c
+++ b/arch/x86/mm/asi.c
@@ -9,6 +9,7 @@
 
 #include <asm/asi.h>
 #include <asm/pgalloc.h>
+#include <asm/processor.h>	/* struct irq_stack */
 #include <asm/mmu_context.h>
 
 #include "mm_internal.h"
@@ -17,6 +18,24 @@
 #undef pr_fmt
 #define pr_fmt(fmt)	"ASI: " fmt
 
+#include <linux/extable.h>
+#include <asm-generic/sections.h>
+
+extern struct exception_table_entry __start___ex_table[];
+extern struct exception_table_entry __stop___ex_table[];
+
+extern const char __start_asi_nonsensitive[], __end_asi_nonsensitive[];
+extern const char __start_asi_nonsensitive_readmostly[],
+	__end_asi_nonsensitive_readmostly[];
+extern const char __per_cpu_asi_start[], __per_cpu_asi_end[];
+extern const char *__start___tracepoint_str[];
+extern const char *__stop___tracepoint_str[];
+extern const char *__start___tracepoints_ptrs[];
+extern const char *__stop___tracepoints_ptrs[];
+extern const char __vvar_page[];
+
+DECLARE_PER_CPU_PAGE_ALIGNED(struct irq_stack, irq_stack_backing_store);
+
 static struct asi_class asi_class[ASI_MAX_NUM] __asi_not_sensitive;
 static DEFINE_SPINLOCK(asi_class_lock __asi_not_sensitive);
 
@@ -412,6 +431,7 @@ void asi_unload_module(struct module* module)
 static int __init asi_global_init(void)
 {
 	uint i, n;
+	int err = 0;
 
 	if (!boot_cpu_has(X86_FEATURE_ASI))
 		return 0;
@@ -436,6 +456,68 @@ static int __init asi_global_init(void)
 
 	pcpu_map_asi_reserved_chunk();
 
+
+	/*
+	 * TODO: We need to ensure that all the sections mapped below are
+	 * actually page-aligned by the linker. For now, we temporarily just
+	 * align the start/end addresses here, but that is incorrect as the
+	 * rest of the page could potentially contain sensitive data.
+	 */
+#define MAP_SECTION(start, end) \
+	pr_err("%s:%d mapping 0x%lx --> 0x%lx", \
+	       __FUNCTION__, __LINE__, start, end); \
+	err = asi_map(ASI_GLOBAL_NONSENSITIVE, \
+		      (void*)((unsigned long)(start) & PAGE_MASK),\
+		      PAGE_ALIGN((unsigned long)(end)) - \
+		      ((unsigned long)(start) & PAGE_MASK)); \
+	BUG_ON(err);
+
+#define MAP_SECTION_PERCPU(start, size) \
+	pr_err("%s:%d mapping PERCPU 0x%lx --> 0x%lx", \
+	       __FUNCTION__, __LINE__, start, (unsigned long)start+size); \
+	err = asi_map_percpu(ASI_GLOBAL_NONSENSITIVE, \
+			     (void*)((unsigned long)(start) & PAGE_MASK), \
+			     PAGE_ALIGN((unsigned long)(size))); \
+	BUG_ON(err);
+
+	MAP_SECTION(_stext, _etext);
+	MAP_SECTION(__init_begin, __init_end);
+	MAP_SECTION(__start_rodata, __end_rodata);
+	MAP_SECTION(__start_once, __end_once);
+	MAP_SECTION(__start___ex_table, __stop___ex_table);
+	MAP_SECTION(__start_asi_nonsensitive, __end_asi_nonsensitive);
+	MAP_SECTION(__start_asi_nonsensitive_readmostly,
+		    __end_asi_nonsensitive_readmostly);
+	MAP_SECTION(__vvar_page, __vvar_page + PAGE_SIZE);
+	MAP_SECTION(APIC_BASE, APIC_BASE + PAGE_SIZE);
+	MAP_SECTION(&phys_base, &phys_base + PAGE_SIZE);
+
+	/* TODO: add a build flag to enable/disable mapping only when
+	 * instrumentation is used */
+	MAP_SECTION(__start___tracepoints_ptrs, __stop___tracepoints_ptrs);
+	MAP_SECTION(__start___tracepoint_str, __stop___tracepoint_str);
+
+	MAP_SECTION_PERCPU((void*)__per_cpu_asi_start,
+			   __per_cpu_asi_end - __per_cpu_asi_start);
+
+	MAP_SECTION_PERCPU(&irq_stack_backing_store,
+			   sizeof(irq_stack_backing_store));
+
+	/* We have to map the stack canary into ASI. This is far from ideal, as
+	 * attackers can use L1TF to steal the canary value, and then perhaps
+	 * mount some other attack including a buffer overflow. This is a price
+	 * we must pay to use ASI.
+	 */
+	MAP_SECTION_PERCPU(&fixed_percpu_data, PAGE_SIZE);
+
+#define CLONE_INIT_PGD(addr) \
+	asi_clone_pgd(asi_global_nonsensitive_pgd, init_mm.pgd, addr);
+
+	CLONE_INIT_PGD(CPU_ENTRY_AREA_BASE);
+#ifdef CONFIG_X86_ESPFIX64
+	CLONE_INIT_PGD(ESPFIX_BASE_ADDR);
+#endif
+
 	return 0;
 }
 subsys_initcall(asi_global_init)
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 0a931aedc285..7152ce3613f5 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -235,8 +235,10 @@
 #define TRACE_PRINTKS()	 __start___trace_bprintk_fmt = .;	\
 			 KEEP(*(__trace_printk_fmt)) /* Trace_printk fmt' pointer */ \
 			 __stop___trace_bprintk_fmt = .;
-#define TRACEPOINT_STR() __start___tracepoint_str = .;		\
+#define TRACEPOINT_STR() . = ALIGN(PAGE_SIZE);			\
+			 __start___tracepoint_str = .;		\
 			 KEEP(*(__tracepoint_str)) /* Trace_printk fmt' pointer */ \
+			 . = ALIGN(PAGE_SIZE);			\
 			 __stop___tracepoint_str = .;
 #else
 #define TRACE_PRINTKS()
@@ -348,8 +350,10 @@
 	MEM_KEEP(init.data*)						\
 	MEM_KEEP(exit.data*)						\
 	*(.data.unlikely)						\
+	. = ALIGN(PAGE_SIZE);						\
 	__start_once = .;						\
 	*(.data.once)							\
+	. = ALIGN(PAGE_SIZE);						\
 	__end_once = .;							\
 	STRUCT_ALIGN();							\
 	*(__tracepoints)						\
@@ -453,9 +457,10 @@
 		*(.rodata) *(.rodata.*)					\
 		SCHED_DATA						\
 		RO_AFTER_INIT_DATA	/* Read only after init */	\
-		. = ALIGN(8);						\
+		. = ALIGN(PAGE_SIZE);					\
 		__start___tracepoints_ptrs = .;				\
 		KEEP(*(__tracepoints_ptrs)) /* Tracepoints: pointer array */ \
+		. = ALIGN(PAGE_SIZE);					\
 		__stop___tracepoints_ptrs = .;				\
 		*(__tracepoints_strings)/* Tracepoints: strings */	\
 	}								\
@@ -671,11 +676,13 @@
  */
 #define EXCEPTION_TABLE(align)						\
 	. = ALIGN(align);						\
+	. = ALIGN(PAGE_SIZE);						\
 	__ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) {		\
 		__start___ex_table = .;					\
 		KEEP(*(__ex_table))					\
+		. = ALIGN(PAGE_SIZE);					\
 		__stop___ex_table = .;					\
-	}
+	}								\
 
 /*
  * .BTF
-- 
2.35.1.473.g83b2b277ed-goog