From: Hou Wenlong <houwenlong.hwl@xxxxxxxxxxxx>

Relocate the kernel address space layout to a specific range, similar to
what KASLR does. Since there is not enough room for KASAN's shadow memory,
KASAN is not supported for PVM guests.

Suggested-by: Lai Jiangshan <jiangshan.ljs@xxxxxxxxxxxx>
Signed-off-by: Hou Wenlong <houwenlong.hwl@xxxxxxxxxxxx>
Signed-off-by: Lai Jiangshan <jiangshan.ljs@xxxxxxxxxxxx>
---
 arch/x86/Kconfig                  |  3 +-
 arch/x86/include/asm/pvm_para.h   |  6 +++
 arch/x86/kernel/head64_identity.c |  6 +++
 arch/x86/kernel/pvm.c             | 64 +++++++++++++++++++++++++++++++
 arch/x86/mm/kaslr.c               |  4 ++
 5 files changed, 82 insertions(+), 1 deletion(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 1b4bea3db53d..ded687cc23ad 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -853,7 +853,8 @@ config KVM_GUEST
 
 config PVM_GUEST
 	bool "PVM Guest support"
-	depends on X86_64 && KVM_GUEST && X86_PIE
+	depends on X86_64 && KVM_GUEST && X86_PIE && !KASAN
+	select RANDOMIZE_MEMORY
 	select RELOCATABLE_UNCOMPRESSED_KERNEL
 	default n
 	help
diff --git a/arch/x86/include/asm/pvm_para.h b/arch/x86/include/asm/pvm_para.h
index efd7afdf9be9..ff0bf0fe7dc4 100644
--- a/arch/x86/include/asm/pvm_para.h
+++ b/arch/x86/include/asm/pvm_para.h
@@ -10,6 +10,7 @@
 #include <uapi/asm/kvm_para.h>
 
 void __init pvm_early_setup(void);
+bool __init pvm_kernel_layout_relocate(void);
 
 static inline void pvm_cpuid(unsigned int *eax, unsigned int *ebx,
 			     unsigned int *ecx, unsigned int *edx)
@@ -64,6 +65,11 @@ static inline bool pvm_detect(void)
 static inline void pvm_early_setup(void)
 {
 }
+
+static inline bool pvm_kernel_layout_relocate(void)
+{
+	return false;
+}
 #endif /* CONFIG_PVM_GUEST */
 
 #endif /* _ASM_X86_PVM_PARA_H */
diff --git a/arch/x86/kernel/head64_identity.c b/arch/x86/kernel/head64_identity.c
index f69f9904003c..467fe493c9ba 100644
--- a/arch/x86/kernel/head64_identity.c
+++ b/arch/x86/kernel/head64_identity.c
@@ -396,6 +396,12 @@ static void __head detect_pvm_range(void)
 	pml4_index_end = (msr_val >> 16) & 0x1ff;
 	pvm_range_start = (0x1fffe00 | pml4_index_start) * P4D_SIZE;
 	pvm_range_end = (0x1fffe00 | pml4_index_end) * P4D_SIZE;
+
+	/*
+	 * Early page faults would map pages into the direct mapping area,
+	 * so 'page_offset_base' needs to be modified here early.
+	 */
+	page_offset_base = pvm_range_start;
 }
 
 void __head pvm_relocate_kernel(unsigned long physbase)
diff --git a/arch/x86/kernel/pvm.c b/arch/x86/kernel/pvm.c
index fc82c71b305b..9cdfbaa15dbb 100644
--- a/arch/x86/kernel/pvm.c
+++ b/arch/x86/kernel/pvm.c
@@ -10,7 +10,10 @@
  */
 #define pr_fmt(fmt) "pvm-guest: " fmt
 
+#include <linux/mm_types.h>
+
 #include <asm/cpufeature.h>
+#include <asm/cpu_entry_area.h>
 #include <asm/pvm_para.h>
 
 unsigned long pvm_range_start __initdata;
@@ -23,3 +26,64 @@ void __init pvm_early_setup(void)
 
 	setup_force_cpu_cap(X86_FEATURE_KVM_PVM_GUEST);
 }
+
+#define TB_SHIFT	40
+#define HOLE_SIZE	(1UL << 39)
+
+#define PVM_DIRECT_MAPPING_SIZE	(8UL << TB_SHIFT)
+#define PVM_VMALLOC_SIZE	(5UL << TB_SHIFT)
+#define PVM_VMEM_MAPPING_SIZE	(1UL << TB_SHIFT)
+
+/*
+ * For a PVM guest, the hypervisor provides one valid virtual address
+ * range for the guest kernel. The guest kernel needs to adjust its
+ * layout, including the direct mapping area, vmalloc area, vmemmap area,
+ * and CPU entry area, to fit within this range. If the range start is
+ * 0xffffd90000000000, a PVM guest kernel with 4-level page tables could
+ * arrange its layout as follows:
+ *
+ * ffff800000000000 - ffff87ffffffffff (=43 bits) guard hole, reserved for hypervisor
+ * ...		host kernel used ...	guest kernel range start
+ * ffffd90000000000 - ffffe0ffffffffff (=8 TB) direct mapping of all physical memory
+ * ffffe10000000000 - ffffe17fffffffff (=39 bits) hole
+ * ffffe18000000000 - ffffe67fffffffff (=5 TB) vmalloc/ioremap space
+ * ffffe68000000000 - ffffe6ffffffffff (=39 bits) hole
+ * ffffe70000000000 - ffffe7ffffffffff (=40 bits) virtual memory map (1TB)
+ * ffffe80000000000 - ffffe87fffffffff (=39 bits) cpu_entry_area mapping
+ * ffffe88000000000 - ffffe8ff7fffffff (=510 G) hole
+ * ffffe8ff80000000 - ffffe8ffffffffff (=2 G) kernel image
+ * ...		host kernel used ...	guest kernel range end
+ *
+ */
+bool __init pvm_kernel_layout_relocate(void)
+{
+	unsigned long area_size;
+
+	if (!boot_cpu_has(X86_FEATURE_KVM_PVM_GUEST)) {
+		vmemory_end = VMALLOC_START + (VMALLOC_SIZE_TB << 40) - 1;
+		return false;
+	}
+
+	if (!IS_ALIGNED(pvm_range_start, PGDIR_SIZE))
+		panic("The start of the allowed range is not aligned");
+
+	area_size = max_pfn << PAGE_SHIFT;
+	if (area_size > PVM_DIRECT_MAPPING_SIZE)
+		panic("The memory size is too large for the direct mapping area");
+
+	vmalloc_base = page_offset_base + PVM_DIRECT_MAPPING_SIZE + HOLE_SIZE;
+	vmemory_end = vmalloc_base + PVM_VMALLOC_SIZE;
+
+	vmemmap_base = vmemory_end + HOLE_SIZE;
+	area_size = max_pfn * sizeof(struct page);
+	if (area_size > PVM_VMEM_MAPPING_SIZE)
+		panic("The memory size is too large for the virtual memory mapping area");
+
+	cpu_entry_area_base = vmemmap_base + PVM_VMEM_MAPPING_SIZE;
+	BUILD_BUG_ON(CPU_ENTRY_AREA_MAP_SIZE > (1UL << 39));
+
+	if (cpu_entry_area_base + (2UL << 39) > pvm_range_end)
+		panic("The size of the allowed range is too small");
+
+	return true;
+}
diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c
index e3825c7542a3..f6f332abf515 100644
--- a/arch/x86/mm/kaslr.c
+++ b/arch/x86/mm/kaslr.c
@@ -28,6 +28,7 @@
 
 #include <asm/setup.h>
 #include <asm/kaslr.h>
+#include <asm/pvm_para.h>
 
 #include "mm_internal.h"
 
@@ -82,6 +83,9 @@ void __init kernel_randomize_memory(void)
 	BUILD_BUG_ON(vaddr_end != RAW_CPU_ENTRY_AREA_BASE);
 	BUILD_BUG_ON(vaddr_end > __START_KERNEL_map);
 
+	if (pvm_kernel_layout_relocate())
+		return;
+
 	if (!kaslr_memory_enabled())
 		return;
 
-- 
2.19.1.6.gb485710b
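
As a quick sanity check of the layout arithmetic above, here is a small
standalone userspace sketch (not part of the patch; the constants are
copied from arch/x86/kernel/pvm.c and the base address is the example
used in the layout comment) that recomputes each area boundary:

/*
 * Standalone sketch: recompute the PVM guest layout boundaries for the
 * example range start used in the pvm.c comment. Not part of the patch;
 * constants mirror arch/x86/kernel/pvm.c.
 */
#include <stdio.h>

#define TB_SHIFT		40
#define HOLE_SIZE		(1UL << 39)
#define PVM_DIRECT_MAPPING_SIZE	(8UL << TB_SHIFT)
#define PVM_VMALLOC_SIZE	(5UL << TB_SHIFT)
#define PVM_VMEM_MAPPING_SIZE	(1UL << TB_SHIFT)

int main(void)
{
	/* Example range start from the layout comment. */
	unsigned long page_offset_base = 0xffffd90000000000UL;
	unsigned long vmalloc_base, vmemory_end, vmemmap_base;
	unsigned long cpu_entry_area_base, range_end_min;

	vmalloc_base = page_offset_base + PVM_DIRECT_MAPPING_SIZE + HOLE_SIZE;
	vmemory_end = vmalloc_base + PVM_VMALLOC_SIZE;
	vmemmap_base = vmemory_end + HOLE_SIZE;
	cpu_entry_area_base = vmemmap_base + PVM_VMEM_MAPPING_SIZE;
	/* cpu_entry_area + hole + kernel image take 2^40 bytes in total. */
	range_end_min = cpu_entry_area_base + (2UL << 39);

	printf("direct mapping base : %016lx\n", page_offset_base);
	printf("vmalloc base        : %016lx\n", vmalloc_base);
	printf("vmalloc end         : %016lx\n", vmemory_end);
	printf("vmemmap base        : %016lx\n", vmemmap_base);
	printf("cpu_entry_area base : %016lx\n", cpu_entry_area_base);
	printf("minimum range end   : %016lx\n", range_end_min);
	return 0;
}

The printed values match the table in the comment: vmalloc at
ffffe18000000000, vmemmap at ffffe70000000000, cpu_entry_area at
ffffe80000000000, and a minimum range end of ffffe90000000000.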