From: Ashish Kalra <ashish.kalra@xxxxxxx> Handle cases where the RMP table placement in the BIOS is not 2M aligned and then the kexec kernel could try to allocate from within that chunk and that causes a fatal RMP fault. The kexec failure is illustrated below from the kernel logs: [ 0.000000] SEV-SNP: RMP table physical range [0x0000007ffe800000 - 0x000000807f0fffff] [ 0.000000] BIOS-provided physical RAM map: [ 0.000000] BIOS-e820: [mem 0x0000000000000000-0x000000000008efff] usable [ 0.000000] BIOS-e820: [mem 0x000000000008f000-0x000000000008ffff] ACPI NVS [ 0.000000] BIOS-e820: [mem 0x0000000000090000-0x000000000009ffff] usable [ 0.000000] BIOS-e820: [mem 0x0000000000100000-0x000000005a14afff] usable [ 0.000000] BIOS-e820: [mem 0x000000005a14b000-0x000000005a34afff] reserved [ 0.000000] BIOS-e820: [mem 0x000000005a34b000-0x0000000067acefff] usable [ 0.000000] BIOS-e820: [mem 0x0000000067acf000-0x000000006dfcefff] reserved [ 0.000000] BIOS-e820: [mem 0x000000006dfcf000-0x000000006edfefff] ACPI NVS [ 0.000000] BIOS-e820: [mem 0x000000006edff000-0x000000006effefff] ACPI data [ 0.000000] BIOS-e820: [mem 0x000000006efff000-0x000000006effffff] usable [ 0.000000] BIOS-e820: [mem 0x000000006f000000-0x000000006f00afff] ACPI NVS [ 0.000000] BIOS-e820: [mem 0x000000006f00b000-0x000000006fffffff] usable [ 0.000000] BIOS-e820: [mem 0x0000000070000000-0x000000008fffffff] reserved [ 0.000000] BIOS-e820: [mem 0x00000000aa000000-0x00000000aaffffff] reserved [ 0.000000] BIOS-e820: [mem 0x00000000c5000000-0x00000000c5ffffff] reserved [ 0.000000] BIOS-e820: [mem 0x00000000e0000000-0x00000000e0ffffff] reserved [ 0.000000] BIOS-e820: [mem 0x00000000fd000000-0x00000000ffffffff] reserved [ 0.000000] BIOS-e820: [mem 0x0000000100000000-0x000000407fcfffff] usable [ 0.000000] BIOS-e820: [mem 0x000000407fd00000-0x000000407fffffff] reserved [ 0.000000] BIOS-e820: [mem 0x0000004080000000-0x0000007ffe7fffff] usable [ 0.000000] BIOS-e820: [mem 0x0000007ffe800000-0x000000807f0fffff] reserved [ 
0.000000] BIOS-e820: [mem 0x000000807f100000-0x000000807f1fefff] usable As seen here in the e820 memory map, the end of the RMP table range is not aligned to 2MB, and the remainder of that 2MB region is not reserved but marked usable as RAM. Subsequently, kexec -s (KEXEC_FILE_LOAD syscall) loads its purgatory code and boot_param, command line and other setup data into this RAM region as seen in the kexec logs below, which leads to a fatal RMP fault during kexec boot. [ 173.113085] Loaded purgatory at 0x807f1fa000 [ 173.113099] Loaded boot_param, command line and misc at 0x807f1f8000 bufsz=0x1350 memsz=0x2000 [ 173.113107] Loaded 64bit kernel at 0x7ffae00000 bufsz=0xd06200 memsz=0x3894000 [ 173.113291] Loaded initrd at 0x7ff6c89000 bufsz=0x4176014 memsz=0x4176014 [ 173.113296] E820 memmap: [ 173.113298] 0000000000000000-000000000008efff (1) [ 173.113300] 000000000008f000-000000000008ffff (4) [ 173.113302] 0000000000090000-000000000009ffff (1) [ 173.113303] 0000000000100000-000000005a14afff (1) [ 173.113305] 000000005a14b000-000000005a34afff (2) [ 173.113306] 000000005a34b000-0000000067acefff (1) [ 173.113308] 0000000067acf000-000000006dfcefff (2) [ 173.113309] 000000006dfcf000-000000006edfefff (4) [ 173.113311] 000000006edff000-000000006effefff (3) [ 173.113312] 000000006efff000-000000006effffff (1) [ 173.113314] 000000006f000000-000000006f00afff (4) [ 173.113315] 000000006f00b000-000000006fffffff (1) [ 173.113317] 0000000070000000-000000008fffffff (2) [ 173.113318] 00000000aa000000-00000000aaffffff (2) [ 173.113319] 00000000c5000000-00000000c5ffffff (2) [ 173.113321] 00000000e0000000-00000000e0ffffff (2) [ 173.113322] 00000000fd000000-00000000ffffffff (2) [ 173.113324] 0000000100000000-000000407fcfffff (1) [ 173.113325] 000000407fd00000-000000407fffffff (2) [ 173.113327] 0000004080000000-0000007ffe7fffff (1) [ 173.113328] 0000007ffe800000-000000807f0fffff (2) [ 173.113330] 000000807f100000-000000807f1fefff (1) [ 173.113331] 000000807f1ff000-000000807fffffff (2) [ 173.690528] nr_segments = 4 [ 173.690533] 
segment[0]: buf=0x00000000e626d1a2 bufsz=0x4000 mem=0x807f1fa000 memsz=0x5000 [ 173.690546] segment[1]: buf=0x0000000029c67bd6 bufsz=0x1350 mem=0x807f1f8000 memsz=0x2000 [ 173.690552] segment[2]: buf=0x0000000045c60183 bufsz=0xd06200 mem=0x7ffae00000 memsz=0x3894000 [ 173.697994] segment[3]: buf=0x000000006e54f08d bufsz=0x4176014 mem=0x7ff6c89000 memsz=0x4177000 [ 173.708672] kexec_file_load: type:0, start:0x807f1fa150 head:0x1184d0002 flags:0x0 Check if RMP table start & end physical range in e820 tables are not aligned to 2MB and in that case map this range to reserved in all the three e820 tables. Fixes: c3b86e61b756 ("x86/cpufeatures: Enable/unmask SEV-SNP CPU feature") Signed-off-by: Ashish Kalra <ashish.kalra@xxxxxxx> --- arch/x86/include/asm/sev.h | 2 ++ arch/x86/mm/mem_encrypt.c | 13 ++++++++++++ arch/x86/virt/svm/sev.c | 42 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 57 insertions(+) diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index 7f57382afee4..24300927a476 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -269,6 +269,7 @@ int rmp_make_private(u64 pfn, u64 gpa, enum pg_level level, u32 asid, bool immut int rmp_make_shared(u64 pfn, enum pg_level level); void snp_leak_pages(u64 pfn, unsigned int npages); void kdump_sev_callback(void); +void snp_rmptable_e820_fixup(void); #else static inline bool snp_probe_rmptable_info(void) { return false; } static inline int snp_lookup_rmpentry(u64 pfn, bool *assigned, int *level) { return -ENODEV; } @@ -282,6 +283,7 @@ static inline int rmp_make_private(u64 pfn, u64 gpa, enum pg_level level, u32 as static inline int rmp_make_shared(u64 pfn, enum pg_level level) { return -ENODEV; } static inline void snp_leak_pages(u64 pfn, unsigned int npages) {} static inline void kdump_sev_callback(void) { } +static inline void snp_rmptable_e820_fixup(void) {} #endif #endif diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c index 6f3b3e028718..d88c942dd311 
100644 --- a/arch/x86/mm/mem_encrypt.c +++ b/arch/x86/mm/mem_encrypt.c @@ -102,6 +102,19 @@ void __init mem_encrypt_setup_arch(void) phys_addr_t total_mem = memblock_phys_mem_size(); unsigned long size; + /* + * Invoke callback to do RMP table fixups which needs to be called + * during setup_arch() after the e820 tables have been setup + * in e820__memory_setup() and this function is appropriate to + * invoke the callback to apply any memory encryption platform specific + * quirks. The callback to do RMP table fixups cannot be invoked from + * snp_init() as snp_init() is called from sme_enable() in + * startup_64() which is before setup_arch() and e820 tables + * have still not been setup. + */ + if (cc_platform_has(CC_ATTR_HOST_SEV_SNP)) + snp_rmptable_e820_fixup(); + if (!cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) return; diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c index ab0e8448bb6e..1b4b99b26bec 100644 --- a/arch/x86/virt/svm/sev.c +++ b/arch/x86/virt/svm/sev.c @@ -163,6 +163,48 @@ bool snp_probe_rmptable_info(void) return true; } +static void __init __snp_e820_tables_fixup(u64 pa) +{ + if (IS_ALIGNED(pa, PMD_SIZE)) + return; + + /* + * Check if RMP table start and end physical range + * in e820_tables are not aligned to 2MB and in that case map + * this range in all the three e820 tables to be reserved. + * The e820_table needs to be updated as it is converted to + * kernel memory resources and used by KEXEC_FILE_LOAD syscall + * to load kexec segments. The e820_table_firmware needs to be + * updated as it is exposed to sysfs and used by KEXEC_LOAD + * syscall to load kexec segments and e820_table_kexec needs + * to be updated as it passed to kexec-ed kernel. 
+ */ + pa = ALIGN_DOWN(pa, PMD_SIZE); + if (e820__mapped_any(pa, pa + PMD_SIZE, E820_TYPE_RAM)) { + pr_info("Reserving start/end of RMP table on a 2MB boundary [0x%016llx]\n", pa); + e820__range_update(pa, PMD_SIZE, E820_TYPE_RAM, E820_TYPE_RESERVED); + e820__range_update_table(e820_table_kexec, pa, PMD_SIZE, E820_TYPE_RAM, E820_TYPE_RESERVED); + e820__range_update_table(e820_table_firmware, pa, PMD_SIZE, E820_TYPE_RAM, E820_TYPE_RESERVED); + } +} + +/* + * Callback to do any RMP table fixups, needs to be called + * after e820__memory_setup(), after the e820 tables are + * setup/populated and before e820__reserve_resources(), before + * the e820 map has been converted to the standard Linux memory + * resources and e820 map is no longer used and modifying it + * has no effect. Handle cases where the RMP table placement in + * the BIOS is not 2M aligned and then the kexec kernel could + * try to allocate from within that chunk and that causes a + * fatal RMP fault. + */ +void __init snp_rmptable_e820_fixup(void) +{ + __snp_e820_tables_fixup(probed_rmp_base); + __snp_e820_tables_fixup(probed_rmp_base + probed_rmp_size); +} + /* * Do the necessary preparations which are verified by the firmware as * described in the SNP_INIT_EX firmware command description in the SNP -- 2.34.1 _______________________________________________ kexec mailing list kexec@xxxxxxxxxxxxxxxxxxx http://lists.infradead.org/mailman/listinfo/kexec