From: "Madhavan T. Venkataraman" <madvenka@xxxxxxxxxxxxxxxxxxx> Introduce the "Persistent-Across-Kexec memory (prmem)" feature that allows user and kernel data to be persisted across kexecs. The first step is to set aside some memory for storing persistent data. Introduce a new kernel command line parameter for this: prmem=size[KMG] Allocate this memory from memblocks during boot. Make sure that the allocation is done late enough so it does not interfere with any fixed range allocations. Define a "prmem_region" structure to store the range that is allocated. The region structure will be used to manage the memory. Define a "prmem" structure for storing persistence metadata. Allocate a metadata page to contain the metadata structure. Initialize the metadata. Add the initial region to a region list in the metadata. Signed-off-by: Madhavan T. Venkataraman <madvenka@xxxxxxxxxxxxxxxxxxx> --- arch/x86/kernel/setup.c | 2 + include/linux/prmem.h | 76 ++++++++++++++++++++++++++++++++++++ kernel/Makefile | 1 + kernel/prmem/Makefile | 3 ++ kernel/prmem/prmem_init.c | 27 +++++++++++++ kernel/prmem/prmem_parse.c | 33 ++++++++++++++++ kernel/prmem/prmem_region.c | 21 ++++++++++ kernel/prmem/prmem_reserve.c | 56 ++++++++++++++++++++++++++ mm/mm_init.c | 2 + 9 files changed, 221 insertions(+) create mode 100644 include/linux/prmem.h create mode 100644 kernel/prmem/Makefile create mode 100644 kernel/prmem/prmem_init.c create mode 100644 kernel/prmem/prmem_parse.c create mode 100644 kernel/prmem/prmem_region.c create mode 100644 kernel/prmem/prmem_reserve.c diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index fd975a4a5200..f2b13b3d3ead 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -25,6 +25,7 @@ #include <linux/static_call.h> #include <linux/swiotlb.h> #include <linux/random.h> +#include <linux/prmem.h> #include <uapi/linux/mount.h> @@ -1231,6 +1232,7 @@ void __init setup_arch(char **cmdline_p) * won't consume hotpluggable memory. 
*/ reserve_crashkernel(); + prmem_reserve(); memblock_find_dma_reserve(); diff --git a/include/linux/prmem.h b/include/linux/prmem.h new file mode 100644 index 000000000000..7f22016c4ad2 --- /dev/null +++ b/include/linux/prmem.h @@ -0,0 +1,76 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Persistent-Across-Kexec memory (prmem) - Definitions. + * + * Copyright (C) 2023 Microsoft Corporation + * Author: Madhavan T. Venkataraman (madvenka@xxxxxxxxxxxxxxxxxxx) + */ +#ifndef _LINUX_PRMEM_H +#define _LINUX_PRMEM_H +/* + * The prmem feature can be used to persist kernel and user data across kexec + * reboots in memory for various uses. E.g., + * + * - Saving cached data. E.g., database caches. + * - Saving state. E.g., KVM guest states. + * - Saving historical information since the last cold boot such as + * events, logs and journals. + * - Saving measurements for integrity checks on the next boot. + * - Saving driver data. + * - Saving IOMMU mappings. + * - Saving MMIO config information. + * + * This is useful on systems where there is no non-volatile storage or + * non-volatile storage is too slow. + */ +#include <linux/types.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/memblock.h> +#include <linux/printk.h> + +#include <linux/errno.h> +#include <asm/page.h> +#include <asm/setup.h> +/* + * A prmem region supplies the memory for storing persistent data. + * + * node List node. + * pa Physical address of the region. + * size Size of the region in bytes. + */ +struct prmem_region { + struct list_head node; + unsigned long pa; + size_t size; +}; + +/* + * PRMEM metadata. + * + * metadata Physical address of the metadata page. + * size Size of initial memory allocated to prmem. + * + * regions List of memory regions. + */ +struct prmem { + unsigned long metadata; + size_t size; + + /* Persistent Regions. 
*/ + struct list_head regions; +}; + +extern struct prmem *prmem; +extern unsigned long prmem_metadata; +extern unsigned long prmem_pa; +extern size_t prmem_size; + +/* Kernel API. */ +void prmem_reserve(void); +void prmem_init(void); + +/* Internal functions. */ +struct prmem_region *prmem_add_region(unsigned long pa, size_t size); + +#endif /* _LINUX_PRMEM_H */ diff --git a/kernel/Makefile b/kernel/Makefile index 3947122d618b..43b485b0467a 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -50,6 +50,7 @@ obj-y += rcu/ obj-y += livepatch/ obj-y += dma/ obj-y += entry/ +obj-y += prmem/ obj-$(CONFIG_MODULES) += module/ obj-$(CONFIG_KCMP) += kcmp.o diff --git a/kernel/prmem/Makefile b/kernel/prmem/Makefile new file mode 100644 index 000000000000..11a53d49312a --- /dev/null +++ b/kernel/prmem/Makefile @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0 + +obj-y += prmem_parse.o prmem_reserve.o prmem_init.o prmem_region.o diff --git a/kernel/prmem/prmem_init.c b/kernel/prmem/prmem_init.c new file mode 100644 index 000000000000..97b550252028 --- /dev/null +++ b/kernel/prmem/prmem_init.c @@ -0,0 +1,27 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Persistent-Across-Kexec memory (prmem) - Initialization. + * + * Copyright (C) 2023 Microsoft Corporation + * Author: Madhavan T. Venkataraman (madvenka@xxxxxxxxxxxxxxxxxxx) + */ +#include <linux/prmem.h> + +bool prmem_inited; + +void __init prmem_init(void) +{ + if (!prmem) + return; + + if (!prmem->metadata) { + /* Cold boot: metadata is still zeroed; initialize it. */ + prmem->metadata = prmem_metadata; + prmem->size = prmem_size; + INIT_LIST_HEAD(&prmem->regions); + + if (!prmem_add_region(prmem_pa, prmem_size)) + return; + } + prmem_inited = true; +} diff --git a/kernel/prmem/prmem_parse.c b/kernel/prmem/prmem_parse.c new file mode 100644 index 000000000000..191655b53545 --- /dev/null +++ b/kernel/prmem/prmem_parse.c @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Persistent-Across-Kexec memory (prmem) - Process prmem cmdline parameter. 
+ * + * Copyright (C) 2023 Microsoft Corporation + * Author: Madhavan T. Venkataraman (madvenka@xxxxxxxxxxxxxxxxxxx) + */ +#include <linux/prmem.h> + +/* + * Syntax: prmem=size[KMG] + * + * Specifies the size of the initial memory to be allocated to prmem. + */ +static int __init prmem_size_parse(char *cmdline) +{ + char *tmp, *cur = cmdline; + unsigned long size; + + if (!cur) + return -EINVAL; + + /* Get initial size. */ + size = memparse(cur, &tmp); + if (cur == tmp || !size || size & (PAGE_SIZE - 1)) { + pr_warn("%s: Incorrect size %lx\n", __func__, size); + return -EINVAL; + } + + prmem_size = size; + return 0; +} +early_param("prmem", prmem_size_parse); diff --git a/kernel/prmem/prmem_region.c b/kernel/prmem/prmem_region.c new file mode 100644 index 000000000000..8254dafcee13 --- /dev/null +++ b/kernel/prmem/prmem_region.c @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Persistent-Across-Kexec memory (prmem) - Regions. + * + * Copyright (C) 2023 Microsoft Corporation + * Author: Madhavan T. Venkataraman (madvenka@xxxxxxxxxxxxxxxxxxx) + */ +#include <linux/prmem.h> + +struct prmem_region *prmem_add_region(unsigned long pa, size_t size) +{ + struct prmem_region *region; + + /* Allocate region structure from the base of the region itself. */ + region = __va(pa); + region->pa = pa; + region->size = size; + + list_add_tail(&region->node, &prmem->regions); + return region; +} diff --git a/kernel/prmem/prmem_reserve.c b/kernel/prmem/prmem_reserve.c new file mode 100644 index 000000000000..e20e31a61d12 --- /dev/null +++ b/kernel/prmem/prmem_reserve.c @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Persistent-Across-Kexec memory (prmem) - Reserve memory. + * + * Copyright (C) 2023 Microsoft Corporation + * Author: Madhavan T. 
Venkataraman (madvenka@xxxxxxxxxxxxxxxxxxx) + */ +#include <linux/prmem.h> + +struct prmem *prmem; +unsigned long prmem_metadata; +unsigned long prmem_pa; +size_t prmem_size; + +void __init prmem_reserve(void) +{ + BUILD_BUG_ON(sizeof(*prmem) > PAGE_SIZE); + + if (!prmem_size) + return; + + /* + * prmem uses direct map addresses. If PAGE_OFFSET is randomized, + * these addresses will change across kexecs. Persistence cannot + * be supported. + */ + if (kaslr_memory_enabled()) { + pr_warn("%s: Cannot support persistence because of KASLR.\n", + __func__); + return; + } + + /* Allocate a metadata page. */ + prmem_metadata = memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE); + if (!prmem_metadata) { + pr_warn("%s: Could not allocate metadata page\n", + __func__); + return; + } + + /* Allocate initial memory. */ + prmem_pa = memblock_phys_alloc(prmem_size, PAGE_SIZE); + if (!prmem_pa) { + pr_warn("%s: Could not allocate initial memory\n", __func__); + goto free_metadata; + } + + /* Clear metadata. */ + prmem = __va(prmem_metadata); + memset(prmem, 0, sizeof(*prmem)); + return; + +free_metadata: + memblock_phys_free(prmem_metadata, PAGE_SIZE); + prmem = NULL; +} diff --git a/mm/mm_init.c b/mm/mm_init.c index a1963c3322af..f12757829281 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -24,6 +24,7 @@ #include <linux/page_ext.h> #include <linux/pti.h> #include <linux/pgtable.h> +#include <linux/prmem.h> #include <linux/swap.h> #include <linux/cma.h> #include "internal.h" @@ -2804,4 +2805,5 @@ void __init mm_core_init(void) pti_init(); kmsan_init_runtime(); mm_cache_init(); + prmem_init(); } -- 2.25.1