Re: [RFC PATCH] kaslr: get ACPI SRAT table to avoid movable memory

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi all,

Here is my POC mail:
https://www.spinics.net/lists/kernel/msg2571811.html

Since there was no reply, I made this RFC PATCH.
I ran it in a QEMU guest, where it can get and print the mem_affinity.
However, I have no physical machine available to test on right now.

If there is something wrong, please let me know.
If someone has a better method to handle the movable memory,
please tell me.

Thanks,
Chao Fan

On Fri, Aug 18, 2017 at 04:58:20PM +0800, Chao Fan wrote:
>KASLR should choose a memory region in an immovable node to extract the
>kernel into. So get the ACPI SRAT table and store the memory regions of
>movable nodes, which KASLR should avoid.
>
>Signed-off-by: Chao Fan <fanc.fnst@xxxxxxxxxxxxxx>
>---
> arch/x86/boot/compressed/kaslr.c | 231 +++++++++++++++++++++++++++++++++++++++
> arch/x86/boot/compressed/misc.h  |  27 +++++
> 2 files changed, 258 insertions(+)
>
>diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c
>index 7de23bb279ce..3b8c111b8a84 100644
>--- a/arch/x86/boot/compressed/kaslr.c
>+++ b/arch/x86/boot/compressed/kaslr.c
>@@ -45,6 +45,11 @@
> #define STATIC
> #include <linux/decompress/mm.h>
> 
>+#include <linux/efi.h>
>+#include <linux/acpi.h>
>+#include <linux/numa.h>
>+#include <asm/efi.h>
>+
> extern unsigned long get_cmd_line_ptr(void);
> 
> /* Simplified build-specific string for starting entropy. */
>@@ -94,6 +99,18 @@ static bool memmap_too_large;
> /* Store memory limit specified by "mem=nn[KMG]" or "memmap=nn[KMG]" */
> unsigned long long mem_limit = ULLONG_MAX;
> 
>+/* Store the max numbers of acpi tables */
>+#define ACPI_MAX_TABLES		128
>+
>+/* Store the movable memory */
>+static struct {
>+	u64 start;
>+	u64 end;
>+} movable_mem[MAX_NUMNODES*2];
>+
>+/* Store the num of movable mem affinity */
>+static int num_movable_ma;
>+
> 
> enum mem_avoid_index {
> 	MEM_AVOID_ZO_RANGE = 0,
>@@ -257,6 +274,180 @@ static int handle_mem_memmap(void)
> 	return 0;
> }
> 
>+static void handle_movable_node(void)
>+{
>+	struct acpi_table_desc table_descs[ACPI_MAX_TABLES];
>+	struct acpi_table_header *table_header;
>+	struct acpi_srat_mem_affinity *ma;
>+	struct acpi_subtable_header *asth;
>+	acpi_physical_address root_table;
>+	acpi_physical_address acpi_table;
>+	acpi_physical_address rsdp_addr;
>+	struct acpi_table_header *th;
>+	efi_system_table_t *systab;
>+	unsigned long table_size;
>+	unsigned long table_end;
>+	bool use_rsdt = false;
>+	bool acpi_20 = false;
>+	bool efi_64 = false;
>+	void *config_tables;
>+	int size, total_size;
>+	u32 table_entry_size;
>+	struct efi_info *e;
>+	u8 *table_entry;
>+	u32 table_count;
>+	char *args;
>+	char *sig;
>+	u32 len;
>+	int i, j;
>+
>+	args = (char *)get_cmd_line_ptr();
>+	if (!strstr(args, "movable_node"))
>+		return;
>+
>+	e = &boot_params->efi_info;
>+	sig = (char *)&e->efi_loader_signature;
>+
>+	if (!strncmp(sig, EFI64_LOADER_SIGNATURE, 4))
>+		efi_64 = true;
>+	else if (!strncmp(sig, EFI32_LOADER_SIGNATURE, 4))
>+		efi_64 = false;
>+	else {
>+		debug_putstr("Wrong efi loader signature.\n");
>+		return;
>+	}
>+
>+	// Get systab from boot params
>+#ifdef CONFIG_X86_32
>+	if (e->efi_systab_hi || e->efi_memmap_hi) {
>+		debug_putstr("Table located above 4GB, disabling EFI.\n");
>+		return;
>+	}
>+	systab = (efi_system_table_t *)e->efi_systab;
>+#else
>+	systab = (efi_system_table_t *)(e->efi_systab |
>+			((__u64)e->efi_systab_hi<<32));
>+#endif
>+
>+	// Get efi tables from systab
>+	size = efi_64 ? sizeof(efi_config_table_64_t) :
>+			sizeof(efi_config_table_32_t);
>+	total_size = systab->nr_tables * size;
>+
>+	for (i = 0; i < systab->nr_tables; i++) {
>+		efi_guid_t guid;
>+		unsigned long table;
>+
>+		config_tables = (void *)(systab->tables + size * i);
>+		if (efi_64) {
>+			efi_config_table_64_t *tmp_table;
>+
>+			tmp_table = (efi_config_table_64_t *)config_tables;
>+			guid = tmp_table->guid;
>+			table = tmp_table->table;
>+#ifndef CONFIG_64BIT
>+			if (table >> 32) {
>+				debug_putstr
>+				("Table located above 4G, disabling EFI.\n");
>+				return -EINVAL;
>+			}
>+#endif
>+		} else {
>+			efi_config_table_32_t *tmp_table;
>+
>+			tmp_table = (efi_config_table_32_t *)config_tables;
>+			guid = tmp_table->guid;
>+			table = tmp_table->table;
>+		}
>+
>+		// Get rsdp from efi tables
>+		if (!(efi_guidcmp(guid, ACPI_TABLE_GUID)) && !acpi_20) {
>+			rsdp_addr = (acpi_physical_address)table;
>+			acpi_20 = false;
>+		} else if (!(efi_guidcmp(guid, ACPI_20_TABLE_GUID))) {
>+			rsdp_addr = (acpi_physical_address)table;
>+			acpi_20 = true;
>+		}
>+	}
>+
>+	// Get rsdt or xsdt from rsdp
>+	if (strstr(args, "acpi=rsdt"))
>+		use_rsdt = true;
>+
>+	if (!(use_rsdt) && (acpi_20) &&
>+		((((struct acpi_table_rsdp *)rsdp_addr)->revision) > 1)) {
>+		root_table = ((struct acpi_table_rsdp *)
>+				rsdp_addr)->xsdt_physical_address;
>+		table_entry_size = ACPI_XSDT_ENTRY_SIZE;
>+	} else {
>+		root_table = ((struct acpi_table_rsdp *)
>+				rsdp_addr)->rsdt_physical_address;
>+		table_entry_size = ACPI_RSDT_ENTRY_SIZE;
>+	}
>+
>+	// Get acpi root table from rsdt or xsdt
>+	th = (struct acpi_table_header *)root_table;
>+	len = th->length;
>+	table_count = (u32)((len - sizeof(struct acpi_table_header)) /
>+				table_entry_size);
>+	table_entry = ACPI_ADD_PTR(u8, th, sizeof(struct acpi_table_header));
>+
>+	for (i = 0; i < table_count; i++) {
>+		u64 address64;
>+
>+		memset(&table_descs[i], 0, sizeof(struct acpi_table_desc));
>+		if (table_entry_size == ACPI_RSDT_ENTRY_SIZE)
>+			acpi_table = ((acpi_physical_address)
>+					(*ACPI_CAST_PTR(u32, table_entry)));
>+		else {
>+			ACPI_MOVE_64_TO_64(&address64, table_entry);
>+			acpi_table = (acpi_physical_address) address64;
>+		}
>+
>+		if (acpi_table) {
>+			table_descs[i].address = acpi_table;
>+			table_descs[i].length =
>+				sizeof(struct acpi_table_header);
>+			table_descs[i].pointer =
>+				(struct acpi_table_header *)acpi_table;
>+			for (j = 0; j < 4; j++)
>+				table_descs[i].signature.ascii[j] =
>+					((struct acpi_table_header *)
>+					 acpi_table)->signature[j];
>+		}
>+
>+		if (!strncmp(table_descs[i].signature.ascii, "SRAT", 4)) {
>+			table_header = table_descs[i].pointer;
>+			break;
>+		}
>+
>+		table_entry += table_entry_size;
>+	}
>+
>+	// Get acpi srat mem affinity from acpi root table
>+	table_size = sizeof(struct acpi_table_srat);
>+	table_end = (unsigned long)table_header + table_header->length;
>+	asth = (struct acpi_subtable_header *)
>+		((unsigned long)table_header + table_size);
>+	j = 0;
>+
>+	while (((unsigned long)asth) +
>+			sizeof(struct acpi_subtable_header) < table_end) {
>+		if (asth->type == 1) {
>+			ma = (struct acpi_srat_mem_affinity *)asth;
>+			if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) {
>+				movable_mem[j].start = ma->base_address;
>+				movable_mem[j].end = ma->base_address +
>+						     ma->length - 1;
>+				j++;
>+			}
>+		}
>+		asth = (struct acpi_subtable_header *)
>+			((unsigned long)asth + asth->length);
>+	}
>+	num_movable_ma = j;
>+}
>+
> /*
>  * In theory, KASLR can put the kernel anywhere in the range of [16M, 64T).
>  * The mem_avoid array is used to store the ranges that need to be avoided
>@@ -380,6 +571,11 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size,
> 	/* Mark the memmap regions we need to avoid */
> 	handle_mem_memmap();
> 
>+#ifdef CONFIG_EFI
>+	/* Record the hotplug SB (movable) memory regions we need to avoid */
>+	handle_movable_node();
>+#endif
>+
> #ifdef CONFIG_X86_VERBOSE_BOOTUP
> 	/* Make sure video RAM can be used. */
> 	add_identity_map(0, PMD_SIZE);
>@@ -481,6 +677,36 @@ static unsigned long slots_fetch_random(void)
> 	return 0;
> }
> 
>+static int check_movable_memory(struct mem_vector *entry)
>+{
>+	int i;
>+	unsigned long long start;
>+	unsigned long long end;
>+
>+	start = entry->start;
>+	end = entry->start + entry->size - 1;
>+
>+	if (num_movable_ma == 0)
>+		return 0;
>+
>+	for (i = 0; i < num_movable_ma; i++) {
>+		if ((start >= movable_mem[i].start) &&
>+		    (start <= movable_mem[i].end))
>+			return 1;
>+
>+		if ((end >= movable_mem[i].start) &&
>+		    (end <= movable_mem[i].end))
>+			return 1;
>+
>+		if (start > movable_mem[i].end)
>+			continue;
>+
>+		if (end < movable_mem[i].start)
>+			break;
>+	}
>+	return 0;
>+}
>+
> static void process_mem_region(struct mem_vector *entry,
> 			       unsigned long minimum,
> 			       unsigned long image_size)
>@@ -502,6 +728,11 @@ static void process_mem_region(struct mem_vector *entry,
> 	end = min(entry->size + entry->start, mem_limit);
> 	if (entry->start >= end)
> 		return;
>+
>+	/* Ignore the memory region of movable_node */
>+	if (check_movable_memory(entry))
>+		return;
>+
> 	cur_entry.start = entry->start;
> 	cur_entry.size = end - entry->start;
> 
>diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h
>index 766a5211f827..5f514959b2f1 100644
>--- a/arch/x86/boot/compressed/misc.h
>+++ b/arch/x86/boot/compressed/misc.h
>@@ -109,3 +109,30 @@ static inline void console_init(void)
> #endif
> 
> #endif
>+
>+#ifdef ACPI_BIG_ENDIAN
>+#define ACPI_MOVE_64_TO_64(d, s) \
>+{((u8 *)(void *)(d))[0] = ((u8 *)(void *)(s))[7]; \
>+((u8 *)(void *)(d))[1] = ((u8 *)(void *)(s))[6]; \
>+((u8 *)(void *)(d))[2] = ((u8 *)(void *)(s))[5]; \
>+((u8 *)(void *)(d))[3] = ((u8 *)(void *)(s))[4]; \
>+((u8 *)(void *)(d))[4] = ((u8 *)(void *)(s))[3]; \
>+((u8 *)(void *)(d))[5] = ((u8 *)(void *)(s))[2]; \
>+((u8 *)(void *)(d))[6] = ((u8 *)(void *)(s))[1]; \
>+((u8 *)(void *)(d))[7] = ((u8 *)(void *)(s))[0]; }
>+#else
>+#ifndef ACPI_MISALIGNMENT_NOT_SUPPORTED
>+#define ACPI_MOVE_64_TO_64(d, s) \
>+{*(u64 *)(void *)(d) = *(u64 *)(void *)(s)}
>+#else
>+#define ACPI_MOVE_64_TO_64(d, s) \
>+{((u8 *)(void *)(d))[0] = ((u8 *)(void *)(s))[0]; \
>+((u8 *)(void *)(d))[1] = ((u8 *)(void *)(s))[1]; \
>+((u8 *)(void *)(d))[2] = ((u8 *)(void *)(s))[2]; \
>+((u8 *)(void *)(d))[3] = ((u8 *)(void *)(s))[3]; \
>+((u8 *)(void *)(d))[4] = ((u8 *)(void *)(s))[4]; \
>+((u8 *)(void *)(d))[5] = ((u8 *)(void *)(s))[5]; \
>+((u8 *)(void *)(d))[6] = ((u8 *)(void *)(s))[6]; \
>+((u8 *)(void *)(d))[7] = ((u8 *)(void *)(s))[7]; }
>+#endif
>+#endif
>-- 
>2.13.4
>


--
To unsubscribe from this list: send the line "unsubscribe linux-acpi" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [Linux IBM ACPI]     [Linux Power Management]     [Linux Kernel]     [Linux Laptop]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Samba]     [Video 4 Linux]     [Device Mapper]     [Linux Resources]

  Powered by Linux