Re: [RFC PATCH v4 14/28] Add support to access boot related data in the clear

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, Feb 16, 2017 at 09:45:09AM -0600, Tom Lendacky wrote:
> Boot data (such as EFI related data) is not encrypted when the system is
> booted and needs to be mapped decrypted.  Add support to apply the proper
> attributes to the EFI page tables and to the early_memremap and memremap
> APIs to identify the type of data being accessed so that the proper
> encryption attribute can be applied.

So this doesn't even begin to explain *why* we need this. The emphasis
being on *why*.

Lemme guess? kexec? And because of efi_reuse_config?

If so, then that whole ad-hoc caching in parse_setup_data() needs to go.
Especially if efi_reuse_config() already sees those addresses so while
we're there, we could save them somewhere or whatnot. But not doing the
whole thing again in parse_setup_data().

> Signed-off-by: Tom Lendacky <thomas.lendacky@xxxxxxx>
> ---
>  arch/x86/include/asm/io.h      |    3 +
>  arch/x86/include/asm/setup.h   |    8 +++
>  arch/x86/kernel/setup.c        |   33 ++++++++++++
>  arch/x86/mm/ioremap.c          |  111 ++++++++++++++++++++++++++++++++++++++++
>  arch/x86/platform/efi/efi_64.c |   16 ++++--
>  kernel/memremap.c              |   11 ++++
>  mm/early_ioremap.c             |   18 +++++-
>  7 files changed, 192 insertions(+), 8 deletions(-)
> 
> diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
> index 7afb0e2..833f7cc 100644
> --- a/arch/x86/include/asm/io.h
> +++ b/arch/x86/include/asm/io.h
> @@ -381,4 +381,7 @@ extern int __must_check arch_phys_wc_add(unsigned long base,
>  #define arch_io_reserve_memtype_wc arch_io_reserve_memtype_wc
>  #endif
>  
> +extern bool arch_memremap_do_ram_remap(resource_size_t offset, size_t size);
> +#define arch_memremap_do_ram_remap arch_memremap_do_ram_remap
> +
>  #endif /* _ASM_X86_IO_H */
> diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
> index ac1d5da..99998d9 100644
> --- a/arch/x86/include/asm/setup.h
> +++ b/arch/x86/include/asm/setup.h
> @@ -63,6 +63,14 @@ static inline void x86_ce4100_early_setup(void) { }
>  #include <asm/espfix.h>
>  #include <linux/kernel.h>
>  
> +struct setup_data_attrs {
> +	u64 paddr;
> +	unsigned long size;
> +};
> +
> +extern struct setup_data_attrs setup_data_list[];
> +extern unsigned int setup_data_list_count;
> +
>  /*
>   * This is set up by the setup-routine at boot-time
>   */
> diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
> index bd5b9a7..d2234bf 100644
> --- a/arch/x86/kernel/setup.c
> +++ b/arch/x86/kernel/setup.c
> @@ -148,6 +148,9 @@ int default_check_phys_apicid_present(int phys_apicid)
>  
>  struct boot_params boot_params;
>  
> +struct setup_data_attrs setup_data_list[32];
> +unsigned int setup_data_list_count;
> +
>  /*
>   * Machine setup..
>   */
> @@ -419,6 +422,32 @@ static void __init reserve_initrd(void)
>  }
>  #endif /* CONFIG_BLK_DEV_INITRD */
>  
> +static void __init update_setup_data_list(u64 pa_data, unsigned long size)
> +{
> +	unsigned int i;
> +
> +	for (i = 0; i < setup_data_list_count; i++) {
> +		if (setup_data_list[i].paddr != pa_data)
> +			continue;
> +
> +		setup_data_list[i].size = size;
> +		break;
> +	}
> +}
> +
> +static void __init add_to_setup_data_list(u64 pa_data, unsigned long size)
> +{
> +	if (!sme_active())
> +		return;
> +
> +	if (!WARN(setup_data_list_count == ARRAY_SIZE(setup_data_list),
> +		  "exceeded maximum setup data list slots")) {
> +		setup_data_list[setup_data_list_count].paddr = pa_data;
> +		setup_data_list[setup_data_list_count].size = size;
> +		setup_data_list_count++;
> +	}
> +}
> +
>  static void __init parse_setup_data(void)
>  {
>  	struct setup_data *data;
> @@ -428,12 +457,16 @@ static void __init parse_setup_data(void)
>  	while (pa_data) {
>  		u32 data_len, data_type;
>  
> +		add_to_setup_data_list(pa_data, sizeof(*data));
> +
>  		data = early_memremap(pa_data, sizeof(*data));
>  		data_len = data->len + sizeof(struct setup_data);
>  		data_type = data->type;
>  		pa_next = data->next;
>  		early_memunmap(data, sizeof(*data));
>  
> +		update_setup_data_list(pa_data, data_len);
> +
>  		switch (data_type) {
>  		case SETUP_E820_EXT:
>  			e820__memory_setup_extended(pa_data, data_len);
> diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
> index 2385e70..b0ff6bc 100644
> --- a/arch/x86/mm/ioremap.c
> +++ b/arch/x86/mm/ioremap.c
> @@ -13,6 +13,7 @@
>  #include <linux/slab.h>
>  #include <linux/vmalloc.h>
>  #include <linux/mmiotrace.h>
> +#include <linux/efi.h>
>  
>  #include <asm/cacheflush.h>
>  #include <asm/e820/api.h>
> @@ -21,6 +22,7 @@
>  #include <asm/tlbflush.h>
>  #include <asm/pgalloc.h>
>  #include <asm/pat.h>
> +#include <asm/setup.h>
>  
>  #include "physaddr.h"
>  
> @@ -419,6 +421,115 @@ void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr)
>  	iounmap((void __iomem *)((unsigned long)addr & PAGE_MASK));
>  }
>  
> +/*
> + * Examine the physical address to determine if it is boot data. Check
> + * it against the boot params structure and EFI tables.
> + */
> +static bool memremap_is_setup_data(resource_size_t phys_addr,
> +				   unsigned long size)
> +{
> +	unsigned int i;
> +	u64 paddr;
> +
> +	for (i = 0; i < setup_data_list_count; i++) {
> +		if (phys_addr < setup_data_list[i].paddr)
> +			continue;
> +
> +		if (phys_addr >= (setup_data_list[i].paddr +
> +				  setup_data_list[i].size))
> +			continue;
> +
> +		/* Address is within setup data range */
> +		return true;
> +	}
> +
> +	paddr = boot_params.efi_info.efi_memmap_hi;
> +	paddr <<= 32;
> +	paddr |= boot_params.efi_info.efi_memmap;
> +	if (phys_addr == paddr)
> +		return true;
> +
> +	paddr = boot_params.efi_info.efi_systab_hi;
> +	paddr <<= 32;
> +	paddr |= boot_params.efi_info.efi_systab;
> +	if (phys_addr == paddr)
> +		return true;
> +
> +	if (efi_table_address_match(phys_addr))
> +		return true;
> +
> +	return false;
> +}
> +
> +/*
> + * This function determines if an address should be mapped encrypted.
> + * Boot setup data, EFI data and E820 areas are checked in making this
> + * determination.
> + */
> +static bool memremap_should_map_encrypted(resource_size_t phys_addr,
> +					  unsigned long size)
> +{
> +	/*
> +	 * SME is not active, return true:
> +	 *   - For early_memremap_pgprot_adjust(), returning true or false
> +	 *     results in the same protection value
> +	 *   - For arch_memremap_do_ram_remap(), returning true will allow
> +	 *     the RAM remap to occur instead of falling back to ioremap()
> +	 */
> +	if (!sme_active())
> +		return true;
> +
> +	/* Check if the address is part of the setup data */
> +	if (memremap_is_setup_data(phys_addr, size))
> +		return false;
> +
> +	/* Check if the address is part of EFI boot/runtime data */
> +	switch (efi_mem_type(phys_addr)) {

arch/x86/built-in.o: In function `memremap_should_map_encrypted':
/home/boris/kernel/alt-linux/arch/x86/mm/ioremap.c:487: undefined reference to `efi_mem_type'
make: *** [vmlinux] Error 1

That's a !CONFIG_EFI .config.

> +	case EFI_BOOT_SERVICES_DATA:
> +	case EFI_RUNTIME_SERVICES_DATA:
> +		return false;
> +	default:
> +		break;
> +	}
> +
> +	/* Check if the address is outside kernel usable area */
> +	switch (e820__get_entry_type(phys_addr, phys_addr + size - 1)) {
> +	case E820_TYPE_RESERVED:
> +	case E820_TYPE_ACPI:
> +	case E820_TYPE_NVS:
> +	case E820_TYPE_UNUSABLE:
> +		return false;
> +	default:
> +		break;
> +	}
> +
> +	return true;
> +}
> +
> +/*
> + * Architecure function to determine if RAM remap is allowed.
> + */
> +bool arch_memremap_do_ram_remap(resource_size_t phys_addr, unsigned long size)
> +{
> +	return memremap_should_map_encrypted(phys_addr, size);
> +}
> +
> +/*
> + * Architecure override of __weak function to adjust the protection attributes
> + * used when remapping memory.
> + */
> +pgprot_t __init early_memremap_pgprot_adjust(resource_size_t phys_addr,
> +					     unsigned long size,
> +					     pgprot_t prot)
> +{
> +	if (memremap_should_map_encrypted(phys_addr, size))
> +		prot = pgprot_encrypted(prot);
> +	else
> +		prot = pgprot_decrypted(prot);
> +
> +	return prot;
> +}
> +
>  #ifdef CONFIG_ARCH_USE_MEMREMAP_PROT
>  /* Remap memory with encryption */
>  void __init *early_memremap_encrypted(resource_size_t phys_addr,
> diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c
> index 2ee7694..2d8674d 100644
> --- a/arch/x86/platform/efi/efi_64.c
> +++ b/arch/x86/platform/efi/efi_64.c
> @@ -243,7 +243,7 @@ void efi_sync_low_kernel_mappings(void)
>  
>  int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
>  {
> -	unsigned long pfn, text;
> +	unsigned long pfn, text, pf;
>  	struct page *page;
>  	unsigned npages;
>  	pgd_t *pgd;
> @@ -251,7 +251,13 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
>  	if (efi_enabled(EFI_OLD_MEMMAP))
>  		return 0;
>  
> -	efi_scratch.efi_pgt = (pgd_t *)__pa(efi_pgd);
> +	/*
> +	 * Since the PGD is encrypted, set the encryption mask so that when
> +	 * this value is loaded into cr3 the PGD will be decrypted during
> +	 * the pagetable walk.
> +	 */
> +	efi_scratch.efi_pgt = (pgd_t *)__sme_pa(efi_pgd);
> +
>  	pgd = efi_pgd;
>  
>  	/*
> @@ -261,7 +267,8 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
>  	 * phys_efi_set_virtual_address_map().
>  	 */
>  	pfn = pa_memmap >> PAGE_SHIFT;
> -	if (kernel_map_pages_in_pgd(pgd, pfn, pa_memmap, num_pages, _PAGE_NX | _PAGE_RW)) {
> +	pf = _PAGE_NX | _PAGE_RW | _PAGE_ENC;
> +	if (kernel_map_pages_in_pgd(pgd, pfn, pa_memmap, num_pages, pf)) {
>  		pr_err("Error ident-mapping new memmap (0x%lx)!\n", pa_memmap);
>  		return 1;
>  	}
> @@ -304,7 +311,8 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages)
>  	text = __pa(_text);
>  	pfn = text >> PAGE_SHIFT;
>  
> -	if (kernel_map_pages_in_pgd(pgd, pfn, text, npages, _PAGE_RW)) {
> +	pf = _PAGE_RW | _PAGE_ENC;
> +	if (kernel_map_pages_in_pgd(pgd, pfn, text, npages, pf)) {
>  		pr_err("Failed to map kernel text 1:1\n");
>  		return 1;
>  	}

Those changes should be in a separate patch IMHO.

-- 
Regards/Gruss,
    Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.



[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux