Re: [PATCH v13 07/13] x86/sgx: Add data structures for tracking the EPC pages

Dave Hansen <dave.hansen@xxxxxxxxx> · Mon, 27 Aug 2018 14:07:53 -0700

On 08/27/2018 11:53 AM, Jarkko Sakkinen wrote:
> Add data structures to track Enclave Page Cache (EPC) pages.  EPC is
> divided into multiple banks (1-N) of which addresses and sizes can be
> enumerated with CPUID by the OS.
> 
> On NUMA systems a node can have at most bank. A bank can be at most part of
> two nodes. SGX supports both nodes with a single memory controller and also
> sub-cluster nodes with severals memory controllers on a single die.
> 
> Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@xxxxxxxxxxxxxxx>
> Co-developed-by: Serge Ayoun <serge.ayoun@xxxxxxxxx>
> Co-developed-by: Sean Christopherson <sean.j.christopherson@xxxxxxxxx>
> Signed-off-by: Serge Ayoun <serge.ayoun@xxxxxxxxx>
> Signed-off-by: Sean Christopherson <sean.j.christopherson@xxxxxxxxx>
> ---
>  arch/x86/include/asm/sgx.h      |  60 ++++++++++++++++++
>  arch/x86/kernel/cpu/intel_sgx.c | 106 +++++++++++++++++++++++++++++++-
>  2 files changed, 164 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/x86/include/asm/sgx.h b/arch/x86/include/asm/sgx.h
> index 2130e639ab49..17b7b3aa66bf 100644
> --- a/arch/x86/include/asm/sgx.h
> +++ b/arch/x86/include/asm/sgx.h
> @@ -4,9 +4,69 @@
>  #ifndef _ASM_X86_SGX_H
>  #define _ASM_X86_SGX_H
>  
> +#include <linux/bitops.h>
> +#include <linux/err.h>
> +#include <linux/rwsem.h>
>  #include <linux/types.h>
> +#include <asm/sgx_arch.h>
> +#include <asm/asm.h>
> +
> +#define SGX_MAX_EPC_BANKS 8

This is _still_ missing a meaningful description of what a bank is and
whether it is a hardware or software structure.

It would also help us to determine whether your bit packing below is
really required.

> +struct sgx_epc_page {
> +	unsigned long desc;
> +	struct list_head list;
> +};
> +
> +struct sgx_epc_bank {
> +	unsigned long pa;
> +	void *va;
> +	unsigned long size;

Please add units.  size could be bytes or pages, or who knows what.  I
can't tell you how many bugs I've tripped over in the past from simple
unit conversions

> +	struct sgx_epc_page *pages_data;
> +	struct sgx_epc_page **pages;
> +	unsigned long free_cnt;
> +	spinlock_t lock;
> +};
>  
>  extern bool sgx_enabled;
>  extern bool sgx_lc_enabled;
> +extern struct sgx_epc_bank sgx_epc_banks[SGX_MAX_EPC_BANKS];
> +
> +/*
> + * enum sgx_epc_page_desc - defines bits and masks for an EPC page's desc

Why are you bothering packing these bits?  This seems a rather
convoluted way to store two integers.

> +static __init int sgx_init_epc_bank(u64 addr, u64 size, unsigned long index,
> +				    struct sgx_epc_bank *bank)
> +{
> +	unsigned long nr_pages = size >> PAGE_SHIFT;
> +	struct sgx_epc_page *pages_data;
> +	unsigned long i;
> +	void *va;
> +
> +	va = ioremap_cache(addr, size);
> +	if (!va)
> +		return -ENOMEM;
> +
> +	pages_data = kcalloc(nr_pages, sizeof(struct sgx_epc_page), GFP_KERNEL);
> +	if (!pages_data)
> +		goto out_iomap;

This looks like you're roughly limited by the page allocator to a bank
size of ~1.4GB which seems kinda small.  Is this really OK?

> +	bank->pages = kcalloc(nr_pages, sizeof(struct sgx_epc_page *),
> +			      GFP_KERNEL);
> +	if (!bank->pages)
> +		goto out_pdata;
> +
> +	for (i = 0; i < nr_pages; i++) {
> +		bank->pages[i] = &pages_data[i];
> +		bank->pages[i]->desc = (addr + (i << PAGE_SHIFT)) | index;
> +	}
> +
> +	bank->pa = addr;
> +	bank->size = size;
> +	bank->va = va;
> +	bank->free_cnt = nr_pages;
> +	bank->pages_data = pages_data;
> +	spin_lock_init(&bank->lock);
> +	return 0;
> +out_pdata:
> +	kfree(pages_data);
> +out_iomap:
> +	iounmap(va);
> +	return -ENOMEM;
> +}
> +
> +static __init void sgx_page_cache_teardown(void)
> +{
> +	struct sgx_epc_bank *bank;
> +	int i;
> +
> +	for (i = 0; i < sgx_nr_epc_banks; i++) {
> +		bank = &sgx_epc_banks[i];
> +		iounmap((void *)bank->va);
> +		kfree(bank->pages);
> +		kfree(bank->pages_data);
> +	}
> +}
> +
> +static inline u64 sgx_combine_bank_regs(u64 low, u64 high)
> +{
> +	return (low & 0xFFFFF000) + ((high & 0xFFFFF) << 32);
> +}

-ENOCOMMENT for a rather weird looking calculation

> +static __init int sgx_page_cache_init(void)
> +{
> +	u32 eax, ebx, ecx, edx;
> +	u64 pa, size;
> +	int ret;
> +	int i;
> +
> +	for (i = 0; i < SGX_MAX_EPC_BANKS; i++) {
> +		cpuid_count(SGX_CPUID, 2 + i, &eax, &ebx, &ecx, &edx);
> +		if (!(eax & 0xF))
> +			break;

So, we have random data coming out of a random CPUID leaf being called
'eax' and then being tested against a random hard-coded mask.  This
seems rather unfortunate for someone trying to understand the code.  Can
we do better?

> +		pa = sgx_combine_bank_regs(eax, ebx);
> +		size = sgx_combine_bank_regs(ecx, edx);
> +		pr_info("EPC bank 0x%llx-0x%llx\n", pa, pa + size - 1);
> +		ret = sgx_init_epc_bank(pa, size, i, &sgx_epc_banks[i]);
> +		if (ret) {
> +			sgx_page_cache_teardown();
> +			return ret;
> +		}

So if one bank fails, we tear down all banks, yet leave sgx_nr_epc_banks
incremented?  That sounds troublesome.

> +		sgx_nr_epc_banks++;
> +	}
> +
> +	if (!sgx_nr_epc_banks) {
> +		pr_err("There are zero EPC banks.\n");
> +		return -ENODEV;
> +	}
> +
> +	return 0;
> +}

Does this support hot-addition of a bank?  If not, why not?