Re: [RFC PATCH v9 04/13] xpfo, x86: Add support for XPFO for x86-64

Peter Zijlstra <peterz@xxxxxxxxxxxxx> · Thu, 4 Apr 2019 09:52:06 +0200

On Wed, Apr 03, 2019 at 11:34:05AM -0600, Khalid Aziz wrote:
> diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
> index 2779ace16d23..5c0e1581fa56 100644
> --- a/arch/x86/include/asm/pgtable.h
> +++ b/arch/x86/include/asm/pgtable.h
> @@ -1437,6 +1437,32 @@ static inline bool arch_has_pfn_modify_check(void)
>  	return boot_cpu_has_bug(X86_BUG_L1TF);
>  }
>  
> +/*
> + * The current flushing context - we pass it instead of 5 arguments:
> + */
> +struct cpa_data {
> +	unsigned long	*vaddr;
> +	pgd_t		*pgd;
> +	pgprot_t	mask_set;
> +	pgprot_t	mask_clr;
> +	unsigned long	numpages;
> +	unsigned long	curpage;
> +	unsigned long	pfn;
> +	unsigned int	flags;
> +	unsigned int	force_split		: 1,
> +			force_static_prot	: 1;
> +	struct page	**pages;
> +};
> +
> +
> +int
> +should_split_large_page(pte_t *kpte, unsigned long address,
> +			struct cpa_data *cpa);
> +extern spinlock_t cpa_lock;
> +int
> +__split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address,
> +		   struct page *base);
> +

I really hate exposing all that.

>  #include <asm-generic/pgtable.h>
>  #endif	/* __ASSEMBLY__ */
>  

> diff --git a/arch/x86/mm/xpfo.c b/arch/x86/mm/xpfo.c
> new file mode 100644
> index 000000000000..3045bb7e4659
> --- /dev/null
> +++ b/arch/x86/mm/xpfo.c
> @@ -0,0 +1,123 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (C) 2017 Hewlett Packard Enterprise Development, L.P.
> + * Copyright (C) 2016 Brown University. All rights reserved.
> + *
> + * Authors:
> + *   Juerg Haefliger <juerg.haefliger@xxxxxxx>
> + *   Vasileios P. Kemerlis <vpk@xxxxxxxxxxxx>
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms of the GNU General Public License version 2 as published by
> + * the Free Software Foundation.
> + */
> +
> +#include <linux/mm.h>
> +
> +#include <asm/tlbflush.h>
> +
> +extern spinlock_t cpa_lock;
> +
> +/* Update a single kernel page table entry */
> +inline void set_kpte(void *kaddr, struct page *page, pgprot_t prot)
> +{
> +	unsigned int level;
> +	pgprot_t msk_clr;
> +	pte_t *pte = lookup_address((unsigned long)kaddr, &level);
> +
> +	if (unlikely(!pte)) {
> +		WARN(1, "xpfo: invalid address %p\n", kaddr);
> +		return;
> +	}
> +
> +	switch (level) {
> +	case PG_LEVEL_4K:
> +		set_pte_atomic(pte, pfn_pte(page_to_pfn(page),
> +			       canon_pgprot(prot)));

(sorry, do we also need a nikon_pgprot() ? :-)

> +		break;
> +	case PG_LEVEL_2M:
> +	case PG_LEVEL_1G: {
> +		struct cpa_data cpa = { };
> +		int do_split;
> +
> +		if (level == PG_LEVEL_2M)
> +			msk_clr = pmd_pgprot(*(pmd_t *)pte);
> +		else
> +			msk_clr = pud_pgprot(*(pud_t *)pte);
> +
> +		cpa.vaddr = kaddr;
> +		cpa.pages = &page;
> +		cpa.mask_set = prot;
> +		cpa.mask_clr = msk_clr;
> +		cpa.numpages = 1;
> +		cpa.flags = 0;
> +		cpa.curpage = 0;
> +		cpa.force_split = 0;
> +
> +
> +		do_split = should_split_large_page(pte, (unsigned long)kaddr,
> +						   &cpa);
> +		if (do_split) {
> +			struct page *base;
> +
> +			base = alloc_pages(GFP_ATOMIC, 0);
> +			if (!base) {
> +				WARN(1, "xpfo: failed to split large page\n");

You have to be fcking kidding right? A WARN when a GFP_ATOMIC allocation
fails?!

> +				break;
> +			}
> +
> +			if (!debug_pagealloc_enabled())
> +				spin_lock(&cpa_lock);
> +			if  (__split_large_page(&cpa, pte, (unsigned long)kaddr,
> +						base) < 0) {
> +				__free_page(base);
> +				WARN(1, "xpfo: failed to split large page\n");
> +			}
> +			if (!debug_pagealloc_enabled())
> +				spin_unlock(&cpa_lock);
> +		}
> +
> +		break;

Ever heard of helper functions?

> +	}
> +	case PG_LEVEL_512G:
> +		/* fallthrough, splitting infrastructure doesn't
> +		 * support 512G pages.
> +		 */

Broken comment style.

> +	default:
> +		WARN(1, "xpfo: unsupported page level %x\n", level);
> +	}
> +
> +}
> +EXPORT_SYMBOL_GPL(set_kpte);
> +
> +inline void xpfo_flush_kernel_tlb(struct page *page, int order)
> +{
> +	int level;
> +	unsigned long size, kaddr;
> +
> +	kaddr = (unsigned long)page_address(page);
> +
> +	if (unlikely(!lookup_address(kaddr, &level))) {
> +		WARN(1, "xpfo: invalid address to flush %lx %d\n", kaddr,
> +		     level);
> +		return;
> +	}
> +
> +	switch (level) {
> +	case PG_LEVEL_4K:
> +		size = PAGE_SIZE;
> +		break;
> +	case PG_LEVEL_2M:
> +		size = PMD_SIZE;
> +		break;
> +	case PG_LEVEL_1G:
> +		size = PUD_SIZE;
> +		break;
> +	default:
> +		WARN(1, "xpfo: unsupported page level %x\n", level);
> +		return;
> +	}
> +
> +	flush_tlb_kernel_range(kaddr, kaddr + (1 << order) * size);
> +}

You call this from IRQ/IRQ-disabled context... that _CANNOT_ be right.