Re: [PATCH] x86/mm: Rewrite sme_populate_pgd() in a more sensible way

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 12/4/2017 10:34 AM, Kirill A. Shutemov wrote:
On Mon, Dec 04, 2017 at 04:00:26PM +0000, Tom Lendacky wrote:
On 12/4/2017 8:57 AM, Kirill A. Shutemov wrote:
On Mon, Dec 04, 2017 at 08:19:11AM -0600, Tom Lendacky wrote:
On 12/4/2017 5:23 AM, Kirill A. Shutemov wrote:
sme_populate_pgd() open-codes a lot of things that are not needed to be
open-coded.

Let's rewrite it in a more stream-lined way.

This would also buy us boot-time switching between support between
paging modes, when rest of the pieces will be upstream.

Hi Kirill,

Unfortunately, some of these can't be changed.  The use of p4d_offset(),
pud_offset(), etc., use non-identity mapped virtual addresses which cause
failures at this point of the boot process.

Wat? Virtual address is virtual address. p?d_offset() doesn't care about
what mapping you're using.

Yes it does.  For example, pmd_offset() issues a pud_page_addr() call,
which does a __va() returning a non-identity mapped address (0xffff88...).
Only identity mapped virtual addresses have been setup at this point, so
the use of that virtual address panics the kernel.

Stupid me. You are right.

What about something like this:

diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index d9a9e9fc75dd..65e0d68f863f 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -12,6 +12,23 @@
#define DISABLE_BRANCH_PROFILING +/*
+ * Since we're dealing with identity mappings, physical and virtual
+ * addresses are the same, so override these defines which are ultimately
+ * used by the headers in misc.h.
+ */
+#define __pa(x)  ((unsigned long)(x))
+#define __va(x)  ((void *)((unsigned long)(x)))

No, you can't do this.  There are routines in this file that are called
after the kernel has switched to its standard virtual address map where
this definition of __va() will likely cause a failure.

+
+/*
+ * Special hack: we have to be careful, because no indirections are
+ * allowed here, and paravirt_ops is a kind of one. As it will only run in
+ * baremetal anyway, we just keep it from happening. (This list needs to
+ * be extended when new paravirt and debugging variants are added.)
+ */
+#undef CONFIG_PARAVIRT
+#undef CONFIG_PARAVIRT_SPINLOCKS

I'd really, really like to avoid doing something like this.

Thanks,
Tom

+
  #include <linux/linkage.h>
  #include <linux/init.h>
  #include <linux/mm.h>
@@ -489,73 +506,42 @@ static void __init sme_clear_pgd(pgd_t *pgd_base, unsigned long start,
  static void __init *sme_populate_pgd(pgd_t *pgd_base, void *pgtable_area,
  				     unsigned long vaddr, pmdval_t pmd_val)
  {
-	pgd_t *pgd_p;
-	p4d_t *p4d_p;
-	pud_t *pud_p;
-	pmd_t *pmd_p;
-
-	pgd_p = pgd_base + pgd_index(vaddr);
-	if (native_pgd_val(*pgd_p)) {
-		if (IS_ENABLED(CONFIG_X86_5LEVEL))
-			p4d_p = (p4d_t *)(native_pgd_val(*pgd_p) & ~PTE_FLAGS_MASK);
-		else
-			pud_p = (pud_t *)(native_pgd_val(*pgd_p) & ~PTE_FLAGS_MASK);
-	} else {
-		pgd_t pgd;
-
-		if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
-			p4d_p = pgtable_area;
-			memset(p4d_p, 0, sizeof(*p4d_p) * PTRS_PER_P4D);
-			pgtable_area += sizeof(*p4d_p) * PTRS_PER_P4D;
-
-			pgd = native_make_pgd((pgdval_t)p4d_p + PGD_FLAGS);
-		} else {
-			pud_p = pgtable_area;
-			memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD);
-			pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD;
-
-			pgd = native_make_pgd((pgdval_t)pud_p + PGD_FLAGS);
-		}
-		native_set_pgd(pgd_p, pgd);
+	pgd_t *pgd;
+	p4d_t *p4d;
+	pud_t *pud;
+	pmd_t *pmd;
+
+	pgd = pgd_base + pgd_index(vaddr);
+	if (pgd_none(*pgd)) {
+		p4d = pgtable_area;
+		memset(p4d, 0, sizeof(*p4d) * PTRS_PER_P4D);
+		pgtable_area += sizeof(*p4d) * PTRS_PER_P4D;
+		set_pgd(pgd, __pgd(PGD_FLAGS | __pa(p4d)));
  	}
- if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
-		p4d_p += p4d_index(vaddr);
-		if (native_p4d_val(*p4d_p)) {
-			pud_p = (pud_t *)(native_p4d_val(*p4d_p) & ~PTE_FLAGS_MASK);
-		} else {
-			p4d_t p4d;
-
-			pud_p = pgtable_area;
-			memset(pud_p, 0, sizeof(*pud_p) * PTRS_PER_PUD);
-			pgtable_area += sizeof(*pud_p) * PTRS_PER_PUD;
-
-			p4d = native_make_p4d((pudval_t)pud_p + P4D_FLAGS);
-			native_set_p4d(p4d_p, p4d);
-		}
+	p4d = p4d_offset(pgd, vaddr);
+	if (p4d_none(*p4d)) {
+		pud = pgtable_area;
+		memset(pud, 0, sizeof(*pud) * PTRS_PER_PUD);
+		pgtable_area += sizeof(*pud) * PTRS_PER_PUD;
+		set_p4d(p4d, __p4d(P4D_FLAGS | __pa(pud)));
  	}
- pud_p += pud_index(vaddr);
-	if (native_pud_val(*pud_p)) {
-		if (native_pud_val(*pud_p) & _PAGE_PSE)
-			goto out;
-
-		pmd_p = (pmd_t *)(native_pud_val(*pud_p) & ~PTE_FLAGS_MASK);
-	} else {
-		pud_t pud;
-
-		pmd_p = pgtable_area;
-		memset(pmd_p, 0, sizeof(*pmd_p) * PTRS_PER_PMD);
-		pgtable_area += sizeof(*pmd_p) * PTRS_PER_PMD;
-
-		pud = native_make_pud((pmdval_t)pmd_p + PUD_FLAGS);
-		native_set_pud(pud_p, pud);
+	pud = pud_offset(p4d, vaddr);
+	if (pud_none(*pud)) {
+		pmd = pgtable_area;
+		memset(pmd, 0, sizeof(*pmd) * PTRS_PER_PMD);
+		pgtable_area += sizeof(*pmd) * PTRS_PER_PMD;
+		set_pud(pud, __pud(PUD_FLAGS | __pa(pmd)));
  	}
+	if (pud_large(*pud))
+		goto out;
- pmd_p += pmd_index(vaddr);
-	if (!native_pmd_val(*pmd_p) || !(native_pmd_val(*pmd_p) & _PAGE_PSE))
-		native_set_pmd(pmd_p, native_make_pmd(pmd_val));
+	pmd = pmd_offset(pud, vaddr);
+	if (pmd_large(*pmd))
+		goto out;
+ set_pmd(pmd, __pmd(pmd_val));
  out:
  	return pgtable_area;
  }


--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@xxxxxxxxx.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@xxxxxxxxx";> email@xxxxxxxxx </a>



[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]
  Powered by Linux