[PATCH kvm-unit-tests v3 08/11] s390x: add vmalloc support

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



To use virtual addresses, we have to:
- build page tables with an identity mapping
- set up the primary ASCE in cr1
- enable DAT in the PSW

The Linux definitions/implementation are not used, as they contain too
many software-defined bits and other things we don't need.

Instead, everything is written from scratch, while sticking to the
general Linux naming schemes where possible.

As we currently don't invalidate anything except page table entries, it
is sufficient to only use ipte for now.

Signed-off-by: David Hildenbrand <david@xxxxxxxxxx>
---
 lib/s390x/asm/arch_def.h |  46 ++++++++++
 lib/s390x/asm/page.h     |  24 +++++
 lib/s390x/asm/pgtable.h  | 224 +++++++++++++++++++++++++++++++++++++++++++++++
 lib/s390x/mmu.c          | 104 ++++++++++++++++++++++
 lib/s390x/sclp.c         |   2 +
 s390x/Makefile           |   2 +
 6 files changed, 402 insertions(+)
 create mode 100644 lib/s390x/asm/pgtable.h
 create mode 100644 lib/s390x/mmu.c

diff --git a/lib/s390x/asm/arch_def.h b/lib/s390x/asm/arch_def.h
index ee4c96f..620dca4 100644
--- a/lib/s390x/asm/arch_def.h
+++ b/lib/s390x/asm/arch_def.h
@@ -15,6 +15,8 @@ struct psw {
 	uint64_t	addr;
 };
 
+#define PSW_MASK_DAT			0x0400000000000000UL	/* DAT bit of the PSW mask; see configure_dat() */
+
 struct lowcore {
 	uint8_t		pad_0x0000[0x0080 - 0x0000];	/* 0x0000 */
 	uint32_t	ext_int_param;			/* 0x0080 */
@@ -163,4 +165,48 @@ static inline int tprot(unsigned long addr)
 	return cc;
 }
 
+static inline void lctlg(int cr, uint64_t value)	/* load control register 'cr' with 'value' */
+{
+	asm volatile(
+		"	lctlg	%1,%1,%0\n"	/* register range cr..cr, loaded from memory operand %0 */
+		: : "Q" (value), "i" (cr));	/* "i": 'cr' must be a compile-time constant */
+}
+
+static inline uint64_t stctg(int cr)	/* returns the contents of control register 'cr' */
+{
+	uint64_t value;
+	/* store the one-register range cr..cr into 'value' ("i": cr must be constant) */
+	asm volatile(
+		"	stctg	%1,%1,%0\n"
+		: "=Q" (value) : "i" (cr) : "memory");
+	return value;
+}
+
+static inline uint64_t extract_psw_mask(void)	/* returns the current 64-bit PSW mask */
+{
+	uint32_t mask_upper = 0, mask_lower = 0;
+	/* epsw delivers the mask as two 32-bit halves, one per register operand */
+	asm volatile(
+		"	epsw	%0,%1\n"
+		: "+r" (mask_upper), "+r" (mask_lower) : : );
+	/* recombine: first operand is the upper word, second the lower word */
+	return (uint64_t) mask_upper << 32 | mask_lower;
+}
+
+static inline void load_psw_mask(uint64_t mask)	/* activate PSW mask 'mask', then continue behind the asm */
+{
+	struct psw psw = {
+		.mask = mask,
+		.addr = 0,	/* filled in by the asm below */
+	};
+	uint64_t tmp = 0;
+	/* build a PSW that resumes at local label 0 and load it, so only the mask changes */
+	asm volatile(
+		"	larl	%0,0f\n"	/* tmp = address of label 0 */
+		"	stg	%0,8(%1)\n"	/* store it at offset 8 of psw (presumably .addr) */
+		"	lpswe	0(%1)\n"	/* load the completed PSW */
+		"0:\n"
+		: "+r" (tmp) :  "a" (&psw) : "memory", "cc" );
+}
+
 #endif
diff --git a/lib/s390x/asm/page.h b/lib/s390x/asm/page.h
index 141a456..bc19154 100644
--- a/lib/s390x/asm/page.h
+++ b/lib/s390x/asm/page.h
@@ -13,4 +13,28 @@
 
 #include <asm-generic/page.h>
 
+typedef uint64_t pgdval_t;		/* Region-1 table entry */
+typedef uint64_t p4dval_t;		/* Region-2 table entry */
+typedef uint64_t pudval_t;		/* Region-3 table entry */
+typedef uint64_t pmdval_t;		/* Segment table entry */
+typedef uint64_t pteval_t;		/* Page table entry */
+/* One-member struct per level, so entries of different levels are distinct types */
+typedef struct { pgdval_t pgd; } pgd_t;
+typedef struct { p4dval_t p4d; } p4d_t;
+typedef struct { pudval_t pud; } pud_t;
+typedef struct { pmdval_t pmd; } pmd_t;
+typedef struct { pteval_t pte; } pte_t;
+/* Access the raw value inside a typed entry (the result is an lvalue) */
+#define pgd_val(x)	((x).pgd)
+#define p4d_val(x)	((x).p4d)
+#define pud_val(x)	((x).pud)
+#define pmd_val(x)	((x).pmd)
+#define pte_val(x)	((x).pte)
+/* Wrap a raw value into a typed entry */
+#define __pgd(x)	((pgd_t) { (x) } )
+#define __p4d(x)	((p4d_t) { (x) } )
+#define __pud(x)	((pud_t) { (x) } )
+#define __pmd(x)	((pmd_t) { (x) } )
+#define __pte(x)	((pte_t) { (x) } )
+
 #endif
diff --git a/lib/s390x/asm/pgtable.h b/lib/s390x/asm/pgtable.h
new file mode 100644
index 0000000..e15bee9
--- /dev/null
+++ b/lib/s390x/asm/pgtable.h
@@ -0,0 +1,224 @@
+/*
+ * s390x page table definitions and functions
+ *
+ * Copyright (c) 2017 Red Hat Inc
+ *
+ * Authors:
+ *  David Hildenbrand <david@xxxxxxxxxx>
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Library General Public License version 2.
+ */
+#ifndef _ASMS390X_PGTABLE_H_
+#define _ASMS390X_PGTABLE_H_
+
+#include <asm/page.h>
+#include <alloc_page.h>
+
+#define ASCE_ORIGIN			0xfffffffffffff000UL	/* mask: address of the top-level table */
+#define ASCE_G				0x0000000000000200UL
+#define ASCE_P				0x0000000000000100UL
+#define ASCE_S				0x0000000000000080UL
+#define ASCE_X				0x0000000000000040UL
+#define ASCE_R				0x0000000000000020UL
+#define ASCE_DT				0x000000000000000cUL	/* mask: type of the designated table */
+#define ASCE_TL				0x0000000000000003UL	/* mask: table length */
+/* Values of the ASCE_DT field, one per kind of top-level table */
+#define ASCE_DT_REGION1			0x000000000000000cUL
+#define ASCE_DT_REGION2			0x0000000000000008UL
+#define ASCE_DT_REGION3			0x0000000000000004UL
+#define ASCE_DT_SEGMENT			0x0000000000000000UL
+
+#define REGION_TABLE_ORDER		2	/* order passed to alloc_pages() for a region table */
+#define REGION_TABLE_ENTRIES		2048
+#define REGION_TABLE_LENGTH		3	/* length value stored in the ASCE and in parent entries */
+/* Shift that moves each region index of a virtual address down to bit 0 */
+#define REGION1_SHIFT			53
+#define REGION2_SHIFT			42
+#define REGION3_SHIFT			31
+/* Fields of a region table entry */
+#define REGION_ENTRY_ORIGIN		0xfffffffffffff000UL
+#define REGION_ENTRY_P			0x0000000000000200UL
+#define REGION_ENTRY_TF			0x00000000000000c0UL
+#define REGION_ENTRY_I			0x0000000000000020UL
+#define REGION_ENTRY_TT			0x000000000000000cUL
+#define REGION_ENTRY_TL			0x0000000000000003UL
+/* Values of the REGION_ENTRY_TT field, one per region table level */
+#define REGION_ENTRY_TT_REGION1		0x000000000000000cUL
+#define REGION_ENTRY_TT_REGION2		0x0000000000000008UL
+#define REGION_ENTRY_TT_REGION3		0x0000000000000004UL
+/* Region-3 entry fields (defined for completeness, not referenced in this patch) */
+#define REGION3_ENTRY_RFAA		0xffffffff80000000UL
+#define REGION3_ENTRY_AV		0x0000000000010000UL
+#define REGION3_ENTRY_ACC		0x000000000000f000UL
+#define REGION3_ENTRY_F			0x0000000000000800UL
+#define REGION3_ENTRY_FC		0x0000000000000400UL
+#define REGION3_ENTRY_IEP		0x0000000000000100UL
+#define REGION3_ENTRY_CR		0x0000000000000010UL
+
+#define SEGMENT_TABLE_ORDER		2	/* order passed to alloc_pages() for a segment table */
+#define SEGMENT_TABLE_ENTRIES		2048
+#define SEGMENT_TABLE_LENGTH		3
+#define SEGMENT_SHIFT			20	/* a segment covers 1MB of virtual address space */
+/* Fields of a segment table entry */
+#define SEGMENT_ENTRY_ORIGIN		0xfffffffffffff800UL
+#define SEGMENT_ENTRY_SFAA		0xfffffffffff00000UL	/* bits 0-43: 1MB-aligned frame address */
+#define SEGMENT_ENTRY_AV		0x0000000000010000UL
+#define SEGMENT_ENTRY_ACC		0x000000000000f000UL
+#define SEGMENT_ENTRY_F			0x0000000000000800UL
+#define SEGMENT_ENTRY_FC		0x0000000000000400UL
+#define SEGMENT_ENTRY_P			0x0000000000000200UL
+#define SEGMENT_ENTRY_IEP		0x0000000000000100UL
+#define SEGMENT_ENTRY_I			0x0000000000000020UL
+#define SEGMENT_ENTRY_CS		0x0000000000000010UL
+#define SEGMENT_ENTRY_TT		0x000000000000000cUL
+/* Values of the SEGMENT_ENTRY_TT field */
+#define SEGMENT_ENTRY_TT_REGION1	0x000000000000000cUL
+#define SEGMENT_ENTRY_TT_REGION2	0x0000000000000008UL
+#define SEGMENT_ENTRY_TT_REGION3	0x0000000000000004UL
+#define SEGMENT_ENTRY_TT_SEGMENT	0x0000000000000000UL
+
+#define PAGE_TABLE_ORDER		0	/* order passed to alloc_pages() for a page table */
+#define PAGE_TABLE_ENTRIES		256
+/* Flag bits of a page table entry */
+#define PAGE_ENTRY_I			0x0000000000000400UL
+#define PAGE_ENTRY_P			0x0000000000000200UL
+#define PAGE_ENTRY_IEP			0x0000000000000100UL
+/* Linux-style aliases: pgd/p4d/pud = region-1/2/3 table, pmd = segment table, pte = page table */
+#define PTRS_PER_PGD			REGION_TABLE_ENTRIES
+#define PTRS_PER_P4D			REGION_TABLE_ENTRIES
+#define PTRS_PER_PUD			REGION_TABLE_ENTRIES
+#define PTRS_PER_PMD			SEGMENT_TABLE_ENTRIES
+#define PTRS_PER_PTE			PAGE_TABLE_ENTRIES
+/* Shift of each level's index within a virtual address (the pte level uses PAGE_SHIFT) */
+#define PGDIR_SHIFT			REGION1_SHIFT
+#define P4D_SHIFT			REGION2_SHIFT
+#define PUD_SHIFT			REGION3_SHIFT
+#define PMD_SHIFT			SEGMENT_SHIFT
+
+#define pgd_none(entry) (pgd_val(entry) & REGION_ENTRY_I)	/* "none" == invalid bit set */
+#define p4d_none(entry) (p4d_val(entry) & REGION_ENTRY_I)
+#define pud_none(entry) (pud_val(entry) & REGION_ENTRY_I)
+#define pmd_none(entry) (pmd_val(entry) & SEGMENT_ENTRY_I)
+#define pte_none(entry) (pte_val(entry) & PAGE_ENTRY_I)
+/* Virtual address of the next-level table (for a pte: of the page) an entry points to */
+#define pgd_addr(entry) __va(pgd_val(entry) & REGION_ENTRY_ORIGIN)
+#define p4d_addr(entry) __va(p4d_val(entry) & REGION_ENTRY_ORIGIN)
+#define pud_addr(entry) __va(pud_val(entry) & REGION_ENTRY_ORIGIN)
+#define pmd_addr(entry) __va(pmd_val(entry) & SEGMENT_ENTRY_ORIGIN)
+#define pte_addr(entry) __va(pte_val(entry) & PAGE_MASK)
+/* Index of addr within the table of the respective level */
+#define pgd_index(addr) (((addr) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))
+#define p4d_index(addr) (((addr) >> P4D_SHIFT) & (PTRS_PER_P4D - 1))
+#define pud_index(addr) (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
+#define pmd_index(addr) (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))
+#define pte_index(addr) (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
+/* Entry for addr: pgd_offset takes the table itself, the others a pointer to the parent entry */
+#define pgd_offset(table, addr) ((pgd_t *)(table) + pgd_index(addr))
+#define p4d_offset(pgd, addr) ((p4d_t *)pgd_addr(*(pgd)) + p4d_index(addr))
+#define pud_offset(p4d, addr) ((pud_t *)p4d_addr(*(p4d)) + pud_index(addr))
+#define pmd_offset(pud, addr) ((pmd_t *)pud_addr(*(pud)) + pmd_index(addr))
+#define pte_offset(pmd, addr) ((pte_t *)pmd_addr(*(pmd)) + pte_index(addr))
+
+static inline pgd_t *pgd_alloc_one(void)
+{
+	/* Allocate a region-1 table and mark every entry invalid. */
+	pgd_t *table = alloc_pages(REGION_TABLE_ORDER), *entry = table;
+
+	while (entry < table + REGION_TABLE_ENTRIES)
+		pgd_val(*entry++) = REGION_ENTRY_TT_REGION1 | REGION_ENTRY_I;
+	return table;
+}
+
+static inline p4d_t *p4d_alloc_one(void)
+{
+	/* Allocate a region-2 table and mark every entry invalid. */
+	p4d_t *table = alloc_pages(REGION_TABLE_ORDER), *entry = table;
+
+	while (entry < table + REGION_TABLE_ENTRIES)
+		p4d_val(*entry++) = REGION_ENTRY_TT_REGION2 | REGION_ENTRY_I;
+	return table;
+}
+
+static inline p4d_t *p4d_alloc(pgd_t *pgd, unsigned long addr)
+{
+	if (pgd_none(*pgd)) {	/* no region-2 table yet: attach a fresh one */
+		p4d_t *fresh = p4d_alloc_one();
+		pgd_val(*pgd) = REGION_TABLE_LENGTH | REGION_ENTRY_TT_REGION1 |
+				__pa(fresh);
+	}
+	return p4d_offset(pgd, addr);	/* region-2 entry covering addr */
+}
+
+static inline pud_t *pud_alloc_one(void)
+{
+	/* Allocate a region-3 table and mark every entry invalid. */
+	pud_t *table = alloc_pages(REGION_TABLE_ORDER), *entry = table;
+
+	while (entry < table + REGION_TABLE_ENTRIES)
+		pud_val(*entry++) = REGION_ENTRY_TT_REGION3 | REGION_ENTRY_I;
+	return table;
+}
+
+static inline pud_t *pud_alloc(p4d_t *p4d, unsigned long addr)
+{
+	if (p4d_none(*p4d)) {	/* no region-3 table yet: attach a fresh one */
+		pud_t *fresh = pud_alloc_one();
+		p4d_val(*p4d) = REGION_TABLE_LENGTH | REGION_ENTRY_TT_REGION2 |
+				__pa(fresh);
+	}
+	return pud_offset(p4d, addr);	/* region-3 entry covering addr */
+}
+
+static inline pmd_t *pmd_alloc_one(void)
+{
+	/* Allocate a segment table and mark every entry invalid. */
+	pmd_t *table = alloc_pages(SEGMENT_TABLE_ORDER), *entry = table;
+
+	while (entry < table + SEGMENT_TABLE_ENTRIES)
+		pmd_val(*entry++) = SEGMENT_ENTRY_TT_SEGMENT | SEGMENT_ENTRY_I;
+	return table;
+}
+
+static inline pmd_t *pmd_alloc(pud_t *pud, unsigned long addr)
+{
+	if (pud_none(*pud)) {	/* no segment table yet: attach a fresh one */
+		pmd_t *fresh = pmd_alloc_one();
+		pud_val(*pud) = REGION_TABLE_LENGTH | REGION_ENTRY_TT_REGION3 |
+				__pa(fresh);
+	}
+	return pmd_offset(pud, addr);	/* segment entry covering addr */
+}
+
+static inline pte_t *pte_alloc_one(void)
+{
+	/* Allocate a page table and mark every entry invalid. */
+	pte_t *table = alloc_pages(PAGE_TABLE_ORDER), *entry = table;
+
+	while (entry < table + PAGE_TABLE_ENTRIES)
+		pte_val(*entry++) = PAGE_ENTRY_I;
+	return table;
+}
+
+static inline pte_t *pte_alloc(pmd_t *pmd, unsigned long addr)
+{
+	if (pmd_none(*pmd)) {	/* no page table yet: attach a fresh one */
+		pte_t *pte = pte_alloc_one();
+		/* a segment entry has no table-length field: origin + type only */
+		pmd_val(*pmd) = __pa(pte) | SEGMENT_ENTRY_TT_SEGMENT;
+	}
+	return pte_offset(pmd, addr);	/* page table entry covering addr */
+}
+
+static inline void ipte(unsigned long vaddr, pteval_t *p_pte)	/* invalidate the page table entry for vaddr */
+{
+	unsigned long table_origin = (unsigned long)p_pte & PAGE_MASK;
+	/* operands: page-aligned base of the table containing p_pte, then vaddr */
+	asm volatile(
+		"	ipte %0,%1\n"
+		: : "a" (table_origin), "a" (vaddr) : "memory");
+}
+
+void configure_dat(int enable);	/* non-zero: set the PSW DAT bit, zero: clear it */
+
+#endif /* _ASMS390X_PGTABLE_H_ */
diff --git a/lib/s390x/mmu.c b/lib/s390x/mmu.c
new file mode 100644
index 0000000..f0ec7c3
--- /dev/null
+++ b/lib/s390x/mmu.c
@@ -0,0 +1,104 @@
+/*
+ * s390x MMU
+ *
+ * Copyright (c) 2017 Red Hat Inc
+ *
+ * Authors:
+ *  David Hildenbrand <david@xxxxxxxxxx>
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Library General Public License version 2.
+ */
+
+#include <libcflat.h>
+#include <asm/pgtable.h>
+#include <asm/arch_def.h>
+#include <asm/barrier.h>
+#include <vmalloc.h>
+
+void configure_dat(int enable)
+{
+	/* Start from the current PSW mask with the DAT bit cleared... */
+	uint64_t psw_mask = extract_psw_mask() & ~PSW_MASK_DAT;
+
+	/* ...and set it again only when DAT is to be switched on. */
+	if (enable)
+		psw_mask |= PSW_MASK_DAT;
+
+	load_psw_mask(psw_mask);
+}
+
+static void mmu_enable(pgd_t *pgtable)
+{
+	const uint64_t asce = __pa(pgtable) | ASCE_DT_REGION1 |
+			      REGION_TABLE_LENGTH;
+	/* asce = region-1 table address | designation type | table length;
+	 * install it as the primary asce (cr1) and read it back to verify */
+	lctlg(1, asce);
+	assert(stctg(1) == asce);
+
+	/* enable dat (primary == 0 set as default) */
+	configure_dat(1);
+}
+
+static pteval_t *get_pte(pgd_t *pgtable, uintptr_t vaddr)
+{
+	/* Walk region-1 -> region-2 -> region-3 -> segment -> page table,
+	 * creating any missing lower-level table on the way down. */
+	p4d_t *r2_entry = p4d_alloc(pgd_offset(pgtable, vaddr), vaddr);
+	pmd_t *seg_entry = pmd_alloc(pud_alloc(r2_entry, vaddr), vaddr);
+	pte_t *page_entry = pte_alloc(seg_entry, vaddr);
+
+	return &pte_val(*page_entry);
+}
+
+phys_addr_t virt_to_pte_phys(pgd_t *pgtable, void *vaddr)
+{
+	uintptr_t va = (uintptr_t)vaddr;	/* result: frame from the PTE + offset within the page */
+	return (*get_pte(pgtable, va) & PAGE_MASK) + (va & ~PAGE_MASK);
+}
+
+pteval_t *install_page(pgd_t *pgtable, phys_addr_t phys, void *vaddr)
+{
+	const uintptr_t va = (uintptr_t)vaddr;
+	pteval_t *slot = get_pte(pgtable, va);
+
+	/* replacing a live mapping: invalidate the old entry before overwriting */
+	if ((*slot & PAGE_ENTRY_I) == 0)
+		ipte(va, slot);
+	*slot = __pa(phys);
+	return slot;
+}
+
+static void setup_identity(pgd_t *pgtable, phys_addr_t start_addr,
+			   phys_addr_t end_addr)
+{
+	phys_addr_t page;
+	/* Map the range 1:1 page by page; start > end walks until the address
+	 * wraps to <= end. NOTE: page-aligned start == end never terminates. */
+	start_addr &= PAGE_MASK;
+	page = start_addr;
+	while (!(start_addr < end_addr && page >= end_addr) &&
+	       !(start_addr > end_addr && page <= end_addr)) {
+		install_page(pgtable, page, __va(page));
+		page += PAGE_SIZE;
+	}
+}
+
+void *setup_mmu(phys_addr_t phys_end)
+{
+	const unsigned long top_128mb = -(1UL << 27);
+	/* the root of the translation hierarchy is a region-1 table */
+	pgd_t *root = pgd_alloc_one();
+
+	/* identity-map physical memory from 0 up to phys_end */
+	setup_identity(root, 0, phys_end);
+
+	/* generate 128MB of invalid addresses at the end (for testing PGM) */
+	init_alloc_vpage((void *)top_128mb);
+	setup_identity(root, top_128mb, 0);
+
+	/* with the tables in place, load the ASCE and switch DAT on */
+	mmu_enable(root);
+	return root;
+}
diff --git a/lib/s390x/sclp.c b/lib/s390x/sclp.c
index c7471b1..005ae52 100644
--- a/lib/s390x/sclp.c
+++ b/lib/s390x/sclp.c
@@ -28,6 +28,8 @@ static void mem_init(phys_addr_t mem_end)
 	phys_addr_t freemem_start = (phys_addr_t)&stacktop & PAGE_MASK;
 
 	phys_alloc_init(freemem_start, mem_end - freemem_start);
+
+	setup_vm();
 }
 
 void sclp_memory_setup(void)
diff --git a/s390x/Makefile b/s390x/Makefile
index 4198fdc..d9bef37 100644
--- a/s390x/Makefile
+++ b/s390x/Makefile
@@ -24,12 +24,14 @@ cflatobjs += lib/util.o
 cflatobjs += lib/alloc.o
 cflatobjs += lib/alloc_phys.o
 cflatobjs += lib/alloc_page.o
+cflatobjs += lib/vmalloc.o
 cflatobjs += lib/alloc_phys.o
 cflatobjs += lib/s390x/io.o
 cflatobjs += lib/s390x/stack.o
 cflatobjs += lib/s390x/sclp.o
 cflatobjs += lib/s390x/sclp-ascii.o
 cflatobjs += lib/s390x/interrupt.o
+cflatobjs += lib/s390x/mmu.o
 
 OBJDIRS += lib/s390x
 
-- 
2.14.3




[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux