Currently virt_to_phys() and phys_to_virt() return passed address,
because identity paging is used.

With this change the virtual memory layout is redefined in three
regions as follows (PO stands for PAGE_OFFSET):

	0..&edata			code, stack, data
	PO..<PO + RAM size>		dynamic RAM pages
	<PO + RAM size>..<VA limit>	MMIO, virtual mappings

As result, virt_to_phys() and phys_to_virt() functions apply
PAGE_OFFSET to passed addresses.

Furthermore 1:1 mapping at 3GB of physical memory is removed and
memory-mapped hardware devices should be ioremap()-ed and referred
using MMIO accessors.

Cc: Radim Krčmář <rkrcmar@xxxxxxxxxx>
Signed-off-by: Alexander Gordeev <agordeev@xxxxxxxxxx>
---
 lib/x86/asm/io.h   | 12 ----------
 lib/x86/asm/page.h |  5 ++++
 lib/x86/vm.c       | 70 +++++++++++++++++++++++++++++++++++-------------------
 3 files changed, 51 insertions(+), 36 deletions(-)

diff --git a/lib/x86/asm/io.h b/lib/x86/asm/io.h
index 35a5c7347411..4a6447448a77 100644
--- a/lib/x86/asm/io.h
+++ b/lib/x86/asm/io.h
@@ -45,18 +45,6 @@ static inline void outl(uint32_t value, unsigned long port)
 	asm volatile("outl %0, %w1" : : "a"(value), "Nd"((unsigned short)port));
 }
 
-#define virt_to_phys virt_to_phys
-static inline unsigned long virt_to_phys(const void *virt)
-{
-	return (unsigned long)virt;
-}
-
-#define phys_to_virt phys_to_virt
-static inline void *phys_to_virt(unsigned long phys)
-{
-	return (void *)phys;
-}
-
 #define ioremap ioremap
 void __iomem *ioremap(phys_addr_t phys_addr, size_t size);
 
diff --git a/lib/x86/asm/page.h b/lib/x86/asm/page.h
index c43bab28ca2e..8e7002776c7a 100644
--- a/lib/x86/asm/page.h
+++ b/lib/x86/asm/page.h
@@ -10,6 +10,8 @@
 #include <linux/const.h>
 #include <bitops.h>
 
+#define PAGE_OFFSET 0x40000000
+
 #define PAGE_SHIFT 12
 #define PAGE_SIZE (_AC(1,UL) << PAGE_SHIFT)
 #define PAGE_MASK (~(PAGE_SIZE-1))
@@ -44,5 +46,8 @@
 #define PGDIR_BITS(lvl) (((lvl) - 1) * PGDIR_WIDTH + PAGE_SHIFT)
 #define PGDIR_OFFSET(va, lvl) (((va) >> PGDIR_BITS(lvl)) & PGDIR_MASK)
 
+#define __va(x) ((void *)((unsigned long)(x) + PAGE_OFFSET))
+#define __pa(x) ((unsigned long)(x) - PAGE_OFFSET)
+
 #endif /* !__ASSEMBLY__ */
 #endif
diff --git a/lib/x86/vm.c b/lib/x86/vm.c
index 85625e5c934e..994c69df94d0 100644
--- a/lib/x86/vm.c
+++ b/lib/x86/vm.c
@@ -2,12 +2,18 @@
 #include "vm.h"
 #include "libcflat.h"
 #include "apic.h"
+#include "asm/page.h"
 
-static void *free = 0;
+extern char edata;
+static void *free = &edata;
 static void *vfree_top = 0;
+static unsigned long end_of_memory;
+static int pg_on = 0;
 
 static void free_memory(void *mem, unsigned long size)
 {
+	free = NULL;
+
 	while (size >= PAGE_SIZE) {
 		*(void **)mem = free;
 		free = mem;
@@ -35,8 +41,29 @@ void free_page(void *page)
 	free = page;
 }
 
-extern char edata;
-static unsigned long end_of_memory;
+static void *alloc_page_no_pg()
+{
+	void *p = free;
+
+	free += PAGE_SIZE;
+
+	return p;
+}
+
+static void *__alloc_page_table()
+{
+	return pg_on ? alloc_page() : alloc_page_no_pg();
+}
+
+static inline unsigned long __virt_to_phys(void *virt)
+{
+	return pg_on ? virt_to_phys(virt) : (unsigned long)virt;
+}
+
+static inline void *__phys_to_virt(unsigned long phys)
+{
+	return pg_on ? phys_to_virt(phys) : (void *)phys;
+}
 
 unsigned long *install_pte(unsigned long *cr3,
                            int pte_level,
@@ -50,11 +77,11 @@ unsigned long *install_pte(unsigned long *cr3,
 	for (level = PAGE_LEVEL; level > pte_level; --level) {
 		offset = PGDIR_OFFSET((unsigned long)virt, level);
 		if (!(pt[offset] & PT_PRESENT_MASK)) {
-			unsigned long *new_pt = alloc_page();
+			unsigned long *new_pt = __alloc_page_table();
 			memset(new_pt, 0, PAGE_SIZE);
-			pt[offset] = virt_to_phys(new_pt) | PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK;
+			pt[offset] = __virt_to_phys(new_pt) | PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK;
 		}
-		pt = phys_to_virt(pt[offset] & PT_ADDR_MASK);
+		pt = __phys_to_virt(pt[offset] & PT_ADDR_MASK);
 	}
 	offset = PGDIR_OFFSET((unsigned long)virt, level);
 	pt[offset] = pte;
@@ -84,8 +111,7 @@ unsigned long *install_large_page(unsigned long *cr3,
                                   unsigned long phys,
                                   void *virt)
 {
-	return install_pte(cr3, 2, virt,
-			   phys | PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK | PT_PAGE_SIZE_MASK);
+	return install_pte(cr3, 2, virt, phys | PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK | PT_PAGE_SIZE_MASK);
 }
 
 unsigned long *install_page(unsigned long *cr3,
@@ -121,26 +147,19 @@ static void setup_mmu_range(unsigned long *cr3, unsigned long start, void *virt,
 	}
 }
 
+#define MAX_PT_NR (2048 + 4)
+#define PT_START ((unsigned long)&edata)
+#define PT_END (PT_START + (MAX_PT_NR * PAGE_SIZE))
+
 static void setup_mmu(unsigned long len)
 {
-	unsigned long *cr3 = alloc_page();
+	unsigned long *cr3 = alloc_page_no_pg();
 
 	memset(cr3, 0, PAGE_SIZE);
 
-#ifdef __x86_64__
-	if (len < (1ul << 32))
-		len = (1ul << 32); /* map mmio 1:1 */
-
-	setup_mmu_range(cr3, 0, (void *)0, len);
-#else
-	if (len > (1ul << 31))
-		len = (1ul << 31);
-
-	/* 0 - 2G memory, 2G-3G valloc area, 3G-4G mmio */
-	setup_mmu_range(cr3, 0, (void *)0, len);
-	setup_mmu_range(cr3, 3ul << 30, (void *)(3ul << 30), (1ul << 30));
-	vfree_top = (void*)(3ul << 30);
-#endif
+	assert(len >= PT_END);
+	setup_mmu_range(cr3, 0, (void *)0, PT_START);
+	setup_mmu_range(cr3, PT_START, phys_to_virt(PT_START), len - PT_START);
 
 	write_cr3((unsigned long)cr3);
 #ifndef __x86_64__
@@ -148,6 +167,8 @@ static void setup_mmu(unsigned long len)
 #endif
 	write_cr0(X86_CR0_PG |X86_CR0_PE | X86_CR0_WP);
 
+	pg_on = 1;
+
 	printf("paging enabled\n");
 	printf("cr0 = %lx\n", read_cr0());
 	printf("cr3 = %lx\n", read_cr3());
@@ -158,8 +179,9 @@ void setup_vm()
 {
 	assert(!end_of_memory);
 	end_of_memory = fwcfg_get_u64(FW_CFG_RAM_SIZE);
-	free_memory(&edata, end_of_memory - (unsigned long)&edata);
+	end_of_memory -= 0x20 * PAGE_SIZE; /* s3 ACPI tables hack */
 	setup_mmu(end_of_memory);
+	free_memory(phys_to_virt(PT_END), end_of_memory - PT_END);
 	ioremap_apic();
 }
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html