Signed-off-by: Gleb Natapov <gleb@xxxxxxxxxx> --- config-x86-common.mak | 14 ++-- lib/x86/vm.c | 229 +++++++++++++++++++++++++++++++++++++++++++++++++ lib/x86/vm.h | 46 ++++++++++ x86/vm.c | 229 ------------------------------------------------- x86/vm.h | 46 ---------- 5 files changed, 281 insertions(+), 283 deletions(-) create mode 100644 lib/x86/vm.c create mode 100644 lib/x86/vm.h delete mode 100644 x86/vm.c delete mode 100644 x86/vm.h diff --git a/config-x86-common.mak b/config-x86-common.mak index 367c0be..c546ee3 100644 --- a/config-x86-common.mak +++ b/config-x86-common.mak @@ -8,6 +8,7 @@ cflatobjs += \ lib/x86/io.o \ lib/x86/smp.o +cflatobjs += lib/x86/vm.o cflatobjs += lib/x86/fwcfg.o cflatobjs += lib/x86/apic.o cflatobjs += lib/x86/atomic.o @@ -43,21 +44,19 @@ $(TEST_DIR)/access.elf: $(cstart.o) $(TEST_DIR)/access.o $(TEST_DIR)/hypercall.elf: $(cstart.o) $(TEST_DIR)/hypercall.o -$(TEST_DIR)/sieve.elf: $(cstart.o) $(TEST_DIR)/sieve.o \ - $(TEST_DIR)/vm.o +$(TEST_DIR)/sieve.elf: $(cstart.o) $(TEST_DIR)/sieve.o $(TEST_DIR)/vmexit.elf: $(cstart.o) $(TEST_DIR)/vmexit.o $(TEST_DIR)/smptest.elf: $(cstart.o) $(TEST_DIR)/smptest.o -$(TEST_DIR)/emulator.elf: $(cstart.o) $(TEST_DIR)/emulator.o \ - $(TEST_DIR)/vm.o +$(TEST_DIR)/emulator.elf: $(cstart.o) $(TEST_DIR)/emulator.o $(TEST_DIR)/port80.elf: $(cstart.o) $(TEST_DIR)/port80.o $(TEST_DIR)/tsc.elf: $(cstart.o) $(TEST_DIR)/tsc.o -$(TEST_DIR)/apic.elf: $(cstart.o) $(TEST_DIR)/apic.o $(TEST_DIR)/vm.o +$(TEST_DIR)/apic.elf: $(cstart.o) $(TEST_DIR)/apic.o $(TEST_DIR)/realmode.elf: $(TEST_DIR)/realmode.o $(CC) -m32 -nostdlib -o $@ -Wl,-T,$(TEST_DIR)/realmode.lds $^ @@ -70,10 +69,9 @@ $(TEST_DIR)/idt_test.elf: $(cstart.o) $(TEST_DIR)/idt_test.o $(TEST_DIR)/xsave.elf: $(cstart.o) $(TEST_DIR)/xsave.o -$(TEST_DIR)/rmap_chain.elf: $(cstart.o) $(TEST_DIR)/rmap_chain.o \ - $(TEST_DIR)/vm.o +$(TEST_DIR)/rmap_chain.elf: $(cstart.o) $(TEST_DIR)/rmap_chain.o -$(TEST_DIR)/svm.elf: $(cstart.o) $(TEST_DIR)/vm.o +$(TEST_DIR)/svm.elf: $(cstart.o) $(TEST_DIR)/kvmclock_test.elf: $(cstart.o) $(TEST_DIR)/kvmclock.o \ $(TEST_DIR)/kvmclock_test.o diff --git a/lib/x86/vm.c b/lib/x86/vm.c new file mode 100644 index 0000000..b34449f --- /dev/null +++ b/lib/x86/vm.c @@ -0,0 +1,229 @@ +#include "vm.h" +#include "libcflat.h" + +#define PAGE_SIZE 4096ul +#ifdef __x86_64__ +#define LARGE_PAGE_SIZE (512 * PAGE_SIZE) +#else +#define LARGE_PAGE_SIZE (1024 * PAGE_SIZE) +#endif + +#define X86_CR0_PE 0x00000001 +#define X86_CR0_PG 0x80000000 +#define X86_CR4_PSE 0x00000010 +static void *free = 0; +static void *vfree_top = 0; + +static void free_memory(void *mem, unsigned long size) +{ + while (size >= PAGE_SIZE) { + *(void **)mem = free; + free = mem; + mem += PAGE_SIZE; + size -= PAGE_SIZE; + } +} + +void *alloc_page() +{ + void *p; + + if (!free) + return 0; + + p = free; + free = *(void **)free; + + return p; +} + +void free_page(void *page) +{ + *(void **)page = free; + free = page; +} + +extern char edata; +static unsigned long end_of_memory; + +#ifdef __x86_64__ +#define PAGE_LEVEL 4 +#define PGDIR_WIDTH 9 +#define PGDIR_MASK 511 +#else +#define PAGE_LEVEL 2 +#define PGDIR_WIDTH 10 +#define PGDIR_MASK 1023 +#endif + +void install_pte(unsigned long *cr3, + int pte_level, + void *virt, + unsigned long pte, + unsigned long *pt_page) +{ + int level; + unsigned long *pt = cr3; + unsigned offset; + + for (level = PAGE_LEVEL; level > pte_level; --level) { + offset = ((unsigned long)virt >> ((level-1) * PGDIR_WIDTH + 12)) & PGDIR_MASK; + if (!(pt[offset] & PTE_PRESENT)) { + unsigned long *new_pt = pt_page; + if (!new_pt) + new_pt = alloc_page(); + else + pt_page = 0; + memset(new_pt, 0, PAGE_SIZE); + pt[offset] = virt_to_phys(new_pt) | PTE_PRESENT | PTE_WRITE; + } + pt = phys_to_virt(pt[offset] & 0xffffffffff000ull); + } + offset = ((unsigned long)virt >> ((level-1) * PGDIR_WIDTH + 12)) & PGDIR_MASK; + pt[offset] = pte; +} + +static unsigned long get_pte(unsigned long *cr3, void *virt) +{ + int level; + unsigned long *pt = cr3, pte; + unsigned offset; + + for (level = PAGE_LEVEL; level > 1; --level) { + offset = ((unsigned long)virt >> (((level-1) * PGDIR_WIDTH) + 12)) & PGDIR_MASK; + pte = pt[offset]; + if (!(pte & PTE_PRESENT)) + return 0; + if (level == 2 && (pte & PTE_PSE)) + return pte; + pt = phys_to_virt(pte & 0xffffffffff000ull); + } + offset = ((unsigned long)virt >> (((level-1) * PGDIR_WIDTH) + 12)) & PGDIR_MASK; + pte = pt[offset]; + return pte; +} + +void install_large_page(unsigned long *cr3, + unsigned long phys, + void *virt) +{ + install_pte(cr3, 2, virt, phys | PTE_PRESENT | PTE_WRITE | PTE_PSE, 0); +} + +void install_page(unsigned long *cr3, + unsigned long phys, + void *virt) +{ + install_pte(cr3, 1, virt, phys | PTE_PRESENT | PTE_WRITE, 0); +} + + +static inline void load_gdt(unsigned long *table, int nent) +{ + struct descriptor_table_ptr descr; + + descr.limit = nent * 8 - 1; + descr.base = (ulong)table; + lgdt(&descr); +} + +#define SEG_CS_32 8 +#define SEG_CS_64 16 + +struct ljmp { + void *ofs; + unsigned short seg; +}; + +static void setup_mmu(unsigned long len) +{ + unsigned long *cr3 = alloc_page(); + unsigned long phys = 0; + + if (len < (1ul << 32)) + len = 1ul << 32; /* map mmio 1:1 */ + + memset(cr3, 0, PAGE_SIZE); + while (phys + LARGE_PAGE_SIZE <= len) { + install_large_page(cr3, phys, (void *)phys); + phys += LARGE_PAGE_SIZE; + } + while (phys + PAGE_SIZE <= len) { + install_page(cr3, phys, (void *)phys); + phys += PAGE_SIZE; + } + write_cr3(virt_to_phys(cr3)); +#ifndef __x86_64__ + write_cr4(X86_CR4_PSE); +#endif + write_cr0(X86_CR0_PG |X86_CR0_PE); + + printf("paging enabled\n"); + printf("cr0 = %x\n", read_cr0()); + printf("cr3 = %x\n", read_cr3()); + printf("cr4 = %x\n", read_cr4()); +} + +static unsigned int inl(unsigned short port) +{ + unsigned int val; + asm volatile("inl %w1, %0" : "=a"(val) : "Nd"(port)); + return val; +} + +void setup_vm() +{ + end_of_memory = inl(0xd1); + free_memory(&edata, end_of_memory - (unsigned long)&edata); + setup_mmu(end_of_memory); +} + +void *vmalloc(unsigned long size) +{ + void *mem, *p; + unsigned pages; + + size += sizeof(unsigned long); + + size = (size + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1); + vfree_top -= size; + mem = p = vfree_top; + pages = size / PAGE_SIZE; + while (pages--) { + install_page(phys_to_virt(read_cr3()), virt_to_phys(alloc_page()), p); + p += PAGE_SIZE; + } + *(unsigned long *)mem = size; + mem += sizeof(unsigned long); + return mem; +} + +void vfree(void *mem) +{ + unsigned long size = ((unsigned long *)mem)[-1]; + + while (size) { + free_page(phys_to_virt(get_pte(phys_to_virt(read_cr3()), mem) & PTE_ADDR)); + mem += PAGE_SIZE; + size -= PAGE_SIZE; + } +} + +void *vmap(unsigned long long phys, unsigned long size) +{ + void *mem, *p; + unsigned pages; + + size = (size + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1); + vfree_top -= size; + phys &= ~(unsigned long long)(PAGE_SIZE - 1); + + mem = p = vfree_top; + pages = size / PAGE_SIZE; + while (pages--) { + install_page(phys_to_virt(read_cr3()), phys, p); + phys += PAGE_SIZE; + p += PAGE_SIZE; + } + return mem; +} diff --git a/lib/x86/vm.h b/lib/x86/vm.h new file mode 100644 index 0000000..a3d2676 --- /dev/null +++ b/lib/x86/vm.h @@ -0,0 +1,46 @@ +#ifndef VM_H +#define VM_H + +#include "processor.h" + +#define PAGE_SIZE 4096ul +#ifdef __x86_64__ +#define LARGE_PAGE_SIZE (512 * PAGE_SIZE) +#else +#define LARGE_PAGE_SIZE (1024 * PAGE_SIZE) +#endif + +#define PTE_PRESENT (1ull << 0) +#define PTE_PSE (1ull << 7) +#define PTE_WRITE (1ull << 1) +#define PTE_ADDR (0xffffffffff000ull) + +void setup_vm(); + +void *vmalloc(unsigned long size); +void vfree(void *mem); +void *vmap(unsigned long long phys, unsigned long size); + +void install_pte(unsigned long *cr3, + int pte_level, + void *virt, + unsigned long pte, + unsigned long *pt_page); + +void *alloc_page(); + +void install_large_page(unsigned long *cr3,unsigned long phys, + void *virt); +void install_page(unsigned long *cr3, unsigned long phys, void *virt); + +static inline unsigned long virt_to_phys(const void *virt) +{ + return (unsigned long)virt; +} + +static inline void *phys_to_virt(unsigned long phys) +{ + return (void *)phys; +} + +#endif diff --git a/x86/vm.c b/x86/vm.c deleted file mode 100644 index b34449f..0000000 --- a/x86/vm.c +++ /dev/null @@ -1,229 +0,0 @@ -#include "vm.h" -#include "libcflat.h" - -#define PAGE_SIZE 4096ul -#ifdef __x86_64__ -#define LARGE_PAGE_SIZE (512 * PAGE_SIZE) -#else -#define LARGE_PAGE_SIZE (1024 * PAGE_SIZE) -#endif - -#define X86_CR0_PE 0x00000001 -#define X86_CR0_PG 0x80000000 -#define X86_CR4_PSE 0x00000010 -static void *free = 0; -static void *vfree_top = 0; - -static void free_memory(void *mem, unsigned long size) -{ - while (size >= PAGE_SIZE) { - *(void **)mem = free; - free = mem; - mem += PAGE_SIZE; - size -= PAGE_SIZE; - } -} - -void *alloc_page() -{ - void *p; - - if (!free) - return 0; - - p = free; - free = *(void **)free; - - return p; -} - -void free_page(void *page) -{ - *(void **)page = free; - free = page; -} - -extern char edata; -static unsigned long end_of_memory; - -#ifdef __x86_64__ -#define PAGE_LEVEL 4 -#define PGDIR_WIDTH 9 -#define PGDIR_MASK 511 -#else -#define PAGE_LEVEL 2 -#define PGDIR_WIDTH 10 -#define PGDIR_MASK 1023 -#endif - -void install_pte(unsigned long *cr3, - int pte_level, - void *virt, - unsigned long pte, - unsigned long *pt_page) -{ - int level; - unsigned long *pt = cr3; - unsigned offset; - - for (level = PAGE_LEVEL; level > pte_level; --level) { - offset = ((unsigned long)virt >> ((level-1) * PGDIR_WIDTH + 12)) & PGDIR_MASK; - if (!(pt[offset] & PTE_PRESENT)) { - unsigned long *new_pt = pt_page; - if (!new_pt) - new_pt = alloc_page(); - else - pt_page = 0; - memset(new_pt, 0, PAGE_SIZE); - pt[offset] = virt_to_phys(new_pt) | PTE_PRESENT | PTE_WRITE; - } - pt = phys_to_virt(pt[offset] & 0xffffffffff000ull); - } - offset = ((unsigned long)virt >> ((level-1) * PGDIR_WIDTH + 12)) & PGDIR_MASK; - pt[offset] = pte; -} - -static unsigned long get_pte(unsigned long *cr3, void *virt) -{ - int level; - unsigned long *pt = cr3, pte; - unsigned offset; - - for (level = PAGE_LEVEL; level > 1; --level) { - offset = ((unsigned long)virt >> (((level-1) * PGDIR_WIDTH) + 12)) & PGDIR_MASK; - pte = pt[offset]; - if (!(pte & PTE_PRESENT)) - return 0; - if (level == 2 && (pte & PTE_PSE)) - return pte; - pt = phys_to_virt(pte & 0xffffffffff000ull); - } - offset = ((unsigned long)virt >> (((level-1) * PGDIR_WIDTH) + 12)) & PGDIR_MASK; - pte = pt[offset]; - return pte; -} - -void install_large_page(unsigned long *cr3, - unsigned long phys, - void *virt) -{ - install_pte(cr3, 2, virt, phys | PTE_PRESENT | PTE_WRITE | PTE_PSE, 0); -} - -void install_page(unsigned long *cr3, - unsigned long phys, - void *virt) -{ - install_pte(cr3, 1, virt, phys | PTE_PRESENT | PTE_WRITE, 0); -} - - -static inline void load_gdt(unsigned long *table, int nent) -{ - struct descriptor_table_ptr descr; - - descr.limit = nent * 8 - 1; - descr.base = (ulong)table; - lgdt(&descr); -} - -#define SEG_CS_32 8 -#define SEG_CS_64 16 - -struct ljmp { - void *ofs; - unsigned short seg; -}; - -static void setup_mmu(unsigned long len) -{ - unsigned long *cr3 = alloc_page(); - unsigned long phys = 0; - - if (len < (1ul << 32)) - len = 1ul << 32; /* map mmio 1:1 */ - - memset(cr3, 0, PAGE_SIZE); - while (phys + LARGE_PAGE_SIZE <= len) { - install_large_page(cr3, phys, (void *)phys); - phys += LARGE_PAGE_SIZE; - } - while (phys + PAGE_SIZE <= len) { - install_page(cr3, phys, (void *)phys); - phys += PAGE_SIZE; - } - write_cr3(virt_to_phys(cr3)); -#ifndef __x86_64__ - write_cr4(X86_CR4_PSE); -#endif - write_cr0(X86_CR0_PG |X86_CR0_PE); - - printf("paging enabled\n"); - printf("cr0 = %x\n", read_cr0()); - printf("cr3 = %x\n", read_cr3()); - printf("cr4 = %x\n", read_cr4()); -} - -static unsigned int inl(unsigned short port) -{ - unsigned int val; - asm volatile("inl %w1, %0" : "=a"(val) : "Nd"(port)); - return val; -} - -void setup_vm() -{ - end_of_memory = inl(0xd1); - free_memory(&edata, end_of_memory - (unsigned long)&edata); - setup_mmu(end_of_memory); -} - -void *vmalloc(unsigned long size) -{ - void *mem, *p; - unsigned pages; - - size += sizeof(unsigned long); - - size = (size + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1); - vfree_top -= size; - mem = p = vfree_top; - pages = size / PAGE_SIZE; - while (pages--) { - install_page(phys_to_virt(read_cr3()), virt_to_phys(alloc_page()), p); - p += PAGE_SIZE; - } - *(unsigned long *)mem = size; - mem += sizeof(unsigned long); - return mem; -} - -void vfree(void *mem) -{ - unsigned long size = ((unsigned long *)mem)[-1]; - - while (size) { - free_page(phys_to_virt(get_pte(phys_to_virt(read_cr3()), mem) & PTE_ADDR)); - mem += PAGE_SIZE; - size -= PAGE_SIZE; - } -} - -void *vmap(unsigned long long phys, unsigned long size) -{ - void *mem, *p; - unsigned pages; - - size = (size + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1); - vfree_top -= size; - phys &= ~(unsigned long long)(PAGE_SIZE - 1); - - mem = p = vfree_top; - pages = size / PAGE_SIZE; - while (pages--) { - install_page(phys_to_virt(read_cr3()), phys, p); - phys += PAGE_SIZE; - p += PAGE_SIZE; - } - return mem; -} diff --git a/x86/vm.h b/x86/vm.h deleted file mode 100644 index a3d2676..0000000 --- a/x86/vm.h +++ /dev/null @@ -1,46 +0,0 @@ -#ifndef VM_H -#define VM_H - -#include "processor.h" - -#define PAGE_SIZE 4096ul -#ifdef __x86_64__ -#define LARGE_PAGE_SIZE (512 * PAGE_SIZE) -#else -#define LARGE_PAGE_SIZE (1024 * PAGE_SIZE) -#endif - -#define PTE_PRESENT (1ull << 0) -#define PTE_PSE (1ull << 7) -#define PTE_WRITE (1ull << 1) -#define PTE_ADDR (0xffffffffff000ull) - -void setup_vm(); - -void *vmalloc(unsigned long size); -void vfree(void *mem); -void *vmap(unsigned long long phys, unsigned long size); - -void install_pte(unsigned long *cr3, - int pte_level, - void *virt, - unsigned long pte, - unsigned long *pt_page); - -void *alloc_page(); - -void install_large_page(unsigned long *cr3,unsigned long phys, - void *virt); -void install_page(unsigned long *cr3, unsigned long phys, void *virt); - -static inline unsigned long virt_to_phys(const void *virt) -{ - return (unsigned long)virt; -} - -static inline void *phys_to_virt(unsigned long phys) -{ - return (void *)phys; -} - -#endif -- 1.7.2.3 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html