Create a new arch-specific subdirectory to contain architecture-specific code and includes. The Makefile now adds various arch-specific objects based on detected architecture. That aside, this patch should only contain code moves. These include: - x86-specific kvm_cpu setup, kernel loading, memory setup etc. now in x86/kvm{-cpu}.c - BIOS now lives in x86/bios/ - ioport setup - KVM extensions are asserted in arch-specific kvm.c now, so each architecture can manage its own dependencies. - Various architecture-specific #defines are moved into $(ARCH)/include/kvm{-cpu}.h such as struct kvm_cpu, KVM_NR_CPUS, KVM_32BIT_GAP_SIZE. Signed-off-by: Matt Evans <matt@xxxxxxxxxx> --- tools/kvm/Makefile | 96 ++++--- tools/kvm/builtin-run.c | 6 +- tools/kvm/include/kvm/ioport.h | 2 +- tools/kvm/include/kvm/kvm-cpu.h | 27 +-- tools/kvm/include/kvm/kvm.h | 58 +--- tools/kvm/ioport.c | 54 ---- tools/kvm/kvm-cpu.c | 372 ---------------------- tools/kvm/kvm.c | 323 +------------------- tools/kvm/{ => x86}/bios.c | 0 tools/kvm/{ => x86}/bios/.gitignore | 0 tools/kvm/{ => x86}/bios/bios-rom.S | 2 +- tools/kvm/{ => x86}/bios/e820.c | 0 tools/kvm/{ => x86}/bios/entry.S | 0 tools/kvm/{ => x86}/bios/gen-offsets.sh | 0 tools/kvm/{ => x86}/bios/int10.c | 0 tools/kvm/{ => x86}/bios/int15.c | 0 tools/kvm/{ => x86}/bios/local.S | 0 tools/kvm/{ => x86}/bios/macro.S | 0 tools/kvm/{ => x86}/bios/memcpy.c | 0 tools/kvm/{ => x86}/bios/rom.ld.S | 0 tools/kvm/{ => x86}/cpuid.c | 0 tools/kvm/{ => x86}/include/kvm/assembly.h | 0 tools/kvm/{ => x86}/include/kvm/barrier.h | 0 tools/kvm/{ => x86}/include/kvm/bios-export.h | 0 tools/kvm/{ => x86}/include/kvm/bios.h | 0 tools/kvm/{ => x86}/include/kvm/boot-protocol.h | 0 tools/kvm/{ => x86}/include/kvm/cpufeature.h | 0 tools/kvm/{ => x86}/include/kvm/interrupt.h | 0 tools/kvm/x86/include/kvm/kvm-arch.h | 59 ++++ tools/kvm/x86/include/kvm/kvm-cpu-arch.h | 33 ++ tools/kvm/{ => x86}/include/kvm/mptable.h | 0 tools/kvm/{ => x86}/interrupt.c | 0 
tools/kvm/x86/ioport.c | 59 ++++ tools/kvm/{ => x86}/irq.c | 0 tools/kvm/x86/kvm-cpu.c | 383 +++++++++++++++++++++++ tools/kvm/x86/kvm.c | 330 +++++++++++++++++++ tools/kvm/{ => x86}/mptable.c | 0 37 files changed, 951 insertions(+), 853 deletions(-) rename tools/kvm/{ => x86}/bios.c (100%) rename tools/kvm/{ => x86}/bios/.gitignore (100%) rename tools/kvm/{ => x86}/bios/bios-rom.S (80%) rename tools/kvm/{ => x86}/bios/e820.c (100%) rename tools/kvm/{ => x86}/bios/entry.S (100%) rename tools/kvm/{ => x86}/bios/gen-offsets.sh (100%) rename tools/kvm/{ => x86}/bios/int10.c (100%) rename tools/kvm/{ => x86}/bios/int15.c (100%) rename tools/kvm/{ => x86}/bios/local.S (100%) rename tools/kvm/{ => x86}/bios/macro.S (100%) rename tools/kvm/{ => x86}/bios/memcpy.c (100%) rename tools/kvm/{ => x86}/bios/rom.ld.S (100%) rename tools/kvm/{ => x86}/cpuid.c (100%) rename tools/kvm/{ => x86}/include/kvm/assembly.h (100%) rename tools/kvm/{ => x86}/include/kvm/barrier.h (100%) rename tools/kvm/{ => x86}/include/kvm/bios-export.h (100%) rename tools/kvm/{ => x86}/include/kvm/bios.h (100%) rename tools/kvm/{ => x86}/include/kvm/boot-protocol.h (100%) rename tools/kvm/{ => x86}/include/kvm/cpufeature.h (100%) rename tools/kvm/{ => x86}/include/kvm/interrupt.h (100%) create mode 100644 tools/kvm/x86/include/kvm/kvm-arch.h create mode 100644 tools/kvm/x86/include/kvm/kvm-cpu-arch.h rename tools/kvm/{ => x86}/include/kvm/mptable.h (100%) rename tools/kvm/{ => x86}/interrupt.c (100%) create mode 100644 tools/kvm/x86/ioport.c rename tools/kvm/{ => x86}/irq.c (100%) create mode 100644 tools/kvm/x86/kvm-cpu.c create mode 100644 tools/kvm/x86/kvm.c rename tools/kvm/{ => x86}/mptable.c (100%) diff --git a/tools/kvm/Makefile b/tools/kvm/Makefile index bb5f6b0..243886e 100644 --- a/tools/kvm/Makefile +++ b/tools/kvm/Makefile @@ -33,13 +33,11 @@ OBJS += builtin-run.o OBJS += builtin-setup.o OBJS += builtin-stop.o OBJS += builtin-version.o -OBJS += cpuid.o OBJS += disk/core.o OBJS += 
framebuffer.o OBJS += guest_compat.o OBJS += hw/rtc.o OBJS += hw/serial.o -OBJS += interrupt.o OBJS += ioport.o OBJS += kvm-cpu.o OBJS += kvm.o @@ -61,7 +59,6 @@ OBJS += disk/blk.o OBJS += disk/qcow.o OBJS += disk/raw.o OBJS += ioeventfd.o -OBJS += irq.o OBJS += net/uip/core.o OBJS += net/uip/arp.o OBJS += net/uip/icmp.o @@ -72,7 +69,6 @@ OBJS += net/uip/buf.o OBJS += net/uip/csum.o OBJS += net/uip/dhcp.o OBJS += kvm-cmd.o -OBJS += mptable.o OBJS += rbtree.o OBJS += threadpool.o OBJS += util/parse-options.o @@ -123,12 +119,6 @@ ifeq ($(has_AIO),y) LIBS += -laio endif -DEPS := $(patsubst %.o,%.d,$(OBJS)) - -# Exclude BIOS object files from header dependencies. -OBJS += bios.o -OBJS += bios/bios-rom.o - LIBS += -lrt LIBS += -lpthread LIBS += -lutil @@ -150,12 +140,43 @@ ifeq ($(uname_M),x86_64) DEFINES += -DCONFIG_X86_64 endif + +### Arch-specific stuff + +#x86 +ifeq ($(ARCH),x86) + DEFINES += -DCONFIG_X86 + OBJS += x86/cpuid.o + OBJS += x86/interrupt.o + OBJS += x86/ioport.o + OBJS += x86/irq.o + OBJS += x86/kvm.o + OBJS += x86/kvm-cpu.o + OBJS += x86/mptable.o +# Exclude BIOS object files from header dependencies. + OTHEROBJS += x86/bios.o + OTHEROBJS += x86/bios/bios-rom.o + ARCH_INCLUDE := x86/include +endif + +### + +ifeq (,$(ARCH_INCLUDE)) + UNSUPP_ERR = @echo "This architecture is not supported in kvmtool." 
&& exit 1 +else + UNSUPP_ERR = +endif + +DEPS := $(patsubst %.o,%.d,$(OBJS)) +OBJS += $(OTHEROBJS) + DEFINES += -D_FILE_OFFSET_BITS=64 DEFINES += -D_GNU_SOURCE DEFINES += -DKVMTOOLS_VERSION='"$(KVMTOOLS_VERSION)"' +DEFINES += -DBUILD_ARCH='"$(ARCH)"' KVM_INCLUDE := include -CFLAGS += $(CPPFLAGS) $(DEFINES) -I$(KVM_INCLUDE) -I../../include -I../../arch/$(ARCH)/include/ -Os -g +CFLAGS += $(CPPFLAGS) $(DEFINES) -I$(KVM_INCLUDE) -I$(ARCH_INCLUDE) -I../../include -I../../arch/$(ARCH)/include/ -Os -g ifneq ($(WERROR),0) WARNINGS += -Werror @@ -179,7 +200,10 @@ WARNINGS += -Wwrite-strings CFLAGS += $(WARNINGS) -all: $(PROGRAM) $(GUEST_INIT) +all: arch_support_check $(PROGRAM) $(GUEST_INIT) + +arch_support_check: + $(UNSUPP_ERR) KVMTOOLS-VERSION-FILE: @$(SHELL_PATH) util/KVMTOOLS-VERSION-GEN $(OUTPUT) @@ -227,33 +251,33 @@ BIOS_CFLAGS += -mregparm=3 BIOS_CFLAGS += -fno-stack-protector BIOS_CFLAGS += -I../../arch/$(ARCH) -bios.o: bios/bios.bin bios/bios-rom.h - -bios/bios.bin.elf: bios/entry.S bios/e820.c bios/int10.c bios/int15.c bios/rom.ld.S - $(E) " CC bios/memcpy.o" - $(Q) $(CC) -include code16gcc.h $(CFLAGS) $(BIOS_CFLAGS) -c -s bios/memcpy.c -o bios/memcpy.o - $(E) " CC bios/e820.o" - $(Q) $(CC) -include code16gcc.h $(CFLAGS) $(BIOS_CFLAGS) -c -s bios/e820.c -o bios/e820.o - $(E) " CC bios/int10.o" - $(Q) $(CC) -include code16gcc.h $(CFLAGS) $(BIOS_CFLAGS) -c -s bios/int10.c -o bios/int10.o - $(E) " CC bios/int15.o" - $(Q) $(CC) -include code16gcc.h $(CFLAGS) $(BIOS_CFLAGS) -c -s bios/int15.c -o bios/int15.o - $(E) " CC bios/entry.o" - $(Q) $(CC) $(CFLAGS) $(BIOS_CFLAGS) -c -s bios/entry.S -o bios/entry.o +x86/bios.o: x86/bios/bios.bin x86/bios/bios-rom.h + +x86/bios/bios.bin.elf: x86/bios/entry.S x86/bios/e820.c x86/bios/int10.c x86/bios/int15.c x86/bios/rom.ld.S + $(E) " CC x86/bios/memcpy.o" + $(Q) $(CC) -include code16gcc.h $(CFLAGS) $(BIOS_CFLAGS) -c -s x86/bios/memcpy.c -o x86/bios/memcpy.o + $(E) " CC x86/bios/e820.o" + $(Q) $(CC) -include code16gcc.h 
$(CFLAGS) $(BIOS_CFLAGS) -c -s x86/bios/e820.c -o x86/bios/e820.o + $(E) " CC x86/bios/int10.o" + $(Q) $(CC) -include code16gcc.h $(CFLAGS) $(BIOS_CFLAGS) -c -s x86/bios/int10.c -o x86/bios/int10.o + $(E) " CC x86/bios/int15.o" + $(Q) $(CC) -include code16gcc.h $(CFLAGS) $(BIOS_CFLAGS) -c -s x86/bios/int15.c -o x86/bios/int15.o + $(E) " CC x86/bios/entry.o" + $(Q) $(CC) $(CFLAGS) $(BIOS_CFLAGS) -c -s x86/bios/entry.S -o x86/bios/entry.o $(E) " LD " $@ - $(Q) ld -T bios/rom.ld.S -o bios/bios.bin.elf bios/memcpy.o bios/entry.o bios/e820.o bios/int10.o bios/int15.o + $(Q) ld -T x86/bios/rom.ld.S -o x86/bios/bios.bin.elf x86/bios/memcpy.o x86/bios/entry.o x86/bios/e820.o x86/bios/int10.o x86/bios/int15.o -bios/bios.bin: bios/bios.bin.elf +x86/bios/bios.bin: x86/bios/bios.bin.elf $(E) " OBJCOPY " $@ - $(Q) objcopy -O binary -j .text bios/bios.bin.elf bios/bios.bin + $(Q) objcopy -O binary -j .text x86/bios/bios.bin.elf x86/bios/bios.bin -bios/bios-rom.o: bios/bios-rom.S bios/bios.bin bios/bios-rom.h +x86/bios/bios-rom.o: x86/bios/bios-rom.S x86/bios/bios.bin x86/bios/bios-rom.h $(E) " CC " $@ - $(Q) $(CC) -c $(CFLAGS) bios/bios-rom.S -o bios/bios-rom.o + $(Q) $(CC) -c $(CFLAGS) x86/bios/bios-rom.S -o x86/bios/bios-rom.o -bios/bios-rom.h: bios/bios.bin.elf +x86/bios/bios-rom.h: x86/bios/bios.bin.elf $(E) " NM " $@ - $(Q) cd bios && sh gen-offsets.sh > bios-rom.h && cd .. + $(Q) cd x86/bios && sh gen-offsets.sh > bios-rom.h && cd .. 
check: $(PROGRAM) $(MAKE) -C tests @@ -263,10 +287,10 @@ check: $(PROGRAM) clean: $(E) " CLEAN" - $(Q) rm -f bios/*.bin - $(Q) rm -f bios/*.elf - $(Q) rm -f bios/*.o - $(Q) rm -f bios/bios-rom.h + $(Q) rm -f x86/bios/*.bin + $(Q) rm -f x86/bios/*.elf + $(Q) rm -f x86/bios/*.o + $(Q) rm -f x86/bios/bios-rom.h $(Q) rm -f tests/boot/boot_test.iso $(Q) rm -rf tests/boot/rootfs/ $(Q) rm -f $(DEPS) $(OBJS) $(PROGRAM) $(GUEST_INIT) diff --git a/tools/kvm/builtin-run.c b/tools/kvm/builtin-run.c index 33de4f6..9148d83 100644 --- a/tools/kvm/builtin-run.c +++ b/tools/kvm/builtin-run.c @@ -568,7 +568,7 @@ static const char *host_kernels[] = { static const char *default_kernels[] = { "./bzImage", - "../../arch/x86/boot/bzImage", + "../../arch/" BUILD_ARCH "/boot/bzImage", NULL }; @@ -886,7 +886,7 @@ int kvm_cmd_run(int argc, const char **argv, const char *prefix) kvm->vmlinux = vmlinux_filename; - ioport__setup_legacy(); + ioport__setup_arch(); rtc__init(); @@ -931,7 +931,7 @@ int kvm_cmd_run(int argc, const char **argv, const char *prefix) kvm__start_timer(kvm); - kvm__setup_bios(kvm); + kvm__arch_setup_firmware(kvm); for (i = 0; i < nrcpus; i++) { kvm_cpus[i] = kvm_cpu__init(kvm, i); diff --git a/tools/kvm/include/kvm/ioport.h b/tools/kvm/include/kvm/ioport.h index 5b857dd..61a70ec 100644 --- a/tools/kvm/include/kvm/ioport.h +++ b/tools/kvm/include/kvm/ioport.h @@ -28,7 +28,7 @@ struct ioport_operations { bool (*io_out)(struct ioport *ioport, struct kvm *kvm, u16 port, void *data, int size); }; -void ioport__setup_legacy(void); +void ioport__setup_arch(void); u16 ioport__register(u16 port, struct ioport_operations *ops, int count, void *param); diff --git a/tools/kvm/include/kvm/kvm-cpu.h b/tools/kvm/include/kvm/kvm-cpu.h index 01540ac..719e286 100644 --- a/tools/kvm/include/kvm/kvm-cpu.h +++ b/tools/kvm/include/kvm/kvm-cpu.h @@ -1,32 +1,7 @@ #ifndef KVM__KVM_CPU_H #define KVM__KVM_CPU_H -#include <linux/kvm.h> /* for struct kvm_regs */ - -#include <pthread.h> - -struct kvm; 
- -struct kvm_cpu { - pthread_t thread; /* VCPU thread */ - - unsigned long cpu_id; - - struct kvm *kvm; /* parent KVM */ - int vcpu_fd; /* For VCPU ioctls() */ - struct kvm_run *kvm_run; - - struct kvm_regs regs; - struct kvm_sregs sregs; - struct kvm_fpu fpu; - - struct kvm_msrs *msrs; /* dynamically allocated */ - - u8 is_running; - u8 paused; - - struct kvm_coalesced_mmio_ring *ring; -}; +#include "kvm/kvm-cpu-arch.h" struct kvm_cpu *kvm_cpu__init(struct kvm *kvm, unsigned long cpu_id); void kvm_cpu__delete(struct kvm_cpu *vcpu); diff --git a/tools/kvm/include/kvm/kvm.h b/tools/kvm/include/kvm/kvm.h index 2b3024a..ca1acc0 100644 --- a/tools/kvm/include/kvm/kvm.h +++ b/tools/kvm/include/kvm/kvm.h @@ -1,22 +1,13 @@ #ifndef KVM__KVM_H #define KVM__KVM_H -#include "kvm/interrupt.h" -#include "kvm/segment.h" +#include "kvm/kvm-arch.h" #include <stdbool.h> #include <linux/types.h> #include <time.h> #include <signal.h> -#define KVM_NR_CPUS (255) - -/* - * The hole includes VESA framebuffer and PCI memory. - */ -#define KVM_32BIT_GAP_SIZE (768 << 20) -#define KVM_32BIT_GAP_START ((1ULL << 32) - KVM_32BIT_GAP_SIZE) - #define SIGKVMEXIT (SIGRTMIN + 0) #define SIGKVMPAUSE (SIGRTMIN + 1) #define SIGKVMSTOP (SIGRTMIN + 4) @@ -25,33 +16,15 @@ #define KVM_PID_FILE_PATH "/.kvm-tools/" #define HOME_DIR getenv("HOME") -struct kvm { - int sys_fd; /* For system ioctls(), i.e. 
/dev/kvm */ - int vm_fd; /* For VM ioctls() */ - timer_t timerid; /* Posix timer for interrupts */ - - int nrcpus; /* Number of cpus to run */ - - u32 mem_slots; /* for KVM_SET_USER_MEMORY_REGION */ - - u64 ram_size; - void *ram_start; - - bool nmi_disabled; - - bool single_step; +#define PAGE_SIZE (sysconf(_SC_PAGE_SIZE)) - u16 boot_selector; - u16 boot_ip; - u16 boot_sp; +#define DEFINE_KVM_EXT(ext) \ + .name = #ext, \ + .code = ext - struct interrupt_table interrupt_table; - - const char *vmlinux; - struct disk_image **disks; - int nr_disks; - - const char *name; +struct kvm_ext { + const char *name; + int code; }; void kvm__set_dir(const char *fmt, ...); @@ -64,7 +37,6 @@ void kvm__init_ram(struct kvm *kvm); void kvm__delete(struct kvm *kvm); bool kvm__load_kernel(struct kvm *kvm, const char *kernel_filename, const char *initrd_filename, const char *kernel_cmdline, u16 vidmode); -void kvm__setup_bios(struct kvm *kvm); void kvm__start_timer(struct kvm *kvm); void kvm__stop_timer(struct kvm *kvm); void kvm__irq_line(struct kvm *kvm, int irq, int level); @@ -81,6 +53,13 @@ int kvm__get_sock_by_instance(const char *name); int kvm__enumerate_instances(int (*callback)(const char *name, int pid)); void kvm__remove_socket(const char *name); +void kvm__arch_init(struct kvm *kvm, const char *kvm_dev, u64 ram_size, const char *name); +void kvm__arch_setup_firmware(struct kvm *kvm); +bool kvm__arch_cpu_supports_vm(void); + +int load_flat_binary(struct kvm *kvm, int fd); +bool load_bzimage(struct kvm *kvm, int fd_kernel, int fd_initrd, const char *kernel_cmdline, u16 vidmode); + /* * Debugging */ @@ -98,11 +77,4 @@ static inline void *guest_flat_to_host(struct kvm *kvm, unsigned long offset) return kvm->ram_start + offset; } -static inline void *guest_real_to_host(struct kvm *kvm, u16 selector, u16 offset) -{ - unsigned long flat = segment_to_flat(selector, offset); - - return guest_flat_to_host(kvm, flat); -} - #endif /* KVM__KVM_H */ diff --git a/tools/kvm/ioport.c 
b/tools/kvm/ioport.c index 7cbc44e..965cfc2 100644 --- a/tools/kvm/ioport.c +++ b/tools/kvm/ioport.c @@ -52,34 +52,6 @@ static int ioport_insert(struct rb_root *root, struct ioport *data) return rb_int_insert(root, &data->node); } -static bool debug_io_out(struct ioport *ioport, struct kvm *kvm, u16 port, void *data, int size) -{ - exit(EXIT_SUCCESS); -} - -static struct ioport_operations debug_ops = { - .io_out = debug_io_out, -}; - -static bool dummy_io_in(struct ioport *ioport, struct kvm *kvm, u16 port, void *data, int size) -{ - return true; -} - -static bool dummy_io_out(struct ioport *ioport, struct kvm *kvm, u16 port, void *data, int size) -{ - return true; -} - -static struct ioport_operations dummy_read_write_ioport_ops = { - .io_in = dummy_io_in, - .io_out = dummy_io_out, -}; - -static struct ioport_operations dummy_write_only_ioport_ops = { - .io_out = dummy_io_out, -}; - u16 ioport__register(u16 port, struct ioport_operations *ops, int count, void *param) { struct ioport *entry; @@ -164,29 +136,3 @@ error: return !ioport_debug; } - -void ioport__setup_legacy(void) -{ - /* 0x0020 - 0x003F - 8259A PIC 1 */ - ioport__register(0x0020, &dummy_read_write_ioport_ops, 2, NULL); - - /* PORT 0040-005F - PIT - PROGRAMMABLE INTERVAL TIMER (8253, 8254) */ - ioport__register(0x0040, &dummy_read_write_ioport_ops, 4, NULL); - - /* 0x00A0 - 0x00AF - 8259A PIC 2 */ - ioport__register(0x00A0, &dummy_read_write_ioport_ops, 2, NULL); - - /* PORT 00E0-00EF are 'motherboard specific' so we use them for our - internal debugging purposes. */ - ioport__register(IOPORT_DBG, &debug_ops, 1, NULL); - - /* PORT 00ED - DUMMY PORT FOR DELAY??? 
*/ - ioport__register(0x00ED, &dummy_write_only_ioport_ops, 1, NULL); - - /* 0x00F0 - 0x00FF - Math co-processor */ - ioport__register(0x00F0, &dummy_write_only_ioport_ops, 2, NULL); - - /* PORT 03D4-03D5 - COLOR VIDEO - CRT CONTROL REGISTERS */ - ioport__register(0x03D4, &dummy_read_write_ioport_ops, 1, NULL); - ioport__register(0x03D5, &dummy_write_only_ioport_ops, 1, NULL); -} diff --git a/tools/kvm/kvm-cpu.c b/tools/kvm/kvm-cpu.c index 0ad6f3b..5aba3bb 100644 --- a/tools/kvm/kvm-cpu.c +++ b/tools/kvm/kvm-cpu.c @@ -4,8 +4,6 @@ #include "kvm/util.h" #include "kvm/kvm.h" -#include <asm/msr-index.h> - #include <sys/ioctl.h> #include <sys/mman.h> #include <signal.h> @@ -14,106 +12,9 @@ #include <errno.h> #include <stdio.h> -#define PAGE_SIZE (sysconf(_SC_PAGE_SIZE)) - extern struct kvm_cpu *kvm_cpus[KVM_NR_CPUS]; extern __thread struct kvm_cpu *current_kvm_cpu; -static int debug_fd; - -void kvm_cpu__set_debug_fd(int fd) -{ - debug_fd = fd; -} - -int kvm_cpu__get_debug_fd(void) -{ - return debug_fd; -} - -static inline bool is_in_protected_mode(struct kvm_cpu *vcpu) -{ - return vcpu->sregs.cr0 & 0x01; -} - -static inline u64 ip_to_flat(struct kvm_cpu *vcpu, u64 ip) -{ - u64 cs; - - /* - * NOTE! We should take code segment base address into account here. - * Luckily it's usually zero because Linux uses flat memory model. - */ - if (is_in_protected_mode(vcpu)) - return ip; - - cs = vcpu->sregs.cs.selector; - - return ip + (cs << 4); -} - -static inline u32 selector_to_base(u16 selector) -{ - /* - * KVM on Intel requires 'base' to be 'selector * 16' in real mode. 
- */ - return (u32)selector * 16; -} - -static struct kvm_cpu *kvm_cpu__new(struct kvm *kvm) -{ - struct kvm_cpu *vcpu; - - vcpu = calloc(1, sizeof *vcpu); - if (!vcpu) - return NULL; - - vcpu->kvm = kvm; - - return vcpu; -} - -void kvm_cpu__delete(struct kvm_cpu *vcpu) -{ - if (vcpu->msrs) - free(vcpu->msrs); - - free(vcpu); -} - -struct kvm_cpu *kvm_cpu__init(struct kvm *kvm, unsigned long cpu_id) -{ - struct kvm_cpu *vcpu; - int mmap_size; - int coalesced_offset; - - vcpu = kvm_cpu__new(kvm); - if (!vcpu) - return NULL; - - vcpu->cpu_id = cpu_id; - - vcpu->vcpu_fd = ioctl(vcpu->kvm->vm_fd, KVM_CREATE_VCPU, cpu_id); - if (vcpu->vcpu_fd < 0) - die_perror("KVM_CREATE_VCPU ioctl"); - - mmap_size = ioctl(vcpu->kvm->sys_fd, KVM_GET_VCPU_MMAP_SIZE, 0); - if (mmap_size < 0) - die_perror("KVM_GET_VCPU_MMAP_SIZE ioctl"); - - vcpu->kvm_run = mmap(NULL, mmap_size, PROT_RW, MAP_SHARED, vcpu->vcpu_fd, 0); - if (vcpu->kvm_run == MAP_FAILED) - die("unable to mmap vcpu fd"); - - coalesced_offset = ioctl(kvm->sys_fd, KVM_CHECK_EXTENSION, KVM_CAP_COALESCED_MMIO); - if (coalesced_offset) - vcpu->ring = (void *)vcpu->kvm_run + (coalesced_offset * PAGE_SIZE); - - vcpu->is_running = true; - - return vcpu; -} - void kvm_cpu__enable_singlestep(struct kvm_cpu *vcpu) { struct kvm_guest_debug debug = { @@ -124,278 +25,6 @@ void kvm_cpu__enable_singlestep(struct kvm_cpu *vcpu) pr_warning("KVM_SET_GUEST_DEBUG failed"); } -static struct kvm_msrs *kvm_msrs__new(size_t nmsrs) -{ - struct kvm_msrs *vcpu = calloc(1, sizeof(*vcpu) + (sizeof(struct kvm_msr_entry) * nmsrs)); - - if (!vcpu) - die("out of memory"); - - return vcpu; -} - -#define KVM_MSR_ENTRY(_index, _data) \ - (struct kvm_msr_entry) { .index = _index, .data = _data } - -static void kvm_cpu__setup_msrs(struct kvm_cpu *vcpu) -{ - unsigned long ndx = 0; - - vcpu->msrs = kvm_msrs__new(100); - - vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_CS, 0x0); - vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_ESP, 0x0); 
- vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_EIP, 0x0); -#ifdef CONFIG_X86_64 - vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_STAR, 0x0); - vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_CSTAR, 0x0); - vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_KERNEL_GS_BASE, 0x0); - vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_SYSCALL_MASK, 0x0); - vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_LSTAR, 0x0); -#endif - vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_TSC, 0x0); - vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_MISC_ENABLE, - MSR_IA32_MISC_ENABLE_FAST_STRING); - - vcpu->msrs->nmsrs = ndx; - - if (ioctl(vcpu->vcpu_fd, KVM_SET_MSRS, vcpu->msrs) < 0) - die_perror("KVM_SET_MSRS failed"); -} - -static void kvm_cpu__setup_fpu(struct kvm_cpu *vcpu) -{ - vcpu->fpu = (struct kvm_fpu) { - .fcw = 0x37f, - .mxcsr = 0x1f80, - }; - - if (ioctl(vcpu->vcpu_fd, KVM_SET_FPU, &vcpu->fpu) < 0) - die_perror("KVM_SET_FPU failed"); -} - -static void kvm_cpu__setup_regs(struct kvm_cpu *vcpu) -{ - vcpu->regs = (struct kvm_regs) { - /* We start the guest in 16-bit real mode */ - .rflags = 0x0000000000000002ULL, - - .rip = vcpu->kvm->boot_ip, - .rsp = vcpu->kvm->boot_sp, - .rbp = vcpu->kvm->boot_sp, - }; - - if (vcpu->regs.rip > USHRT_MAX) - die("ip 0x%llx is too high for real mode", (u64) vcpu->regs.rip); - - if (ioctl(vcpu->vcpu_fd, KVM_SET_REGS, &vcpu->regs) < 0) - die_perror("KVM_SET_REGS failed"); -} - -static void kvm_cpu__setup_sregs(struct kvm_cpu *vcpu) -{ - - if (ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, &vcpu->sregs) < 0) - die_perror("KVM_GET_SREGS failed"); - - vcpu->sregs.cs.selector = vcpu->kvm->boot_selector; - vcpu->sregs.cs.base = selector_to_base(vcpu->kvm->boot_selector); - vcpu->sregs.ss.selector = vcpu->kvm->boot_selector; - vcpu->sregs.ss.base = selector_to_base(vcpu->kvm->boot_selector); - vcpu->sregs.ds.selector = vcpu->kvm->boot_selector; - vcpu->sregs.ds.base = selector_to_base(vcpu->kvm->boot_selector); - vcpu->sregs.es.selector = 
vcpu->kvm->boot_selector; - vcpu->sregs.es.base = selector_to_base(vcpu->kvm->boot_selector); - vcpu->sregs.fs.selector = vcpu->kvm->boot_selector; - vcpu->sregs.fs.base = selector_to_base(vcpu->kvm->boot_selector); - vcpu->sregs.gs.selector = vcpu->kvm->boot_selector; - vcpu->sregs.gs.base = selector_to_base(vcpu->kvm->boot_selector); - - if (ioctl(vcpu->vcpu_fd, KVM_SET_SREGS, &vcpu->sregs) < 0) - die_perror("KVM_SET_SREGS failed"); -} - -/** - * kvm_cpu__reset_vcpu - reset virtual CPU to a known state - */ -void kvm_cpu__reset_vcpu(struct kvm_cpu *vcpu) -{ - kvm_cpu__setup_sregs(vcpu); - kvm_cpu__setup_regs(vcpu); - kvm_cpu__setup_fpu(vcpu); - kvm_cpu__setup_msrs(vcpu); -} - -static void print_dtable(const char *name, struct kvm_dtable *dtable) -{ - dprintf(debug_fd, " %s %016llx %08hx\n", - name, (u64) dtable->base, (u16) dtable->limit); -} - -static void print_segment(const char *name, struct kvm_segment *seg) -{ - dprintf(debug_fd, " %s %04hx %016llx %08x %02hhx %x %x %x %x %x %x %x\n", - name, (u16) seg->selector, (u64) seg->base, (u32) seg->limit, - (u8) seg->type, seg->present, seg->dpl, seg->db, seg->s, seg->l, seg->g, seg->avl); -} - -void kvm_cpu__show_registers(struct kvm_cpu *vcpu) -{ - unsigned long cr0, cr2, cr3; - unsigned long cr4, cr8; - unsigned long rax, rbx, rcx; - unsigned long rdx, rsi, rdi; - unsigned long rbp, r8, r9; - unsigned long r10, r11, r12; - unsigned long r13, r14, r15; - unsigned long rip, rsp; - struct kvm_sregs sregs; - unsigned long rflags; - struct kvm_regs regs; - int i; - - if (ioctl(vcpu->vcpu_fd, KVM_GET_REGS, &regs) < 0) - die("KVM_GET_REGS failed"); - - rflags = regs.rflags; - - rip = regs.rip; rsp = regs.rsp; - rax = regs.rax; rbx = regs.rbx; rcx = regs.rcx; - rdx = regs.rdx; rsi = regs.rsi; rdi = regs.rdi; - rbp = regs.rbp; r8 = regs.r8; r9 = regs.r9; - r10 = regs.r10; r11 = regs.r11; r12 = regs.r12; - r13 = regs.r13; r14 = regs.r14; r15 = regs.r15; - - dprintf(debug_fd, "\n Registers:\n"); - dprintf(debug_fd, " 
----------\n"); - dprintf(debug_fd, " rip: %016lx rsp: %016lx flags: %016lx\n", rip, rsp, rflags); - dprintf(debug_fd, " rax: %016lx rbx: %016lx rcx: %016lx\n", rax, rbx, rcx); - dprintf(debug_fd, " rdx: %016lx rsi: %016lx rdi: %016lx\n", rdx, rsi, rdi); - dprintf(debug_fd, " rbp: %016lx r8: %016lx r9: %016lx\n", rbp, r8, r9); - dprintf(debug_fd, " r10: %016lx r11: %016lx r12: %016lx\n", r10, r11, r12); - dprintf(debug_fd, " r13: %016lx r14: %016lx r15: %016lx\n", r13, r14, r15); - - if (ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, &sregs) < 0) - die("KVM_GET_REGS failed"); - - cr0 = sregs.cr0; cr2 = sregs.cr2; cr3 = sregs.cr3; - cr4 = sregs.cr4; cr8 = sregs.cr8; - - dprintf(debug_fd, " cr0: %016lx cr2: %016lx cr3: %016lx\n", cr0, cr2, cr3); - dprintf(debug_fd, " cr4: %016lx cr8: %016lx\n", cr4, cr8); - dprintf(debug_fd, "\n Segment registers:\n"); - dprintf(debug_fd, " ------------------\n"); - dprintf(debug_fd, " register selector base limit type p dpl db s l g avl\n"); - print_segment("cs ", &sregs.cs); - print_segment("ss ", &sregs.ss); - print_segment("ds ", &sregs.ds); - print_segment("es ", &sregs.es); - print_segment("fs ", &sregs.fs); - print_segment("gs ", &sregs.gs); - print_segment("tr ", &sregs.tr); - print_segment("ldt", &sregs.ldt); - print_dtable("gdt", &sregs.gdt); - print_dtable("idt", &sregs.idt); - - dprintf(debug_fd, "\n APIC:\n"); - dprintf(debug_fd, " -----\n"); - dprintf(debug_fd, " efer: %016llx apic base: %016llx nmi: %s\n", - (u64) sregs.efer, (u64) sregs.apic_base, - (vcpu->kvm->nmi_disabled ? 
"disabled" : "enabled")); - - dprintf(debug_fd, "\n Interrupt bitmap:\n"); - dprintf(debug_fd, " -----------------\n"); - for (i = 0; i < (KVM_NR_INTERRUPTS + 63) / 64; i++) - dprintf(debug_fd, " %016llx", (u64) sregs.interrupt_bitmap[i]); - dprintf(debug_fd, "\n"); -} - -#define MAX_SYM_LEN 128 - -void kvm_cpu__show_code(struct kvm_cpu *vcpu) -{ - unsigned int code_bytes = 64; - unsigned int code_prologue = code_bytes * 43 / 64; - unsigned int code_len = code_bytes; - char sym[MAX_SYM_LEN]; - unsigned char c; - unsigned int i; - u8 *ip; - - if (ioctl(vcpu->vcpu_fd, KVM_GET_REGS, &vcpu->regs) < 0) - die("KVM_GET_REGS failed"); - - if (ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, &vcpu->sregs) < 0) - die("KVM_GET_SREGS failed"); - - ip = guest_flat_to_host(vcpu->kvm, ip_to_flat(vcpu, vcpu->regs.rip) - code_prologue); - - dprintf(debug_fd, "\n Code:\n"); - dprintf(debug_fd, " -----\n"); - - symbol__lookup(vcpu->kvm, vcpu->regs.rip, sym, MAX_SYM_LEN); - - dprintf(debug_fd, " rip: [<%016lx>] %s\n\n", (unsigned long) vcpu->regs.rip, sym); - - for (i = 0; i < code_len; i++, ip++) { - if (!host_ptr_in_ram(vcpu->kvm, ip)) - break; - - c = *ip; - - if (ip == guest_flat_to_host(vcpu->kvm, ip_to_flat(vcpu, vcpu->regs.rip))) - dprintf(debug_fd, " <%02x>", c); - else - dprintf(debug_fd, " %02x", c); - } - - dprintf(debug_fd, "\n"); - - dprintf(debug_fd, "\n Stack:\n"); - dprintf(debug_fd, " ------\n"); - kvm__dump_mem(vcpu->kvm, vcpu->regs.rsp, 32); -} - -void kvm_cpu__show_page_tables(struct kvm_cpu *vcpu) -{ - u64 *pte1; - u64 *pte2; - u64 *pte3; - u64 *pte4; - - if (!is_in_protected_mode(vcpu)) - return; - - if (ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, &vcpu->sregs) < 0) - die("KVM_GET_SREGS failed"); - - pte4 = guest_flat_to_host(vcpu->kvm, vcpu->sregs.cr3); - if (!host_ptr_in_ram(vcpu->kvm, pte4)) - return; - - pte3 = guest_flat_to_host(vcpu->kvm, (*pte4 & ~0xfff)); - if (!host_ptr_in_ram(vcpu->kvm, pte3)) - return; - - pte2 = guest_flat_to_host(vcpu->kvm, (*pte3 & ~0xfff)); - if 
(!host_ptr_in_ram(vcpu->kvm, pte2)) - return; - - pte1 = guest_flat_to_host(vcpu->kvm, (*pte2 & ~0xfff)); - if (!host_ptr_in_ram(vcpu->kvm, pte1)) - return; - - dprintf(debug_fd, "Page Tables:\n"); - if (*pte2 & (1 << 7)) - dprintf(debug_fd, " pte4: %016llx pte3: %016llx" - " pte2: %016llx\n", - *pte4, *pte3, *pte2); - else - dprintf(debug_fd, " pte4: %016llx pte3: %016llx pte2: %016" - "llx pte1: %016llx\n", - *pte4, *pte3, *pte2, *pte1); -} - void kvm_cpu__run(struct kvm_cpu *vcpu) { int err; @@ -454,7 +83,6 @@ int kvm_cpu__start(struct kvm_cpu *cpu) signal(SIGKVMEXIT, kvm_cpu_signal_handler); signal(SIGKVMPAUSE, kvm_cpu_signal_handler); - kvm_cpu__setup_cpuid(cpu); kvm_cpu__reset_vcpu(cpu); if (cpu->kvm->single_step) diff --git a/tools/kvm/kvm.c b/tools/kvm/kvm.c index 252bd18..7ce1640 100644 --- a/tools/kvm/kvm.c +++ b/tools/kvm/kvm.c @@ -1,10 +1,5 @@ #include "kvm/kvm.h" - -#include "kvm/boot-protocol.h" -#include "kvm/cpufeature.h" #include "kvm/read-write.h" -#include "kvm/interrupt.h" -#include "kvm/mptable.h" #include "kvm/util.h" #include "kvm/mutex.h" #include "kvm/kvm-cpu.h" @@ -12,14 +7,11 @@ #include <linux/kvm.h> -#include <asm/bootparam.h> - #include <sys/un.h> #include <sys/types.h> #include <sys/socket.h> #include <sys/ioctl.h> #include <sys/mman.h> -#include <sys/stat.h> #include <stdbool.h> #include <assert.h> #include <limits.h> @@ -58,29 +50,11 @@ const char *kvm_exit_reasons[] = { DEFINE_KVM_EXIT_REASON(KVM_EXIT_INTERNAL_ERROR), }; -#define DEFINE_KVM_EXT(ext) \ - .name = #ext, \ - .code = ext - -struct { - const char *name; - int code; -} kvm_req_ext[] = { - { DEFINE_KVM_EXT(KVM_CAP_COALESCED_MMIO) }, - { DEFINE_KVM_EXT(KVM_CAP_SET_TSS_ADDR) }, - { DEFINE_KVM_EXT(KVM_CAP_PIT2) }, - { DEFINE_KVM_EXT(KVM_CAP_USER_MEMORY) }, - { DEFINE_KVM_EXT(KVM_CAP_IRQ_ROUTING) }, - { DEFINE_KVM_EXT(KVM_CAP_IRQCHIP) }, - { DEFINE_KVM_EXT(KVM_CAP_HLT) }, - { DEFINE_KVM_EXT(KVM_CAP_IRQ_INJECT_STATUS) }, - { DEFINE_KVM_EXT(KVM_CAP_EXT_CPUID) }, -}; - extern 
struct kvm *kvm; extern struct kvm_cpu *kvm_cpus[KVM_NR_CPUS]; static int pause_event; static DEFINE_MUTEX(pause_lock); +extern struct kvm_ext kvm_req_ext[]; static char kvm_dir[PATH_MAX]; @@ -127,7 +101,9 @@ static int kvm__check_extensions(struct kvm *kvm) { unsigned int i; - for (i = 0; i < ARRAY_SIZE(kvm_req_ext); i++) { + for (i = 0; ; i++) { + if (!kvm_req_ext[i].name) + break; if (!kvm__supports_extension(kvm, kvm_req_ext[i].code)) { pr_error("Unsuppored KVM extension detected: %s", kvm_req_ext[i].name); @@ -261,48 +237,6 @@ void kvm__delete(struct kvm *kvm) free(kvm); } -static bool kvm__cpu_supports_vm(void) -{ - struct cpuid_regs regs; - u32 eax_base; - int feature; - - regs = (struct cpuid_regs) { - .eax = 0x00, - }; - host_cpuid(&regs); - - switch (regs.ebx) { - case CPUID_VENDOR_INTEL_1: - eax_base = 0x00; - feature = KVM__X86_FEATURE_VMX; - break; - - case CPUID_VENDOR_AMD_1: - eax_base = 0x80000000; - feature = KVM__X86_FEATURE_SVM; - break; - - default: - return false; - } - - regs = (struct cpuid_regs) { - .eax = eax_base, - }; - host_cpuid(&regs); - - if (regs.eax < eax_base + 0x01) - return false; - - regs = (struct cpuid_regs) { - .eax = eax_base + 0x01 - }; - host_cpuid(&regs); - - return regs.ecx & (1 << feature); -} - /* * Note: KVM_SET_USER_MEMORY_REGION assumes that we don't pass overlapping * memory regions to it. Therefore, be careful if you use this function for @@ -325,47 +259,6 @@ void kvm__register_mem(struct kvm *kvm, u64 guest_phys, u64 size, void *userspac die_perror("KVM_SET_USER_MEMORY_REGION ioctl"); } -/* - * Allocating RAM size bigger than 4GB requires us to leave a gap - * in the RAM which is used for PCI MMIO, hotplug, and unconfigured - * devices (see documentation of e820_setup_gap() for details). - * - * If we're required to initialize RAM bigger than 4GB, we will create - * a gap between 0xe0000000 and 0x100000000 in the guest virtual mem space.
- */ - -void kvm__init_ram(struct kvm *kvm) -{ - u64 phys_start, phys_size; - void *host_mem; - - if (kvm->ram_size < KVM_32BIT_GAP_START) { - /* Use a single block of RAM for 32bit RAM */ - - phys_start = 0; - phys_size = kvm->ram_size; - host_mem = kvm->ram_start; - - kvm__register_mem(kvm, phys_start, phys_size, host_mem); - } else { - /* First RAM range from zero to the PCI gap: */ - - phys_start = 0; - phys_size = KVM_32BIT_GAP_START; - host_mem = kvm->ram_start; - - kvm__register_mem(kvm, phys_start, phys_size, host_mem); - - /* Second RAM range from 4GB to the end of RAM: */ - - phys_start = 0x100000000ULL; - phys_size = kvm->ram_size - phys_size; - host_mem = kvm->ram_start + phys_start; - - kvm__register_mem(kvm, phys_start, phys_size, host_mem); - } -} - int kvm__recommended_cpus(struct kvm *kvm) { int ret; @@ -410,11 +303,10 @@ int kvm__max_cpus(struct kvm *kvm) struct kvm *kvm__init(const char *kvm_dev, u64 ram_size, const char *name) { - struct kvm_pit_config pit_config = { .flags = 0, }; struct kvm *kvm; int ret; - if (!kvm__cpu_supports_vm()) + if (!kvm__arch_cpu_supports_vm()) die("Your CPU does not support hardware virtualization"); kvm = kvm__new(); @@ -442,36 +334,7 @@ struct kvm *kvm__init(const char *kvm_dev, u64 ram_size, const char *name) if (kvm__check_extensions(kvm)) die("A required KVM extention is not supported by OS"); - ret = ioctl(kvm->vm_fd, KVM_SET_TSS_ADDR, 0xfffbd000); - if (ret < 0) - die_perror("KVM_SET_TSS_ADDR ioctl"); - - ret = ioctl(kvm->vm_fd, KVM_CREATE_PIT2, &pit_config); - if (ret < 0) - die_perror("KVM_CREATE_PIT2 ioctl"); - - kvm->ram_size = ram_size; - - if (kvm->ram_size < KVM_32BIT_GAP_START) { - kvm->ram_start = mmap(NULL, ram_size, PROT_RW, MAP_ANON_NORESERVE, -1, 0); - } else { - kvm->ram_start = mmap(NULL, ram_size + KVM_32BIT_GAP_SIZE, PROT_RW, MAP_ANON_NORESERVE, -1, 0); - if (kvm->ram_start != MAP_FAILED) { - /* - * We mprotect the gap (see kvm__init_ram() for details) PROT_NONE so that - * if we accidently 
write to it, we will know. - */ - mprotect(kvm->ram_start + KVM_32BIT_GAP_START, KVM_32BIT_GAP_SIZE, PROT_NONE); - } - } - if (kvm->ram_start == MAP_FAILED) - die("out of memory"); - - madvise(kvm->ram_start, kvm->ram_size, MADV_MERGEABLE); - - ret = ioctl(kvm->vm_fd, KVM_CREATE_IRQCHIP); - if (ret < 0) - die_perror("KVM_CREATE_IRQCHIP ioctl"); + kvm__arch_init(kvm, kvm_dev, ram_size, name); kvm->name = name; @@ -480,141 +343,6 @@ struct kvm *kvm__init(const char *kvm_dev, u64 ram_size, const char *name) return kvm; } -#define BOOT_LOADER_SELECTOR 0x1000 -#define BOOT_LOADER_IP 0x0000 -#define BOOT_LOADER_SP 0x8000 -#define BOOT_CMDLINE_OFFSET 0x20000 - -#define BOOT_PROTOCOL_REQUIRED 0x206 -#define LOAD_HIGH 0x01 - -static int load_flat_binary(struct kvm *kvm, int fd) -{ - void *p; - int nr; - - if (lseek(fd, 0, SEEK_SET) < 0) - die_perror("lseek"); - - p = guest_real_to_host(kvm, BOOT_LOADER_SELECTOR, BOOT_LOADER_IP); - - while ((nr = read(fd, p, 65536)) > 0) - p += nr; - - kvm->boot_selector = BOOT_LOADER_SELECTOR; - kvm->boot_ip = BOOT_LOADER_IP; - kvm->boot_sp = BOOT_LOADER_SP; - - return true; -} - -static const char *BZIMAGE_MAGIC = "HdrS"; - -static bool load_bzimage(struct kvm *kvm, int fd_kernel, - int fd_initrd, const char *kernel_cmdline, u16 vidmode) -{ - struct boot_params *kern_boot; - unsigned long setup_sects; - struct boot_params boot; - size_t cmdline_size; - ssize_t setup_size; - void *p; - int nr; - - /* - * See Documentation/x86/boot.txt for details no bzImage on-disk and - * memory layout. 
- */ - - if (lseek(fd_kernel, 0, SEEK_SET) < 0) - die_perror("lseek"); - - if (read(fd_kernel, &boot, sizeof(boot)) != sizeof(boot)) - return false; - - if (memcmp(&boot.hdr.header, BZIMAGE_MAGIC, strlen(BZIMAGE_MAGIC))) - return false; - - if (boot.hdr.version < BOOT_PROTOCOL_REQUIRED) - die("Too old kernel"); - - if (lseek(fd_kernel, 0, SEEK_SET) < 0) - die_perror("lseek"); - - if (!boot.hdr.setup_sects) - boot.hdr.setup_sects = BZ_DEFAULT_SETUP_SECTS; - setup_sects = boot.hdr.setup_sects + 1; - - setup_size = setup_sects << 9; - p = guest_real_to_host(kvm, BOOT_LOADER_SELECTOR, BOOT_LOADER_IP); - - /* copy setup.bin to mem*/ - if (read(fd_kernel, p, setup_size) != setup_size) - die_perror("read"); - - /* copy vmlinux.bin to BZ_KERNEL_START*/ - p = guest_flat_to_host(kvm, BZ_KERNEL_START); - - while ((nr = read(fd_kernel, p, 65536)) > 0) - p += nr; - - p = guest_flat_to_host(kvm, BOOT_CMDLINE_OFFSET); - if (kernel_cmdline) { - cmdline_size = strlen(kernel_cmdline) + 1; - if (cmdline_size > boot.hdr.cmdline_size) - cmdline_size = boot.hdr.cmdline_size; - - memset(p, 0, boot.hdr.cmdline_size); - memcpy(p, kernel_cmdline, cmdline_size - 1); - } - - kern_boot = guest_real_to_host(kvm, BOOT_LOADER_SELECTOR, 0x00); - - kern_boot->hdr.cmd_line_ptr = BOOT_CMDLINE_OFFSET; - kern_boot->hdr.type_of_loader = 0xff; - kern_boot->hdr.heap_end_ptr = 0xfe00; - kern_boot->hdr.loadflags |= CAN_USE_HEAP; - kern_boot->hdr.vid_mode = vidmode; - - /* - * Read initrd image into guest memory - */ - if (fd_initrd >= 0) { - struct stat initrd_stat; - unsigned long addr; - - if (fstat(fd_initrd, &initrd_stat)) - die_perror("fstat"); - - addr = boot.hdr.initrd_addr_max & ~0xfffff; - for (;;) { - if (addr < BZ_KERNEL_START) - die("Not enough memory for initrd"); - else if (addr < (kvm->ram_size - initrd_stat.st_size)) - break; - addr -= 0x100000; - } - - p = guest_flat_to_host(kvm, addr); - nr = read(fd_initrd, p, initrd_stat.st_size); - if (nr != initrd_stat.st_size) - die("Failed to read 
initrd"); - - kern_boot->hdr.ramdisk_image = addr; - kern_boot->hdr.ramdisk_size = initrd_stat.st_size; - } - - kvm->boot_selector = BOOT_LOADER_SELECTOR; - /* - * The real-mode setup code starts at offset 0x200 of a bzImage. See - * Documentation/x86/boot.txt for details. - */ - kvm->boot_ip = BOOT_LOADER_IP + 0x200; - kvm->boot_sp = BOOT_LOADER_SP; - - return true; -} - /* RFC 1952 */ #define GZIP_ID1 0x1f #define GZIP_ID2 0x8b @@ -675,24 +403,6 @@ found_kernel: return ret; } -/** - * kvm__setup_bios - inject BIOS into guest system memory - * @kvm - guest system descriptor - * - * This function is a main routine where we poke guest memory - * and install BIOS there. - */ -void kvm__setup_bios(struct kvm *kvm) -{ - /* standart minimal configuration */ - setup_bios(kvm); - - /* FIXME: SMP, ACPI and friends here */ - - /* MP table */ - mptable_setup(kvm, kvm->nrcpus); -} - #define TIMER_INTERVAL_NS 1000000 /* 1 msec */ /* @@ -732,27 +442,6 @@ void kvm__stop_timer(struct kvm *kvm) kvm->timerid = 0; } -void kvm__irq_line(struct kvm *kvm, int irq, int level) -{ - struct kvm_irq_level irq_level; - - irq_level = (struct kvm_irq_level) { - { - .irq = irq, - }, - .level = level, - }; - - if (ioctl(kvm->vm_fd, KVM_IRQ_LINE, &irq_level) < 0) - die_perror("KVM_IRQ_LINE failed"); -} - -void kvm__irq_trigger(struct kvm *kvm, int irq) -{ - kvm__irq_line(kvm, irq, 1); - kvm__irq_line(kvm, irq, 0); -} - void kvm__dump_mem(struct kvm *kvm, unsigned long addr, unsigned long size) { unsigned char *p; diff --git a/tools/kvm/bios.c b/tools/kvm/x86/bios.c similarity index 100% rename from tools/kvm/bios.c rename to tools/kvm/x86/bios.c diff --git a/tools/kvm/bios/.gitignore b/tools/kvm/x86/bios/.gitignore similarity index 100% rename from tools/kvm/bios/.gitignore rename to tools/kvm/x86/bios/.gitignore diff --git a/tools/kvm/bios/bios-rom.S b/tools/kvm/x86/bios/bios-rom.S similarity index 80% rename from tools/kvm/bios/bios-rom.S rename to tools/kvm/x86/bios/bios-rom.S index 
dc52b1e..3269ce9 100644 --- a/tools/kvm/bios/bios-rom.S +++ b/tools/kvm/x86/bios/bios-rom.S @@ -8,5 +8,5 @@ #endif GLOBAL(bios_rom) - .incbin "bios/bios.bin" + .incbin "x86/bios/bios.bin" END(bios_rom) diff --git a/tools/kvm/bios/e820.c b/tools/kvm/x86/bios/e820.c similarity index 100% rename from tools/kvm/bios/e820.c rename to tools/kvm/x86/bios/e820.c diff --git a/tools/kvm/bios/entry.S b/tools/kvm/x86/bios/entry.S similarity index 100% rename from tools/kvm/bios/entry.S rename to tools/kvm/x86/bios/entry.S diff --git a/tools/kvm/bios/gen-offsets.sh b/tools/kvm/x86/bios/gen-offsets.sh similarity index 100% rename from tools/kvm/bios/gen-offsets.sh rename to tools/kvm/x86/bios/gen-offsets.sh diff --git a/tools/kvm/bios/int10.c b/tools/kvm/x86/bios/int10.c similarity index 100% rename from tools/kvm/bios/int10.c rename to tools/kvm/x86/bios/int10.c diff --git a/tools/kvm/bios/int15.c b/tools/kvm/x86/bios/int15.c similarity index 100% rename from tools/kvm/bios/int15.c rename to tools/kvm/x86/bios/int15.c diff --git a/tools/kvm/bios/local.S b/tools/kvm/x86/bios/local.S similarity index 100% rename from tools/kvm/bios/local.S rename to tools/kvm/x86/bios/local.S diff --git a/tools/kvm/bios/macro.S b/tools/kvm/x86/bios/macro.S similarity index 100% rename from tools/kvm/bios/macro.S rename to tools/kvm/x86/bios/macro.S diff --git a/tools/kvm/bios/memcpy.c b/tools/kvm/x86/bios/memcpy.c similarity index 100% rename from tools/kvm/bios/memcpy.c rename to tools/kvm/x86/bios/memcpy.c diff --git a/tools/kvm/bios/rom.ld.S b/tools/kvm/x86/bios/rom.ld.S similarity index 100% rename from tools/kvm/bios/rom.ld.S rename to tools/kvm/x86/bios/rom.ld.S diff --git a/tools/kvm/cpuid.c b/tools/kvm/x86/cpuid.c similarity index 100% rename from tools/kvm/cpuid.c rename to tools/kvm/x86/cpuid.c diff --git a/tools/kvm/include/kvm/assembly.h b/tools/kvm/x86/include/kvm/assembly.h similarity index 100% rename from tools/kvm/include/kvm/assembly.h rename to 
tools/kvm/x86/include/kvm/assembly.h diff --git a/tools/kvm/include/kvm/barrier.h b/tools/kvm/x86/include/kvm/barrier.h similarity index 100% rename from tools/kvm/include/kvm/barrier.h rename to tools/kvm/x86/include/kvm/barrier.h diff --git a/tools/kvm/include/kvm/bios-export.h b/tools/kvm/x86/include/kvm/bios-export.h similarity index 100% rename from tools/kvm/include/kvm/bios-export.h rename to tools/kvm/x86/include/kvm/bios-export.h diff --git a/tools/kvm/include/kvm/bios.h b/tools/kvm/x86/include/kvm/bios.h similarity index 100% rename from tools/kvm/include/kvm/bios.h rename to tools/kvm/x86/include/kvm/bios.h diff --git a/tools/kvm/include/kvm/boot-protocol.h b/tools/kvm/x86/include/kvm/boot-protocol.h similarity index 100% rename from tools/kvm/include/kvm/boot-protocol.h rename to tools/kvm/x86/include/kvm/boot-protocol.h diff --git a/tools/kvm/include/kvm/cpufeature.h b/tools/kvm/x86/include/kvm/cpufeature.h similarity index 100% rename from tools/kvm/include/kvm/cpufeature.h rename to tools/kvm/x86/include/kvm/cpufeature.h diff --git a/tools/kvm/include/kvm/interrupt.h b/tools/kvm/x86/include/kvm/interrupt.h similarity index 100% rename from tools/kvm/include/kvm/interrupt.h rename to tools/kvm/x86/include/kvm/interrupt.h diff --git a/tools/kvm/x86/include/kvm/kvm-arch.h b/tools/kvm/x86/include/kvm/kvm-arch.h new file mode 100644 index 0000000..02aa8b9 --- /dev/null +++ b/tools/kvm/x86/include/kvm/kvm-arch.h @@ -0,0 +1,59 @@ +#ifndef KVM__KVM_ARCH_H +#define KVM__KVM_ARCH_H + +#include "kvm/interrupt.h" +#include "kvm/segment.h" + +#include <stdbool.h> +#include <linux/types.h> +#include <time.h> + +#define KVM_NR_CPUS (255) + +/* + * The hole includes VESA framebuffer and PCI memory. + */ +#define KVM_32BIT_GAP_SIZE (768 << 20) +#define KVM_32BIT_GAP_START ((1ULL << 32) - KVM_32BIT_GAP_SIZE) + +#define KVM_MMIO_START KVM_32BIT_GAP_START + +struct kvm { + int sys_fd; /* For system ioctls(), i.e. 
/dev/kvm */ + int vm_fd; /* For VM ioctls() */ + timer_t timerid; /* Posix timer for interrupts */ + + int nrcpus; /* Number of cpus to run */ + + u32 mem_slots; /* for KVM_SET_USER_MEMORY_REGION */ + + u64 ram_size; + void *ram_start; + + bool nmi_disabled; + + bool single_step; + + u16 boot_selector; + u16 boot_ip; + u16 boot_sp; + + struct interrupt_table interrupt_table; + + const char *vmlinux; + struct disk_image **disks; + int nr_disks; + + const char *name; +}; + +static inline void *guest_flat_to_host(struct kvm *kvm, unsigned long offset); /* In kvm.h */ + +static inline void *guest_real_to_host(struct kvm *kvm, u16 selector, u16 offset) +{ + unsigned long flat = segment_to_flat(selector, offset); + + return guest_flat_to_host(kvm, flat); +} + +#endif /* KVM__KVM_ARCH_H */ diff --git a/tools/kvm/x86/include/kvm/kvm-cpu-arch.h b/tools/kvm/x86/include/kvm/kvm-cpu-arch.h new file mode 100644 index 0000000..ed1c727 --- /dev/null +++ b/tools/kvm/x86/include/kvm/kvm-cpu-arch.h @@ -0,0 +1,33 @@ +#ifndef KVM__KVM_CPU_ARCH_H +#define KVM__KVM_CPU_ARCH_H + +/* Architecture-specific kvm_cpu definitions. 
*/ + +#include <linux/kvm.h> /* for struct kvm_regs */ + +#include <pthread.h> + +struct kvm; + +struct kvm_cpu { + pthread_t thread; /* VCPU thread */ + + unsigned long cpu_id; + + struct kvm *kvm; /* parent KVM */ + int vcpu_fd; /* For VCPU ioctls() */ + struct kvm_run *kvm_run; + + struct kvm_regs regs; + struct kvm_sregs sregs; + struct kvm_fpu fpu; + + struct kvm_msrs *msrs; /* dynamically allocated */ + + u8 is_running; + u8 paused; + + struct kvm_coalesced_mmio_ring *ring; +}; + +#endif /* KVM__KVM_CPU_ARCH_H */ diff --git a/tools/kvm/include/kvm/mptable.h b/tools/kvm/x86/include/kvm/mptable.h similarity index 100% rename from tools/kvm/include/kvm/mptable.h rename to tools/kvm/x86/include/kvm/mptable.h diff --git a/tools/kvm/interrupt.c b/tools/kvm/x86/interrupt.c similarity index 100% rename from tools/kvm/interrupt.c rename to tools/kvm/x86/interrupt.c diff --git a/tools/kvm/x86/ioport.c b/tools/kvm/x86/ioport.c new file mode 100644 index 0000000..8a91bf2 --- /dev/null +++ b/tools/kvm/x86/ioport.c @@ -0,0 +1,59 @@ +#include "kvm/ioport.h" + +#include <stdlib.h> + +static bool debug_io_out(struct ioport *ioport, struct kvm *kvm, u16 port, void *data, int size) +{ + exit(EXIT_SUCCESS); +} + +static struct ioport_operations debug_ops = { + .io_out = debug_io_out, +}; + +static bool dummy_io_in(struct ioport *ioport, struct kvm *kvm, u16 port, void *data, int size) +{ + return true; +} + +static bool dummy_io_out(struct ioport *ioport, struct kvm *kvm, u16 port, void *data, int size) +{ + return true; +} + +static struct ioport_operations dummy_read_write_ioport_ops = { + .io_in = dummy_io_in, + .io_out = dummy_io_out, +}; + +static struct ioport_operations dummy_write_only_ioport_ops = { + .io_out = dummy_io_out, +}; + +void ioport__setup_arch(void) +{ + /* Legacy ioport setup */ + + /* 0x0020 - 0x003F - 8259A PIC 1 */ + ioport__register(0x0020, &dummy_read_write_ioport_ops, 2, NULL); + + /* PORT 0040-005F - PIT - PROGRAMMABLE INTERVAL TIMER (8253, 8254) */ 
+ ioport__register(0x0040, &dummy_read_write_ioport_ops, 4, NULL); + + /* 0x00A0 - 0x00AF - 8259A PIC 2 */ + ioport__register(0x00A0, &dummy_read_write_ioport_ops, 2, NULL); + + /* PORT 00E0-00EF are 'motherboard specific' so we use them for our + internal debugging purposes. */ + ioport__register(IOPORT_DBG, &debug_ops, 1, NULL); + + /* PORT 00ED - DUMMY PORT FOR DELAY??? */ + ioport__register(0x00ED, &dummy_write_only_ioport_ops, 1, NULL); + + /* 0x00F0 - 0x00FF - Math co-processor */ + ioport__register(0x00F0, &dummy_write_only_ioport_ops, 2, NULL); + + /* PORT 03D4-03D5 - COLOR VIDEO - CRT CONTROL REGISTERS */ + ioport__register(0x03D4, &dummy_read_write_ioport_ops, 1, NULL); + ioport__register(0x03D5, &dummy_write_only_ioport_ops, 1, NULL); +} diff --git a/tools/kvm/irq.c b/tools/kvm/x86/irq.c similarity index 100% rename from tools/kvm/irq.c rename to tools/kvm/x86/irq.c diff --git a/tools/kvm/x86/kvm-cpu.c b/tools/kvm/x86/kvm-cpu.c new file mode 100644 index 0000000..b26b208 --- /dev/null +++ b/tools/kvm/x86/kvm-cpu.c @@ -0,0 +1,383 @@ +#include "kvm/kvm-cpu.h" + +#include "kvm/symbol.h" +#include "kvm/util.h" +#include "kvm/kvm.h" + +#include <asm/msr-index.h> + +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <signal.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <stdio.h> + +static int debug_fd; + +void kvm_cpu__set_debug_fd(int fd) +{ + debug_fd = fd; +} + +int kvm_cpu__get_debug_fd(void) +{ + return debug_fd; +} + +static inline bool is_in_protected_mode(struct kvm_cpu *vcpu) +{ + return vcpu->sregs.cr0 & 0x01; +} + +static inline u64 ip_to_flat(struct kvm_cpu *vcpu, u64 ip) +{ + u64 cs; + + /* + * NOTE! We should take code segment base address into account here. + * Luckily it's usually zero because Linux uses flat memory model. 
+ */ + if (is_in_protected_mode(vcpu)) + return ip; + + cs = vcpu->sregs.cs.selector; + + return ip + (cs << 4); +} + +static inline u32 selector_to_base(u16 selector) +{ + /* + * KVM on Intel requires 'base' to be 'selector * 16' in real mode. + */ + return (u32)selector * 16; +} + +static struct kvm_cpu *kvm_cpu__new(struct kvm *kvm) +{ + struct kvm_cpu *vcpu; + + vcpu = calloc(1, sizeof *vcpu); + if (!vcpu) + return NULL; + + vcpu->kvm = kvm; + + return vcpu; +} + +void kvm_cpu__delete(struct kvm_cpu *vcpu) +{ + if (vcpu->msrs) + free(vcpu->msrs); + + free(vcpu); +} + +struct kvm_cpu *kvm_cpu__init(struct kvm *kvm, unsigned long cpu_id) +{ + struct kvm_cpu *vcpu; + int mmap_size; + int coalesced_offset; + + vcpu = kvm_cpu__new(kvm); + if (!vcpu) + return NULL; + + vcpu->cpu_id = cpu_id; + + vcpu->vcpu_fd = ioctl(vcpu->kvm->vm_fd, KVM_CREATE_VCPU, cpu_id); + if (vcpu->vcpu_fd < 0) + die_perror("KVM_CREATE_VCPU ioctl"); + + mmap_size = ioctl(vcpu->kvm->sys_fd, KVM_GET_VCPU_MMAP_SIZE, 0); + if (mmap_size < 0) + die_perror("KVM_GET_VCPU_MMAP_SIZE ioctl"); + + vcpu->kvm_run = mmap(NULL, mmap_size, PROT_RW, MAP_SHARED, vcpu->vcpu_fd, 0); + if (vcpu->kvm_run == MAP_FAILED) + die("unable to mmap vcpu fd"); + + coalesced_offset = ioctl(kvm->sys_fd, KVM_CHECK_EXTENSION, KVM_CAP_COALESCED_MMIO); + if (coalesced_offset) + vcpu->ring = (void *)vcpu->kvm_run + (coalesced_offset * PAGE_SIZE); + + vcpu->is_running = true; + + return vcpu; +} + +static struct kvm_msrs *kvm_msrs__new(size_t nmsrs) +{ + struct kvm_msrs *vcpu = calloc(1, sizeof(*vcpu) + (sizeof(struct kvm_msr_entry) * nmsrs)); + + if (!vcpu) + die("out of memory"); + + return vcpu; +} + +#define KVM_MSR_ENTRY(_index, _data) \ + (struct kvm_msr_entry) { .index = _index, .data = _data } + +static void kvm_cpu__setup_msrs(struct kvm_cpu *vcpu) +{ + unsigned long ndx = 0; + + vcpu->msrs = kvm_msrs__new(100); + + vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_CS, 0x0); + vcpu->msrs->entries[ndx++] = 
KVM_MSR_ENTRY(MSR_IA32_SYSENTER_ESP, 0x0); + vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_EIP, 0x0); +#ifdef CONFIG_X86_64 + vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_STAR, 0x0); + vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_CSTAR, 0x0); + vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_KERNEL_GS_BASE, 0x0); + vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_SYSCALL_MASK, 0x0); + vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_LSTAR, 0x0); +#endif + vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_TSC, 0x0); + vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_MISC_ENABLE, + MSR_IA32_MISC_ENABLE_FAST_STRING); + + vcpu->msrs->nmsrs = ndx; + + if (ioctl(vcpu->vcpu_fd, KVM_SET_MSRS, vcpu->msrs) < 0) + die_perror("KVM_SET_MSRS failed"); +} + +static void kvm_cpu__setup_fpu(struct kvm_cpu *vcpu) +{ + vcpu->fpu = (struct kvm_fpu) { + .fcw = 0x37f, + .mxcsr = 0x1f80, + }; + + if (ioctl(vcpu->vcpu_fd, KVM_SET_FPU, &vcpu->fpu) < 0) + die_perror("KVM_SET_FPU failed"); +} + +static void kvm_cpu__setup_regs(struct kvm_cpu *vcpu) +{ + vcpu->regs = (struct kvm_regs) { + /* We start the guest in 16-bit real mode */ + .rflags = 0x0000000000000002ULL, + + .rip = vcpu->kvm->boot_ip, + .rsp = vcpu->kvm->boot_sp, + .rbp = vcpu->kvm->boot_sp, + }; + + if (vcpu->regs.rip > USHRT_MAX) + die("ip 0x%llx is too high for real mode", (u64) vcpu->regs.rip); + + if (ioctl(vcpu->vcpu_fd, KVM_SET_REGS, &vcpu->regs) < 0) + die_perror("KVM_SET_REGS failed"); +} + +static void kvm_cpu__setup_sregs(struct kvm_cpu *vcpu) +{ + + if (ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, &vcpu->sregs) < 0) + die_perror("KVM_GET_SREGS failed"); + + vcpu->sregs.cs.selector = vcpu->kvm->boot_selector; + vcpu->sregs.cs.base = selector_to_base(vcpu->kvm->boot_selector); + vcpu->sregs.ss.selector = vcpu->kvm->boot_selector; + vcpu->sregs.ss.base = selector_to_base(vcpu->kvm->boot_selector); + vcpu->sregs.ds.selector = vcpu->kvm->boot_selector; + vcpu->sregs.ds.base = 
selector_to_base(vcpu->kvm->boot_selector); + vcpu->sregs.es.selector = vcpu->kvm->boot_selector; + vcpu->sregs.es.base = selector_to_base(vcpu->kvm->boot_selector); + vcpu->sregs.fs.selector = vcpu->kvm->boot_selector; + vcpu->sregs.fs.base = selector_to_base(vcpu->kvm->boot_selector); + vcpu->sregs.gs.selector = vcpu->kvm->boot_selector; + vcpu->sregs.gs.base = selector_to_base(vcpu->kvm->boot_selector); + + if (ioctl(vcpu->vcpu_fd, KVM_SET_SREGS, &vcpu->sregs) < 0) + die_perror("KVM_SET_SREGS failed"); +} + +/** + * kvm_cpu__reset_vcpu - reset virtual CPU to a known state + */ +void kvm_cpu__reset_vcpu(struct kvm_cpu *vcpu) +{ + kvm_cpu__setup_cpuid(vcpu); + kvm_cpu__setup_sregs(vcpu); + kvm_cpu__setup_regs(vcpu); + kvm_cpu__setup_fpu(vcpu); + kvm_cpu__setup_msrs(vcpu); +} + +static void print_dtable(const char *name, struct kvm_dtable *dtable) +{ + dprintf(debug_fd, " %s %016llx %08hx\n", + name, (u64) dtable->base, (u16) dtable->limit); +} + +static void print_segment(const char *name, struct kvm_segment *seg) +{ + dprintf(debug_fd, " %s %04hx %016llx %08x %02hhx %x %x %x %x %x %x %x\n", + name, (u16) seg->selector, (u64) seg->base, (u32) seg->limit, + (u8) seg->type, seg->present, seg->dpl, seg->db, seg->s, seg->l, seg->g, seg->avl); +} + +void kvm_cpu__show_registers(struct kvm_cpu *vcpu) +{ + unsigned long cr0, cr2, cr3; + unsigned long cr4, cr8; + unsigned long rax, rbx, rcx; + unsigned long rdx, rsi, rdi; + unsigned long rbp, r8, r9; + unsigned long r10, r11, r12; + unsigned long r13, r14, r15; + unsigned long rip, rsp; + struct kvm_sregs sregs; + unsigned long rflags; + struct kvm_regs regs; + int i; + + if (ioctl(vcpu->vcpu_fd, KVM_GET_REGS, &regs) < 0) + die("KVM_GET_REGS failed"); + + rflags = regs.rflags; + + rip = regs.rip; rsp = regs.rsp; + rax = regs.rax; rbx = regs.rbx; rcx = regs.rcx; + rdx = regs.rdx; rsi = regs.rsi; rdi = regs.rdi; + rbp = regs.rbp; r8 = regs.r8; r9 = regs.r9; + r10 = regs.r10; r11 = regs.r11; r12 = regs.r12; + r13 = regs.r13;
r14 = regs.r14; r15 = regs.r15; + + dprintf(debug_fd, "\n Registers:\n"); + dprintf(debug_fd, " ----------\n"); + dprintf(debug_fd, " rip: %016lx rsp: %016lx flags: %016lx\n", rip, rsp, rflags); + dprintf(debug_fd, " rax: %016lx rbx: %016lx rcx: %016lx\n", rax, rbx, rcx); + dprintf(debug_fd, " rdx: %016lx rsi: %016lx rdi: %016lx\n", rdx, rsi, rdi); + dprintf(debug_fd, " rbp: %016lx r8: %016lx r9: %016lx\n", rbp, r8, r9); + dprintf(debug_fd, " r10: %016lx r11: %016lx r12: %016lx\n", r10, r11, r12); + dprintf(debug_fd, " r13: %016lx r14: %016lx r15: %016lx\n", r13, r14, r15); + + if (ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, &sregs) < 0) + die("KVM_GET_REGS failed"); + + cr0 = sregs.cr0; cr2 = sregs.cr2; cr3 = sregs.cr3; + cr4 = sregs.cr4; cr8 = sregs.cr8; + + dprintf(debug_fd, " cr0: %016lx cr2: %016lx cr3: %016lx\n", cr0, cr2, cr3); + dprintf(debug_fd, " cr4: %016lx cr8: %016lx\n", cr4, cr8); + dprintf(debug_fd, "\n Segment registers:\n"); + dprintf(debug_fd, " ------------------\n"); + dprintf(debug_fd, " register selector base limit type p dpl db s l g avl\n"); + print_segment("cs ", &sregs.cs); + print_segment("ss ", &sregs.ss); + print_segment("ds ", &sregs.ds); + print_segment("es ", &sregs.es); + print_segment("fs ", &sregs.fs); + print_segment("gs ", &sregs.gs); + print_segment("tr ", &sregs.tr); + print_segment("ldt", &sregs.ldt); + print_dtable("gdt", &sregs.gdt); + print_dtable("idt", &sregs.idt); + + dprintf(debug_fd, "\n APIC:\n"); + dprintf(debug_fd, " -----\n"); + dprintf(debug_fd, " efer: %016llx apic base: %016llx nmi: %s\n", + (u64) sregs.efer, (u64) sregs.apic_base, + (vcpu->kvm->nmi_disabled ? 
"disabled" : "enabled")); + + dprintf(debug_fd, "\n Interrupt bitmap:\n"); + dprintf(debug_fd, " -----------------\n"); + for (i = 0; i < (KVM_NR_INTERRUPTS + 63) / 64; i++) + dprintf(debug_fd, " %016llx", (u64) sregs.interrupt_bitmap[i]); + dprintf(debug_fd, "\n"); +} + +#define MAX_SYM_LEN 128 + +void kvm_cpu__show_code(struct kvm_cpu *vcpu) +{ + unsigned int code_bytes = 64; + unsigned int code_prologue = code_bytes * 43 / 64; + unsigned int code_len = code_bytes; + char sym[MAX_SYM_LEN]; + unsigned char c; + unsigned int i; + u8 *ip; + + if (ioctl(vcpu->vcpu_fd, KVM_GET_REGS, &vcpu->regs) < 0) + die("KVM_GET_REGS failed"); + + if (ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, &vcpu->sregs) < 0) + die("KVM_GET_SREGS failed"); + + ip = guest_flat_to_host(vcpu->kvm, ip_to_flat(vcpu, vcpu->regs.rip) - code_prologue); + + dprintf(debug_fd, "\n Code:\n"); + dprintf(debug_fd, " -----\n"); + + symbol__lookup(vcpu->kvm, vcpu->regs.rip, sym, MAX_SYM_LEN); + + dprintf(debug_fd, " rip: [<%016lx>] %s\n\n", (unsigned long) vcpu->regs.rip, sym); + + for (i = 0; i < code_len; i++, ip++) { + if (!host_ptr_in_ram(vcpu->kvm, ip)) + break; + + c = *ip; + + if (ip == guest_flat_to_host(vcpu->kvm, ip_to_flat(vcpu, vcpu->regs.rip))) + dprintf(debug_fd, " <%02x>", c); + else + dprintf(debug_fd, " %02x", c); + } + + dprintf(debug_fd, "\n"); + + dprintf(debug_fd, "\n Stack:\n"); + dprintf(debug_fd, " ------\n"); + kvm__dump_mem(vcpu->kvm, vcpu->regs.rsp, 32); +} + +void kvm_cpu__show_page_tables(struct kvm_cpu *vcpu) +{ + u64 *pte1; + u64 *pte2; + u64 *pte3; + u64 *pte4; + + if (!is_in_protected_mode(vcpu)) + return; + + if (ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, &vcpu->sregs) < 0) + die("KVM_GET_SREGS failed"); + + pte4 = guest_flat_to_host(vcpu->kvm, vcpu->sregs.cr3); + if (!host_ptr_in_ram(vcpu->kvm, pte4)) + return; + + pte3 = guest_flat_to_host(vcpu->kvm, (*pte4 & ~0xfff)); + if (!host_ptr_in_ram(vcpu->kvm, pte3)) + return; + + pte2 = guest_flat_to_host(vcpu->kvm, (*pte3 & ~0xfff)); + if 
(!host_ptr_in_ram(vcpu->kvm, pte2)) + return; + + pte1 = guest_flat_to_host(vcpu->kvm, (*pte2 & ~0xfff)); + if (!host_ptr_in_ram(vcpu->kvm, pte1)) + return; + + dprintf(debug_fd, "Page Tables:\n"); + if (*pte2 & (1 << 7)) + dprintf(debug_fd, " pte4: %016llx pte3: %016llx" + " pte2: %016llx\n", + *pte4, *pte3, *pte2); + else + dprintf(debug_fd, " pte4: %016llx pte3: %016llx pte2: %016" + "llx pte1: %016llx\n", + *pte4, *pte3, *pte2, *pte1); +} diff --git a/tools/kvm/x86/kvm.c b/tools/kvm/x86/kvm.c new file mode 100644 index 0000000..ac6c91e --- /dev/null +++ b/tools/kvm/x86/kvm.c @@ -0,0 +1,330 @@ +#include "kvm/kvm.h" +#include "kvm/boot-protocol.h" +#include "kvm/cpufeature.h" +#include "kvm/interrupt.h" +#include "kvm/mptable.h" +#include "kvm/util.h" + +#include <asm/bootparam.h> +#include <linux/kvm.h> + +#include <sys/types.h> +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <stdbool.h> +#include <assert.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <stdio.h> +#include <fcntl.h> +#include <asm/unistd.h> + +struct kvm_ext kvm_req_ext[] = { + { DEFINE_KVM_EXT(KVM_CAP_COALESCED_MMIO) }, + { DEFINE_KVM_EXT(KVM_CAP_SET_TSS_ADDR) }, + { DEFINE_KVM_EXT(KVM_CAP_PIT2) }, + { DEFINE_KVM_EXT(KVM_CAP_USER_MEMORY) }, + { DEFINE_KVM_EXT(KVM_CAP_IRQ_ROUTING) }, + { DEFINE_KVM_EXT(KVM_CAP_IRQCHIP) }, + { DEFINE_KVM_EXT(KVM_CAP_HLT) }, + { DEFINE_KVM_EXT(KVM_CAP_IRQ_INJECT_STATUS) }, + { DEFINE_KVM_EXT(KVM_CAP_EXT_CPUID) }, + { 0, 0 } +}; + +bool kvm__arch_cpu_supports_vm(void) +{ + struct cpuid_regs regs; + u32 eax_base; + int feature; + + regs = (struct cpuid_regs) { + .eax = 0x00, + }; + host_cpuid(&regs); + + switch (regs.ebx) { + case CPUID_VENDOR_INTEL_1: + eax_base = 0x00; + feature = KVM__X86_FEATURE_VMX; + break; + + case CPUID_VENDOR_AMD_1: + eax_base = 0x80000000; + feature = KVM__X86_FEATURE_SVM; + break; + + default: + return false; + } + + regs = (struct cpuid_regs) { + .eax = eax_base, + }; +
host_cpuid(&regs); + + if (regs.eax < eax_base + 0x01) + return false; + + regs = (struct cpuid_regs) { + .eax = eax_base + 0x01 + }; + host_cpuid(&regs); + + return regs.ecx & (1 << feature); +} + +/* + * Allocating RAM size bigger than 4GB requires us to leave a gap + * in the RAM which is used for PCI MMIO, hotplug, and unconfigured + * devices (see documentation of e820_setup_gap() for details). + * + * If we're required to initialize RAM bigger than 4GB, we will create + * a gap between 0xe0000000 and 0x100000000 in the guest virtual mem space. + */ + +void kvm__init_ram(struct kvm *kvm) +{ + u64 phys_start, phys_size; + void *host_mem; + + if (kvm->ram_size < KVM_32BIT_GAP_START) { + /* Use a single block of RAM for 32bit RAM */ + + phys_start = 0; + phys_size = kvm->ram_size; + host_mem = kvm->ram_start; + + kvm__register_mem(kvm, phys_start, phys_size, host_mem); + } else { + /* First RAM range from zero to the PCI gap: */ + + phys_start = 0; + phys_size = KVM_32BIT_GAP_START; + host_mem = kvm->ram_start; + + kvm__register_mem(kvm, phys_start, phys_size, host_mem); + + /* Second RAM range from 4GB to the end of RAM: */ + + phys_start = 0x100000000ULL; + phys_size = kvm->ram_size - phys_size; + host_mem = kvm->ram_start + phys_start; + + kvm__register_mem(kvm, phys_start, phys_size, host_mem); + } +} + +/* Architecture-specific KVM init */ +void kvm__arch_init(struct kvm *kvm, const char *kvm_dev, u64 ram_size, const char *name) +{ + struct kvm_pit_config pit_config = { .flags = 0, }; + int ret; + + ret = ioctl(kvm->vm_fd, KVM_SET_TSS_ADDR, 0xfffbd000); + if (ret < 0) + die_perror("KVM_SET_TSS_ADDR ioctl"); + + ret = ioctl(kvm->vm_fd, KVM_CREATE_PIT2, &pit_config); + if (ret < 0) + die_perror("KVM_CREATE_PIT2 ioctl"); + + kvm->ram_size = ram_size; + + if (kvm->ram_size < KVM_32BIT_GAP_START) { + kvm->ram_start = mmap(NULL, ram_size, PROT_RW, MAP_ANON_NORESERVE, -1, 0); + } else { + kvm->ram_start = mmap(NULL, ram_size + KVM_32BIT_GAP_SIZE, PROT_RW,
MAP_ANON_NORESERVE, -1, 0); + if (kvm->ram_start != MAP_FAILED) { + /* + * We mprotect the gap (see kvm__init_ram() for details) PROT_NONE so that + * if we accidently write to it, we will know. + */ + mprotect(kvm->ram_start + KVM_32BIT_GAP_START, KVM_32BIT_GAP_SIZE, PROT_NONE); + } + } + if (kvm->ram_start == MAP_FAILED) + die("out of memory"); + + madvise(kvm->ram_start, kvm->ram_size, MADV_MERGEABLE); + + ret = ioctl(kvm->vm_fd, KVM_CREATE_IRQCHIP); + if (ret < 0) + die_perror("KVM_CREATE_IRQCHIP ioctl"); +} + +void kvm__irq_line(struct kvm *kvm, int irq, int level) +{ + struct kvm_irq_level irq_level; + + irq_level = (struct kvm_irq_level) { + { + .irq = irq, + }, + .level = level, + }; + + if (ioctl(kvm->vm_fd, KVM_IRQ_LINE, &irq_level) < 0) + die_perror("KVM_IRQ_LINE failed"); +} + +void kvm__irq_trigger(struct kvm *kvm, int irq) +{ + kvm__irq_line(kvm, irq, 1); + kvm__irq_line(kvm, irq, 0); +} + +#define BOOT_LOADER_SELECTOR 0x1000 +#define BOOT_LOADER_IP 0x0000 +#define BOOT_LOADER_SP 0x8000 +#define BOOT_CMDLINE_OFFSET 0x20000 + +#define BOOT_PROTOCOL_REQUIRED 0x206 +#define LOAD_HIGH 0x01 + +int load_flat_binary(struct kvm *kvm, int fd) +{ + void *p; + int nr; + + if (lseek(fd, 0, SEEK_SET) < 0) + die_perror("lseek"); + + p = guest_real_to_host(kvm, BOOT_LOADER_SELECTOR, BOOT_LOADER_IP); + + while ((nr = read(fd, p, 65536)) > 0) + p += nr; + + kvm->boot_selector = BOOT_LOADER_SELECTOR; + kvm->boot_ip = BOOT_LOADER_IP; + kvm->boot_sp = BOOT_LOADER_SP; + + return true; +} + +static const char *BZIMAGE_MAGIC = "HdrS"; + +bool load_bzimage(struct kvm *kvm, int fd_kernel, + int fd_initrd, const char *kernel_cmdline, u16 vidmode) +{ + struct boot_params *kern_boot; + unsigned long setup_sects; + struct boot_params boot; + size_t cmdline_size; + ssize_t setup_size; + void *p; + int nr; + + /* + * See Documentation/x86/boot.txt for details no bzImage on-disk and + * memory layout. 
+ */ + + if (lseek(fd_kernel, 0, SEEK_SET) < 0) + die_perror("lseek"); + + if (read(fd_kernel, &boot, sizeof(boot)) != sizeof(boot)) + return false; + + if (memcmp(&boot.hdr.header, BZIMAGE_MAGIC, strlen(BZIMAGE_MAGIC))) + return false; + + if (boot.hdr.version < BOOT_PROTOCOL_REQUIRED) + die("Too old kernel"); + + if (lseek(fd_kernel, 0, SEEK_SET) < 0) + die_perror("lseek"); + + if (!boot.hdr.setup_sects) + boot.hdr.setup_sects = BZ_DEFAULT_SETUP_SECTS; + setup_sects = boot.hdr.setup_sects + 1; + + setup_size = setup_sects << 9; + p = guest_real_to_host(kvm, BOOT_LOADER_SELECTOR, BOOT_LOADER_IP); + + /* copy setup.bin to mem*/ + if (read(fd_kernel, p, setup_size) != setup_size) + die_perror("read"); + + /* copy vmlinux.bin to BZ_KERNEL_START*/ + p = guest_flat_to_host(kvm, BZ_KERNEL_START); + + while ((nr = read(fd_kernel, p, 65536)) > 0) + p += nr; + + p = guest_flat_to_host(kvm, BOOT_CMDLINE_OFFSET); + if (kernel_cmdline) { + cmdline_size = strlen(kernel_cmdline) + 1; + if (cmdline_size > boot.hdr.cmdline_size) + cmdline_size = boot.hdr.cmdline_size; + + memset(p, 0, boot.hdr.cmdline_size); + memcpy(p, kernel_cmdline, cmdline_size - 1); + } + + kern_boot = guest_real_to_host(kvm, BOOT_LOADER_SELECTOR, 0x00); + + kern_boot->hdr.cmd_line_ptr = BOOT_CMDLINE_OFFSET; + kern_boot->hdr.type_of_loader = 0xff; + kern_boot->hdr.heap_end_ptr = 0xfe00; + kern_boot->hdr.loadflags |= CAN_USE_HEAP; + kern_boot->hdr.vid_mode = vidmode; + + /* + * Read initrd image into guest memory + */ + if (fd_initrd >= 0) { + struct stat initrd_stat; + unsigned long addr; + + if (fstat(fd_initrd, &initrd_stat)) + die_perror("fstat"); + + addr = boot.hdr.initrd_addr_max & ~0xfffff; + for (;;) { + if (addr < BZ_KERNEL_START) + die("Not enough memory for initrd"); + else if (addr < (kvm->ram_size - initrd_stat.st_size)) + break; + addr -= 0x100000; + } + + p = guest_flat_to_host(kvm, addr); + nr = read(fd_initrd, p, initrd_stat.st_size); + if (nr != initrd_stat.st_size) + die("Failed to read 
initrd"); + + kern_boot->hdr.ramdisk_image = addr; + kern_boot->hdr.ramdisk_size = initrd_stat.st_size; + } + + kvm->boot_selector = BOOT_LOADER_SELECTOR; + /* + * The real-mode setup code starts at offset 0x200 of a bzImage. See + * Documentation/x86/boot.txt for details. + */ + kvm->boot_ip = BOOT_LOADER_IP + 0x200; + kvm->boot_sp = BOOT_LOADER_SP; + + return true; +} + +/** + * kvm__arch_setup_firmware - inject BIOS into guest system memory + * @kvm - guest system descriptor + * + * This function is a main routine where we poke guest memory + * and install BIOS there. + */ +void kvm__arch_setup_firmware(struct kvm *kvm) +{ + /* standard minimal configuration */ + setup_bios(kvm); + + /* FIXME: SMP, ACPI and friends here */ + + /* MP table */ + mptable_setup(kvm, kvm->nrcpus); +} diff --git a/tools/kvm/mptable.c b/tools/kvm/x86/mptable.c similarity index 100% rename from tools/kvm/mptable.c rename to tools/kvm/x86/mptable.c -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html