The code doesn't build after this patch due to missing header issues which you fixed in patches #10 & #11. Could you please move those two to the beginning of the series for the sake of bisectability? On Tue, 2011-12-06 at 14:37 +1100, Matt Evans wrote: > Create a new arch-specific subdirectory to contain architecture-specific code > and includes. > > The Makefile now adds various arch-specific objects based on detected > architecture. That aside, this patch should only contain code moves. These > include: > > - x86-specific kvm_cpu setup, kernel loading, memory setup etc. now in > x86/kvm{-cpu}.c > - BIOS now lives in x86/bios/ > - ioport setup > - KVM extensions are asserted in arch-specific kvm.c now, so each architecture > can manage its own dependencies. > - Various architecture-specific #defines are moved into $(ARCH)/include/kvm{-cpu}.h > such as struct kvm_cpu, KVM_NR_CPUS, KVM_32BIT_GAP_SIZE. > > Signed-off-by: Matt Evans <matt@xxxxxxxxxx> > --- > tools/kvm/Makefile | 96 ++++--- > tools/kvm/builtin-run.c | 6 +- > tools/kvm/include/kvm/ioport.h | 2 +- > tools/kvm/include/kvm/kvm-cpu.h | 27 +-- > tools/kvm/include/kvm/kvm.h | 58 +--- > tools/kvm/ioport.c | 54 ---- > tools/kvm/kvm-cpu.c | 372 ---------------------- > tools/kvm/kvm.c | 323 +------------------- > tools/kvm/{ => x86}/bios.c | 0 > tools/kvm/{ => x86}/bios/.gitignore | 0 > tools/kvm/{ => x86}/bios/bios-rom.S | 2 +- > tools/kvm/{ => x86}/bios/e820.c | 0 > tools/kvm/{ => x86}/bios/entry.S | 0 > tools/kvm/{ => x86}/bios/gen-offsets.sh | 0 > tools/kvm/{ => x86}/bios/int10.c | 0 > tools/kvm/{ => x86}/bios/int15.c | 0 > tools/kvm/{ => x86}/bios/local.S | 0 > tools/kvm/{ => x86}/bios/macro.S | 0 > tools/kvm/{ => x86}/bios/memcpy.c | 0 > tools/kvm/{ => x86}/bios/rom.ld.S | 0 > tools/kvm/{ => x86}/cpuid.c | 0 > tools/kvm/{ => x86}/include/kvm/assembly.h | 0 > tools/kvm/{ => x86}/include/kvm/barrier.h | 0 > tools/kvm/{ => x86}/include/kvm/bios-export.h | 0 > tools/kvm/{ => x86}/include/kvm/bios.h | 0 > 
tools/kvm/{ => x86}/include/kvm/boot-protocol.h | 0 > tools/kvm/{ => x86}/include/kvm/cpufeature.h | 0 > tools/kvm/{ => x86}/include/kvm/interrupt.h | 0 > tools/kvm/x86/include/kvm/kvm-arch.h | 59 ++++ > tools/kvm/x86/include/kvm/kvm-cpu-arch.h | 33 ++ > tools/kvm/{ => x86}/include/kvm/mptable.h | 0 > tools/kvm/{ => x86}/interrupt.c | 0 > tools/kvm/x86/ioport.c | 59 ++++ > tools/kvm/{ => x86}/irq.c | 0 > tools/kvm/x86/kvm-cpu.c | 383 +++++++++++++++++++++++ > tools/kvm/x86/kvm.c | 330 +++++++++++++++++++ > tools/kvm/{ => x86}/mptable.c | 0 > 37 files changed, 951 insertions(+), 853 deletions(-) > rename tools/kvm/{ => x86}/bios.c (100%) > rename tools/kvm/{ => x86}/bios/.gitignore (100%) > rename tools/kvm/{ => x86}/bios/bios-rom.S (80%) > rename tools/kvm/{ => x86}/bios/e820.c (100%) > rename tools/kvm/{ => x86}/bios/entry.S (100%) > rename tools/kvm/{ => x86}/bios/gen-offsets.sh (100%) > rename tools/kvm/{ => x86}/bios/int10.c (100%) > rename tools/kvm/{ => x86}/bios/int15.c (100%) > rename tools/kvm/{ => x86}/bios/local.S (100%) > rename tools/kvm/{ => x86}/bios/macro.S (100%) > rename tools/kvm/{ => x86}/bios/memcpy.c (100%) > rename tools/kvm/{ => x86}/bios/rom.ld.S (100%) > rename tools/kvm/{ => x86}/cpuid.c (100%) > rename tools/kvm/{ => x86}/include/kvm/assembly.h (100%) > rename tools/kvm/{ => x86}/include/kvm/barrier.h (100%) > rename tools/kvm/{ => x86}/include/kvm/bios-export.h (100%) > rename tools/kvm/{ => x86}/include/kvm/bios.h (100%) > rename tools/kvm/{ => x86}/include/kvm/boot-protocol.h (100%) > rename tools/kvm/{ => x86}/include/kvm/cpufeature.h (100%) > rename tools/kvm/{ => x86}/include/kvm/interrupt.h (100%) > create mode 100644 tools/kvm/x86/include/kvm/kvm-arch.h > create mode 100644 tools/kvm/x86/include/kvm/kvm-cpu-arch.h > rename tools/kvm/{ => x86}/include/kvm/mptable.h (100%) > rename tools/kvm/{ => x86}/interrupt.c (100%) > create mode 100644 tools/kvm/x86/ioport.c > rename tools/kvm/{ => x86}/irq.c (100%) > create mode 100644 
tools/kvm/x86/kvm-cpu.c > create mode 100644 tools/kvm/x86/kvm.c > rename tools/kvm/{ => x86}/mptable.c (100%) > > diff --git a/tools/kvm/Makefile b/tools/kvm/Makefile > index bb5f6b0..243886e 100644 > --- a/tools/kvm/Makefile > +++ b/tools/kvm/Makefile > @@ -33,13 +33,11 @@ OBJS += builtin-run.o > OBJS += builtin-setup.o > OBJS += builtin-stop.o > OBJS += builtin-version.o > -OBJS += cpuid.o > OBJS += disk/core.o > OBJS += framebuffer.o > OBJS += guest_compat.o > OBJS += hw/rtc.o > OBJS += hw/serial.o > -OBJS += interrupt.o > OBJS += ioport.o > OBJS += kvm-cpu.o > OBJS += kvm.o > @@ -61,7 +59,6 @@ OBJS += disk/blk.o > OBJS += disk/qcow.o > OBJS += disk/raw.o > OBJS += ioeventfd.o > -OBJS += irq.o > OBJS += net/uip/core.o > OBJS += net/uip/arp.o > OBJS += net/uip/icmp.o > @@ -72,7 +69,6 @@ OBJS += net/uip/buf.o > OBJS += net/uip/csum.o > OBJS += net/uip/dhcp.o > OBJS += kvm-cmd.o > -OBJS += mptable.o > OBJS += rbtree.o > OBJS += threadpool.o > OBJS += util/parse-options.o > @@ -123,12 +119,6 @@ ifeq ($(has_AIO),y) > LIBS += -laio > endif > > -DEPS := $(patsubst %.o,%.d,$(OBJS)) > - > -# Exclude BIOS object files from header dependencies. > -OBJS += bios.o > -OBJS += bios/bios-rom.o > - > LIBS += -lrt > LIBS += -lpthread > LIBS += -lutil > @@ -150,12 +140,43 @@ ifeq ($(uname_M),x86_64) > DEFINES += -DCONFIG_X86_64 > endif > > + > +### Arch-specific stuff > + > +#x86 > +ifeq ($(ARCH),x86) > + DEFINES += -DCONFIG_X86 > + OBJS += x86/cpuid.o > + OBJS += x86/interrupt.o > + OBJS += x86/ioport.o > + OBJS += x86/irq.o > + OBJS += x86/kvm.o > + OBJS += x86/kvm-cpu.o > + OBJS += x86/mptable.o > +# Exclude BIOS object files from header dependencies. > + OTHEROBJS += x86/bios.o > + OTHEROBJS += x86/bios/bios-rom.o > + ARCH_INCLUDE := x86/include > +endif > + > +### > + > +ifeq (,$(ARCH_INCLUDE)) > + UNSUPP_ERR = @echo "This architecture is not supported in kvmtool." 
&& exit 1 > +else > + UNSUPP_ERR = > +endif > + > +DEPS := $(patsubst %.o,%.d,$(OBJS)) > +OBJS += $(OTHEROBJS) > + > DEFINES += -D_FILE_OFFSET_BITS=64 > DEFINES += -D_GNU_SOURCE > DEFINES += -DKVMTOOLS_VERSION='"$(KVMTOOLS_VERSION)"' > +DEFINES += -DBUILD_ARCH='"$(ARCH)"' > > KVM_INCLUDE := include > -CFLAGS += $(CPPFLAGS) $(DEFINES) -I$(KVM_INCLUDE) -I../../include -I../../arch/$(ARCH)/include/ -Os -g > +CFLAGS += $(CPPFLAGS) $(DEFINES) -I$(KVM_INCLUDE) -I$(ARCH_INCLUDE) -I../../include -I../../arch/$(ARCH)/include/ -Os -g > > ifneq ($(WERROR),0) > WARNINGS += -Werror > @@ -179,7 +200,10 @@ WARNINGS += -Wwrite-strings > > CFLAGS += $(WARNINGS) > > -all: $(PROGRAM) $(GUEST_INIT) > +all: arch_support_check $(PROGRAM) $(GUEST_INIT) > + > +arch_support_check: > + $(UNSUPP_ERR) > > KVMTOOLS-VERSION-FILE: > @$(SHELL_PATH) util/KVMTOOLS-VERSION-GEN $(OUTPUT) > @@ -227,33 +251,33 @@ BIOS_CFLAGS += -mregparm=3 > BIOS_CFLAGS += -fno-stack-protector > BIOS_CFLAGS += -I../../arch/$(ARCH) > > -bios.o: bios/bios.bin bios/bios-rom.h > - > -bios/bios.bin.elf: bios/entry.S bios/e820.c bios/int10.c bios/int15.c bios/rom.ld.S > - $(E) " CC bios/memcpy.o" > - $(Q) $(CC) -include code16gcc.h $(CFLAGS) $(BIOS_CFLAGS) -c -s bios/memcpy.c -o bios/memcpy.o > - $(E) " CC bios/e820.o" > - $(Q) $(CC) -include code16gcc.h $(CFLAGS) $(BIOS_CFLAGS) -c -s bios/e820.c -o bios/e820.o > - $(E) " CC bios/int10.o" > - $(Q) $(CC) -include code16gcc.h $(CFLAGS) $(BIOS_CFLAGS) -c -s bios/int10.c -o bios/int10.o > - $(E) " CC bios/int15.o" > - $(Q) $(CC) -include code16gcc.h $(CFLAGS) $(BIOS_CFLAGS) -c -s bios/int15.c -o bios/int15.o > - $(E) " CC bios/entry.o" > - $(Q) $(CC) $(CFLAGS) $(BIOS_CFLAGS) -c -s bios/entry.S -o bios/entry.o > +x86/bios.o: x86/bios/bios.bin x86/bios/bios-rom.h > + > +x86/bios/bios.bin.elf: x86/bios/entry.S x86/bios/e820.c x86/bios/int10.c x86/bios/int15.c x86/bios/rom.ld.S > + $(E) " CC x86/bios/memcpy.o" > + $(Q) $(CC) -include code16gcc.h $(CFLAGS) $(BIOS_CFLAGS) -c -s 
x86/bios/memcpy.c -o x86/bios/memcpy.o > + $(E) " CC x86/bios/e820.o" > + $(Q) $(CC) -include code16gcc.h $(CFLAGS) $(BIOS_CFLAGS) -c -s x86/bios/e820.c -o x86/bios/e820.o > + $(E) " CC x86/bios/int10.o" > + $(Q) $(CC) -include code16gcc.h $(CFLAGS) $(BIOS_CFLAGS) -c -s x86/bios/int10.c -o x86/bios/int10.o > + $(E) " CC x86/bios/int15.o" > + $(Q) $(CC) -include code16gcc.h $(CFLAGS) $(BIOS_CFLAGS) -c -s x86/bios/int15.c -o x86/bios/int15.o > + $(E) " CC x86/bios/entry.o" > + $(Q) $(CC) $(CFLAGS) $(BIOS_CFLAGS) -c -s x86/bios/entry.S -o x86/bios/entry.o > $(E) " LD " $@ > - $(Q) ld -T bios/rom.ld.S -o bios/bios.bin.elf bios/memcpy.o bios/entry.o bios/e820.o bios/int10.o bios/int15.o > + $(Q) ld -T x86/bios/rom.ld.S -o x86/bios/bios.bin.elf x86/bios/memcpy.o x86/bios/entry.o x86/bios/e820.o x86/bios/int10.o x86/bios/int15.o > > -bios/bios.bin: bios/bios.bin.elf > +x86/bios/bios.bin: x86/bios/bios.bin.elf > $(E) " OBJCOPY " $@ > - $(Q) objcopy -O binary -j .text bios/bios.bin.elf bios/bios.bin > + $(Q) objcopy -O binary -j .text x86/bios/bios.bin.elf x86/bios/bios.bin > > -bios/bios-rom.o: bios/bios-rom.S bios/bios.bin bios/bios-rom.h > +x86/bios/bios-rom.o: x86/bios/bios-rom.S x86/bios/bios.bin x86/bios/bios-rom.h > $(E) " CC " $@ > - $(Q) $(CC) -c $(CFLAGS) bios/bios-rom.S -o bios/bios-rom.o > + $(Q) $(CC) -c $(CFLAGS) x86/bios/bios-rom.S -o x86/bios/bios-rom.o > > -bios/bios-rom.h: bios/bios.bin.elf > +x86/bios/bios-rom.h: x86/bios/bios.bin.elf > $(E) " NM " $@ > - $(Q) cd bios && sh gen-offsets.sh > bios-rom.h && cd .. > + $(Q) cd x86/bios && sh gen-offsets.sh > bios-rom.h && cd .. 
> > check: $(PROGRAM) > $(MAKE) -C tests > @@ -263,10 +287,10 @@ check: $(PROGRAM) > > clean: > $(E) " CLEAN" > - $(Q) rm -f bios/*.bin > - $(Q) rm -f bios/*.elf > - $(Q) rm -f bios/*.o > - $(Q) rm -f bios/bios-rom.h > + $(Q) rm -f x86/bios/*.bin > + $(Q) rm -f x86/bios/*.elf > + $(Q) rm -f x86/bios/*.o > + $(Q) rm -f x86/bios/bios-rom.h > $(Q) rm -f tests/boot/boot_test.iso > $(Q) rm -rf tests/boot/rootfs/ > $(Q) rm -f $(DEPS) $(OBJS) $(PROGRAM) $(GUEST_INIT) > diff --git a/tools/kvm/builtin-run.c b/tools/kvm/builtin-run.c > index 33de4f6..9148d83 100644 > --- a/tools/kvm/builtin-run.c > +++ b/tools/kvm/builtin-run.c > @@ -568,7 +568,7 @@ static const char *host_kernels[] = { > > static const char *default_kernels[] = { > "./bzImage", > - "../../arch/x86/boot/bzImage", > + "../../arch/" BUILD_ARCH "/boot/bzImage", > NULL > }; > > @@ -886,7 +886,7 @@ int kvm_cmd_run(int argc, const char **argv, const char *prefix) > > kvm->vmlinux = vmlinux_filename; > > - ioport__setup_legacy(); > + ioport__setup_arch(); > > rtc__init(); > > @@ -931,7 +931,7 @@ int kvm_cmd_run(int argc, const char **argv, const char *prefix) > > kvm__start_timer(kvm); > > - kvm__setup_bios(kvm); > + kvm__arch_setup_firmware(kvm); > > for (i = 0; i < nrcpus; i++) { > kvm_cpus[i] = kvm_cpu__init(kvm, i); > diff --git a/tools/kvm/include/kvm/ioport.h b/tools/kvm/include/kvm/ioport.h > index 5b857dd..61a70ec 100644 > --- a/tools/kvm/include/kvm/ioport.h > +++ b/tools/kvm/include/kvm/ioport.h > @@ -28,7 +28,7 @@ struct ioport_operations { > bool (*io_out)(struct ioport *ioport, struct kvm *kvm, u16 port, void *data, int size); > }; > > -void ioport__setup_legacy(void); > +void ioport__setup_arch(void); > > u16 ioport__register(u16 port, struct ioport_operations *ops, int count, void *param); > > diff --git a/tools/kvm/include/kvm/kvm-cpu.h b/tools/kvm/include/kvm/kvm-cpu.h > index 01540ac..719e286 100644 > --- a/tools/kvm/include/kvm/kvm-cpu.h > +++ b/tools/kvm/include/kvm/kvm-cpu.h > @@ -1,32 +1,7 @@ 
> #ifndef KVM__KVM_CPU_H > #define KVM__KVM_CPU_H > > -#include <linux/kvm.h> /* for struct kvm_regs */ > - > -#include <pthread.h> > - > -struct kvm; > - > -struct kvm_cpu { > - pthread_t thread; /* VCPU thread */ > - > - unsigned long cpu_id; > - > - struct kvm *kvm; /* parent KVM */ > - int vcpu_fd; /* For VCPU ioctls() */ > - struct kvm_run *kvm_run; > - > - struct kvm_regs regs; > - struct kvm_sregs sregs; > - struct kvm_fpu fpu; > - > - struct kvm_msrs *msrs; /* dynamically allocated */ > - > - u8 is_running; > - u8 paused; > - > - struct kvm_coalesced_mmio_ring *ring; > -}; > +#include "kvm/kvm-cpu-arch.h" > > struct kvm_cpu *kvm_cpu__init(struct kvm *kvm, unsigned long cpu_id); > void kvm_cpu__delete(struct kvm_cpu *vcpu); > diff --git a/tools/kvm/include/kvm/kvm.h b/tools/kvm/include/kvm/kvm.h > index 2b3024a..ca1acc0 100644 > --- a/tools/kvm/include/kvm/kvm.h > +++ b/tools/kvm/include/kvm/kvm.h > @@ -1,22 +1,13 @@ > #ifndef KVM__KVM_H > #define KVM__KVM_H > > -#include "kvm/interrupt.h" > -#include "kvm/segment.h" > +#include "kvm/kvm-arch.h" > > #include <stdbool.h> > #include <linux/types.h> > #include <time.h> > #include <signal.h> > > -#define KVM_NR_CPUS (255) > - > -/* > - * The hole includes VESA framebuffer and PCI memory. > - */ > -#define KVM_32BIT_GAP_SIZE (768 << 20) > -#define KVM_32BIT_GAP_START ((1ULL << 32) - KVM_32BIT_GAP_SIZE) > - > #define SIGKVMEXIT (SIGRTMIN + 0) > #define SIGKVMPAUSE (SIGRTMIN + 1) > #define SIGKVMSTOP (SIGRTMIN + 4) > @@ -25,33 +16,15 @@ > #define KVM_PID_FILE_PATH "/.kvm-tools/" > #define HOME_DIR getenv("HOME") > > -struct kvm { > - int sys_fd; /* For system ioctls(), i.e. 
/dev/kvm */ > - int vm_fd; /* For VM ioctls() */ > - timer_t timerid; /* Posix timer for interrupts */ > - > - int nrcpus; /* Number of cpus to run */ > - > - u32 mem_slots; /* for KVM_SET_USER_MEMORY_REGION */ > - > - u64 ram_size; > - void *ram_start; > - > - bool nmi_disabled; > - > - bool single_step; > +#define PAGE_SIZE (sysconf(_SC_PAGE_SIZE)) > > - u16 boot_selector; > - u16 boot_ip; > - u16 boot_sp; > +#define DEFINE_KVM_EXT(ext) \ > + .name = #ext, \ > + .code = ext > > - struct interrupt_table interrupt_table; > - > - const char *vmlinux; > - struct disk_image **disks; > - int nr_disks; > - > - const char *name; > +struct kvm_ext { > + const char *name; > + int code; > }; > > void kvm__set_dir(const char *fmt, ...); > @@ -64,7 +37,6 @@ void kvm__init_ram(struct kvm *kvm); > void kvm__delete(struct kvm *kvm); > bool kvm__load_kernel(struct kvm *kvm, const char *kernel_filename, > const char *initrd_filename, const char *kernel_cmdline, u16 vidmode); > -void kvm__setup_bios(struct kvm *kvm); > void kvm__start_timer(struct kvm *kvm); > void kvm__stop_timer(struct kvm *kvm); > void kvm__irq_line(struct kvm *kvm, int irq, int level); > @@ -81,6 +53,13 @@ int kvm__get_sock_by_instance(const char *name); > int kvm__enumerate_instances(int (*callback)(const char *name, int pid)); > void kvm__remove_socket(const char *name); > > +void kvm__arch_init(struct kvm *kvm, const char *kvm_dev, u64 ram_size, const char *name); > +void kvm__arch_setup_firmware(struct kvm *kvm); > +bool kvm__arch_cpu_supports_vm(void); > + > +int load_flat_binary(struct kvm *kvm, int fd); > +bool load_bzimage(struct kvm *kvm, int fd_kernel, int fd_initrd, const char *kernel_cmdline, u16 vidmode); > + > /* > * Debugging > */ > @@ -98,11 +77,4 @@ static inline void *guest_flat_to_host(struct kvm *kvm, unsigned long offset) > return kvm->ram_start + offset; > } > > -static inline void *guest_real_to_host(struct kvm *kvm, u16 selector, u16 offset) > -{ > - unsigned long flat = 
segment_to_flat(selector, offset); > - > - return guest_flat_to_host(kvm, flat); > -} > - > #endif /* KVM__KVM_H */ > diff --git a/tools/kvm/ioport.c b/tools/kvm/ioport.c > index 7cbc44e..965cfc2 100644 > --- a/tools/kvm/ioport.c > +++ b/tools/kvm/ioport.c > @@ -52,34 +52,6 @@ static int ioport_insert(struct rb_root *root, struct ioport *data) > return rb_int_insert(root, &data->node); > } > > -static bool debug_io_out(struct ioport *ioport, struct kvm *kvm, u16 port, void *data, int size) > -{ > - exit(EXIT_SUCCESS); > -} > - > -static struct ioport_operations debug_ops = { > - .io_out = debug_io_out, > -}; > - > -static bool dummy_io_in(struct ioport *ioport, struct kvm *kvm, u16 port, void *data, int size) > -{ > - return true; > -} > - > -static bool dummy_io_out(struct ioport *ioport, struct kvm *kvm, u16 port, void *data, int size) > -{ > - return true; > -} > - > -static struct ioport_operations dummy_read_write_ioport_ops = { > - .io_in = dummy_io_in, > - .io_out = dummy_io_out, > -}; > - > -static struct ioport_operations dummy_write_only_ioport_ops = { > - .io_out = dummy_io_out, > -}; > - > u16 ioport__register(u16 port, struct ioport_operations *ops, int count, void *param) > { > struct ioport *entry; > @@ -164,29 +136,3 @@ error: > > return !ioport_debug; > } > - > -void ioport__setup_legacy(void) > -{ > - /* 0x0020 - 0x003F - 8259A PIC 1 */ > - ioport__register(0x0020, &dummy_read_write_ioport_ops, 2, NULL); > - > - /* PORT 0040-005F - PIT - PROGRAMMABLE INTERVAL TIMER (8253, 8254) */ > - ioport__register(0x0040, &dummy_read_write_ioport_ops, 4, NULL); > - > - /* 0x00A0 - 0x00AF - 8259A PIC 2 */ > - ioport__register(0x00A0, &dummy_read_write_ioport_ops, 2, NULL); > - > - /* PORT 00E0-00EF are 'motherboard specific' so we use them for our > - internal debugging purposes. */ > - ioport__register(IOPORT_DBG, &debug_ops, 1, NULL); > - > - /* PORT 00ED - DUMMY PORT FOR DELAY??? 
*/ > - ioport__register(0x00ED, &dummy_write_only_ioport_ops, 1, NULL); > - > - /* 0x00F0 - 0x00FF - Math co-processor */ > - ioport__register(0x00F0, &dummy_write_only_ioport_ops, 2, NULL); > - > - /* PORT 03D4-03D5 - COLOR VIDEO - CRT CONTROL REGISTERS */ > - ioport__register(0x03D4, &dummy_read_write_ioport_ops, 1, NULL); > - ioport__register(0x03D5, &dummy_write_only_ioport_ops, 1, NULL); > -} > diff --git a/tools/kvm/kvm-cpu.c b/tools/kvm/kvm-cpu.c > index 0ad6f3b..5aba3bb 100644 > --- a/tools/kvm/kvm-cpu.c > +++ b/tools/kvm/kvm-cpu.c > @@ -4,8 +4,6 @@ > #include "kvm/util.h" > #include "kvm/kvm.h" > > -#include <asm/msr-index.h> > - > #include <sys/ioctl.h> > #include <sys/mman.h> > #include <signal.h> > @@ -14,106 +12,9 @@ > #include <errno.h> > #include <stdio.h> > > -#define PAGE_SIZE (sysconf(_SC_PAGE_SIZE)) > - > extern struct kvm_cpu *kvm_cpus[KVM_NR_CPUS]; > extern __thread struct kvm_cpu *current_kvm_cpu; > > -static int debug_fd; > - > -void kvm_cpu__set_debug_fd(int fd) > -{ > - debug_fd = fd; > -} > - > -int kvm_cpu__get_debug_fd(void) > -{ > - return debug_fd; > -} > - > -static inline bool is_in_protected_mode(struct kvm_cpu *vcpu) > -{ > - return vcpu->sregs.cr0 & 0x01; > -} > - > -static inline u64 ip_to_flat(struct kvm_cpu *vcpu, u64 ip) > -{ > - u64 cs; > - > - /* > - * NOTE! We should take code segment base address into account here. > - * Luckily it's usually zero because Linux uses flat memory model. > - */ > - if (is_in_protected_mode(vcpu)) > - return ip; > - > - cs = vcpu->sregs.cs.selector; > - > - return ip + (cs << 4); > -} > - > -static inline u32 selector_to_base(u16 selector) > -{ > - /* > - * KVM on Intel requires 'base' to be 'selector * 16' in real mode. 
> - */ > - return (u32)selector * 16; > -} > - > -static struct kvm_cpu *kvm_cpu__new(struct kvm *kvm) > -{ > - struct kvm_cpu *vcpu; > - > - vcpu = calloc(1, sizeof *vcpu); > - if (!vcpu) > - return NULL; > - > - vcpu->kvm = kvm; > - > - return vcpu; > -} > - > -void kvm_cpu__delete(struct kvm_cpu *vcpu) > -{ > - if (vcpu->msrs) > - free(vcpu->msrs); > - > - free(vcpu); > -} > - > -struct kvm_cpu *kvm_cpu__init(struct kvm *kvm, unsigned long cpu_id) > -{ > - struct kvm_cpu *vcpu; > - int mmap_size; > - int coalesced_offset; > - > - vcpu = kvm_cpu__new(kvm); > - if (!vcpu) > - return NULL; > - > - vcpu->cpu_id = cpu_id; > - > - vcpu->vcpu_fd = ioctl(vcpu->kvm->vm_fd, KVM_CREATE_VCPU, cpu_id); > - if (vcpu->vcpu_fd < 0) > - die_perror("KVM_CREATE_VCPU ioctl"); > - > - mmap_size = ioctl(vcpu->kvm->sys_fd, KVM_GET_VCPU_MMAP_SIZE, 0); > - if (mmap_size < 0) > - die_perror("KVM_GET_VCPU_MMAP_SIZE ioctl"); > - > - vcpu->kvm_run = mmap(NULL, mmap_size, PROT_RW, MAP_SHARED, vcpu->vcpu_fd, 0); > - if (vcpu->kvm_run == MAP_FAILED) > - die("unable to mmap vcpu fd"); > - > - coalesced_offset = ioctl(kvm->sys_fd, KVM_CHECK_EXTENSION, KVM_CAP_COALESCED_MMIO); > - if (coalesced_offset) > - vcpu->ring = (void *)vcpu->kvm_run + (coalesced_offset * PAGE_SIZE); > - > - vcpu->is_running = true; > - > - return vcpu; > -} > - > void kvm_cpu__enable_singlestep(struct kvm_cpu *vcpu) > { > struct kvm_guest_debug debug = { > @@ -124,278 +25,6 @@ void kvm_cpu__enable_singlestep(struct kvm_cpu *vcpu) > pr_warning("KVM_SET_GUEST_DEBUG failed"); > } > > -static struct kvm_msrs *kvm_msrs__new(size_t nmsrs) > -{ > - struct kvm_msrs *vcpu = calloc(1, sizeof(*vcpu) + (sizeof(struct kvm_msr_entry) * nmsrs)); > - > - if (!vcpu) > - die("out of memory"); > - > - return vcpu; > -} > - > -#define KVM_MSR_ENTRY(_index, _data) \ > - (struct kvm_msr_entry) { .index = _index, .data = _data } > - > -static void kvm_cpu__setup_msrs(struct kvm_cpu *vcpu) > -{ > - unsigned long ndx = 0; > - > - vcpu->msrs = 
kvm_msrs__new(100); > - > - vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_CS, 0x0); > - vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_ESP, 0x0); > - vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_EIP, 0x0); > -#ifdef CONFIG_X86_64 > - vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_STAR, 0x0); > - vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_CSTAR, 0x0); > - vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_KERNEL_GS_BASE, 0x0); > - vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_SYSCALL_MASK, 0x0); > - vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_LSTAR, 0x0); > -#endif > - vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_TSC, 0x0); > - vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_MISC_ENABLE, > - MSR_IA32_MISC_ENABLE_FAST_STRING); > - > - vcpu->msrs->nmsrs = ndx; > - > - if (ioctl(vcpu->vcpu_fd, KVM_SET_MSRS, vcpu->msrs) < 0) > - die_perror("KVM_SET_MSRS failed"); > -} > - > -static void kvm_cpu__setup_fpu(struct kvm_cpu *vcpu) > -{ > - vcpu->fpu = (struct kvm_fpu) { > - .fcw = 0x37f, > - .mxcsr = 0x1f80, > - }; > - > - if (ioctl(vcpu->vcpu_fd, KVM_SET_FPU, &vcpu->fpu) < 0) > - die_perror("KVM_SET_FPU failed"); > -} > - > -static void kvm_cpu__setup_regs(struct kvm_cpu *vcpu) > -{ > - vcpu->regs = (struct kvm_regs) { > - /* We start the guest in 16-bit real mode */ > - .rflags = 0x0000000000000002ULL, > - > - .rip = vcpu->kvm->boot_ip, > - .rsp = vcpu->kvm->boot_sp, > - .rbp = vcpu->kvm->boot_sp, > - }; > - > - if (vcpu->regs.rip > USHRT_MAX) > - die("ip 0x%llx is too high for real mode", (u64) vcpu->regs.rip); > - > - if (ioctl(vcpu->vcpu_fd, KVM_SET_REGS, &vcpu->regs) < 0) > - die_perror("KVM_SET_REGS failed"); > -} > - > -static void kvm_cpu__setup_sregs(struct kvm_cpu *vcpu) > -{ > - > - if (ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, &vcpu->sregs) < 0) > - die_perror("KVM_GET_SREGS failed"); > - > - vcpu->sregs.cs.selector = vcpu->kvm->boot_selector; > - vcpu->sregs.cs.base = 
selector_to_base(vcpu->kvm->boot_selector); > - vcpu->sregs.ss.selector = vcpu->kvm->boot_selector; > - vcpu->sregs.ss.base = selector_to_base(vcpu->kvm->boot_selector); > - vcpu->sregs.ds.selector = vcpu->kvm->boot_selector; > - vcpu->sregs.ds.base = selector_to_base(vcpu->kvm->boot_selector); > - vcpu->sregs.es.selector = vcpu->kvm->boot_selector; > - vcpu->sregs.es.base = selector_to_base(vcpu->kvm->boot_selector); > - vcpu->sregs.fs.selector = vcpu->kvm->boot_selector; > - vcpu->sregs.fs.base = selector_to_base(vcpu->kvm->boot_selector); > - vcpu->sregs.gs.selector = vcpu->kvm->boot_selector; > - vcpu->sregs.gs.base = selector_to_base(vcpu->kvm->boot_selector); > - > - if (ioctl(vcpu->vcpu_fd, KVM_SET_SREGS, &vcpu->sregs) < 0) > - die_perror("KVM_SET_SREGS failed"); > -} > - > -/** > - * kvm_cpu__reset_vcpu - reset virtual CPU to a known state > - */ > -void kvm_cpu__reset_vcpu(struct kvm_cpu *vcpu) > -{ > - kvm_cpu__setup_sregs(vcpu); > - kvm_cpu__setup_regs(vcpu); > - kvm_cpu__setup_fpu(vcpu); > - kvm_cpu__setup_msrs(vcpu); > -} > - > -static void print_dtable(const char *name, struct kvm_dtable *dtable) > -{ > - dprintf(debug_fd, " %s %016llx %08hx\n", > - name, (u64) dtable->base, (u16) dtable->limit); > -} > - > -static void print_segment(const char *name, struct kvm_segment *seg) > -{ > - dprintf(debug_fd, " %s %04hx %016llx %08x %02hhx %x %x %x %x %x %x %x\n", > - name, (u16) seg->selector, (u64) seg->base, (u32) seg->limit, > - (u8) seg->type, seg->present, seg->dpl, seg->db, seg->s, seg->l, seg->g, seg->avl); > -} > - > -void kvm_cpu__show_registers(struct kvm_cpu *vcpu) > -{ > - unsigned long cr0, cr2, cr3; > - unsigned long cr4, cr8; > - unsigned long rax, rbx, rcx; > - unsigned long rdx, rsi, rdi; > - unsigned long rbp, r8, r9; > - unsigned long r10, r11, r12; > - unsigned long r13, r14, r15; > - unsigned long rip, rsp; > - struct kvm_sregs sregs; > - unsigned long rflags; > - struct kvm_regs regs; > - int i; > - > - if (ioctl(vcpu->vcpu_fd, 
KVM_GET_REGS, &regs) < 0) > - die("KVM_GET_REGS failed"); > - > - rflags = regs.rflags; > - > - rip = regs.rip; rsp = regs.rsp; > - rax = regs.rax; rbx = regs.rbx; rcx = regs.rcx; > - rdx = regs.rdx; rsi = regs.rsi; rdi = regs.rdi; > - rbp = regs.rbp; r8 = regs.r8; r9 = regs.r9; > - r10 = regs.r10; r11 = regs.r11; r12 = regs.r12; > - r13 = regs.r13; r14 = regs.r14; r15 = regs.r15; > - > - dprintf(debug_fd, "\n Registers:\n"); > - dprintf(debug_fd, " ----------\n"); > - dprintf(debug_fd, " rip: %016lx rsp: %016lx flags: %016lx\n", rip, rsp, rflags); > - dprintf(debug_fd, " rax: %016lx rbx: %016lx rcx: %016lx\n", rax, rbx, rcx); > - dprintf(debug_fd, " rdx: %016lx rsi: %016lx rdi: %016lx\n", rdx, rsi, rdi); > - dprintf(debug_fd, " rbp: %016lx r8: %016lx r9: %016lx\n", rbp, r8, r9); > - dprintf(debug_fd, " r10: %016lx r11: %016lx r12: %016lx\n", r10, r11, r12); > - dprintf(debug_fd, " r13: %016lx r14: %016lx r15: %016lx\n", r13, r14, r15); > - > - if (ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, &sregs) < 0) > - die("KVM_GET_REGS failed"); > - > - cr0 = sregs.cr0; cr2 = sregs.cr2; cr3 = sregs.cr3; > - cr4 = sregs.cr4; cr8 = sregs.cr8; > - > - dprintf(debug_fd, " cr0: %016lx cr2: %016lx cr3: %016lx\n", cr0, cr2, cr3); > - dprintf(debug_fd, " cr4: %016lx cr8: %016lx\n", cr4, cr8); > - dprintf(debug_fd, "\n Segment registers:\n"); > - dprintf(debug_fd, " ------------------\n"); > - dprintf(debug_fd, " register selector base limit type p dpl db s l g avl\n"); > - print_segment("cs ", &sregs.cs); > - print_segment("ss ", &sregs.ss); > - print_segment("ds ", &sregs.ds); > - print_segment("es ", &sregs.es); > - print_segment("fs ", &sregs.fs); > - print_segment("gs ", &sregs.gs); > - print_segment("tr ", &sregs.tr); > - print_segment("ldt", &sregs.ldt); > - print_dtable("gdt", &sregs.gdt); > - print_dtable("idt", &sregs.idt); > - > - dprintf(debug_fd, "\n APIC:\n"); > - dprintf(debug_fd, " -----\n"); > - dprintf(debug_fd, " efer: %016llx apic base: %016llx nmi: %s\n", > - (u64) 
sregs.efer, (u64) sregs.apic_base, > - (vcpu->kvm->nmi_disabled ? "disabled" : "enabled")); > - > - dprintf(debug_fd, "\n Interrupt bitmap:\n"); > - dprintf(debug_fd, " -----------------\n"); > - for (i = 0; i < (KVM_NR_INTERRUPTS + 63) / 64; i++) > - dprintf(debug_fd, " %016llx", (u64) sregs.interrupt_bitmap[i]); > - dprintf(debug_fd, "\n"); > -} > - > -#define MAX_SYM_LEN 128 > - > -void kvm_cpu__show_code(struct kvm_cpu *vcpu) > -{ > - unsigned int code_bytes = 64; > - unsigned int code_prologue = code_bytes * 43 / 64; > - unsigned int code_len = code_bytes; > - char sym[MAX_SYM_LEN]; > - unsigned char c; > - unsigned int i; > - u8 *ip; > - > - if (ioctl(vcpu->vcpu_fd, KVM_GET_REGS, &vcpu->regs) < 0) > - die("KVM_GET_REGS failed"); > - > - if (ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, &vcpu->sregs) < 0) > - die("KVM_GET_SREGS failed"); > - > - ip = guest_flat_to_host(vcpu->kvm, ip_to_flat(vcpu, vcpu->regs.rip) - code_prologue); > - > - dprintf(debug_fd, "\n Code:\n"); > - dprintf(debug_fd, " -----\n"); > - > - symbol__lookup(vcpu->kvm, vcpu->regs.rip, sym, MAX_SYM_LEN); > - > - dprintf(debug_fd, " rip: [<%016lx>] %s\n\n", (unsigned long) vcpu->regs.rip, sym); > - > - for (i = 0; i < code_len; i++, ip++) { > - if (!host_ptr_in_ram(vcpu->kvm, ip)) > - break; > - > - c = *ip; > - > - if (ip == guest_flat_to_host(vcpu->kvm, ip_to_flat(vcpu, vcpu->regs.rip))) > - dprintf(debug_fd, " <%02x>", c); > - else > - dprintf(debug_fd, " %02x", c); > - } > - > - dprintf(debug_fd, "\n"); > - > - dprintf(debug_fd, "\n Stack:\n"); > - dprintf(debug_fd, " ------\n"); > - kvm__dump_mem(vcpu->kvm, vcpu->regs.rsp, 32); > -} > - > -void kvm_cpu__show_page_tables(struct kvm_cpu *vcpu) > -{ > - u64 *pte1; > - u64 *pte2; > - u64 *pte3; > - u64 *pte4; > - > - if (!is_in_protected_mode(vcpu)) > - return; > - > - if (ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, &vcpu->sregs) < 0) > - die("KVM_GET_SREGS failed"); > - > - pte4 = guest_flat_to_host(vcpu->kvm, vcpu->sregs.cr3); > - if 
(!host_ptr_in_ram(vcpu->kvm, pte4)) > - return; > - > - pte3 = guest_flat_to_host(vcpu->kvm, (*pte4 & ~0xfff)); > - if (!host_ptr_in_ram(vcpu->kvm, pte3)) > - return; > - > - pte2 = guest_flat_to_host(vcpu->kvm, (*pte3 & ~0xfff)); > - if (!host_ptr_in_ram(vcpu->kvm, pte2)) > - return; > - > - pte1 = guest_flat_to_host(vcpu->kvm, (*pte2 & ~0xfff)); > - if (!host_ptr_in_ram(vcpu->kvm, pte1)) > - return; > - > - dprintf(debug_fd, "Page Tables:\n"); > - if (*pte2 & (1 << 7)) > - dprintf(debug_fd, " pte4: %016llx pte3: %016llx" > - " pte2: %016llx\n", > - *pte4, *pte3, *pte2); > - else > - dprintf(debug_fd, " pte4: %016llx pte3: %016llx pte2: %016" > - "llx pte1: %016llx\n", > - *pte4, *pte3, *pte2, *pte1); > -} > - > void kvm_cpu__run(struct kvm_cpu *vcpu) > { > int err; > @@ -454,7 +83,6 @@ int kvm_cpu__start(struct kvm_cpu *cpu) > signal(SIGKVMEXIT, kvm_cpu_signal_handler); > signal(SIGKVMPAUSE, kvm_cpu_signal_handler); > > - kvm_cpu__setup_cpuid(cpu); > kvm_cpu__reset_vcpu(cpu); > > if (cpu->kvm->single_step) > diff --git a/tools/kvm/kvm.c b/tools/kvm/kvm.c > index 252bd18..7ce1640 100644 > --- a/tools/kvm/kvm.c > +++ b/tools/kvm/kvm.c > @@ -1,10 +1,5 @@ > #include "kvm/kvm.h" > - > -#include "kvm/boot-protocol.h" > -#include "kvm/cpufeature.h" > #include "kvm/read-write.h" > -#include "kvm/interrupt.h" > -#include "kvm/mptable.h" > #include "kvm/util.h" > #include "kvm/mutex.h" > #include "kvm/kvm-cpu.h" > @@ -12,14 +7,11 @@ > > #include <linux/kvm.h> > > -#include <asm/bootparam.h> > - > #include <sys/un.h> > #include <sys/types.h> > #include <sys/socket.h> > #include <sys/ioctl.h> > #include <sys/mman.h> > -#include <sys/stat.h> > #include <stdbool.h> > #include <assert.h> > #include <limits.h> > @@ -58,29 +50,11 @@ const char *kvm_exit_reasons[] = { > DEFINE_KVM_EXIT_REASON(KVM_EXIT_INTERNAL_ERROR), > }; > > -#define DEFINE_KVM_EXT(ext) \ > - .name = #ext, \ > - .code = ext > - > -struct { > - const char *name; > - int code; > -} kvm_req_ext[] = { > - { 
DEFINE_KVM_EXT(KVM_CAP_COALESCED_MMIO) }, > - { DEFINE_KVM_EXT(KVM_CAP_SET_TSS_ADDR) }, > - { DEFINE_KVM_EXT(KVM_CAP_PIT2) }, > - { DEFINE_KVM_EXT(KVM_CAP_USER_MEMORY) }, > - { DEFINE_KVM_EXT(KVM_CAP_IRQ_ROUTING) }, > - { DEFINE_KVM_EXT(KVM_CAP_IRQCHIP) }, > - { DEFINE_KVM_EXT(KVM_CAP_HLT) }, > - { DEFINE_KVM_EXT(KVM_CAP_IRQ_INJECT_STATUS) }, > - { DEFINE_KVM_EXT(KVM_CAP_EXT_CPUID) }, > -}; > - > extern struct kvm *kvm; > extern struct kvm_cpu *kvm_cpus[KVM_NR_CPUS]; > static int pause_event; > static DEFINE_MUTEX(pause_lock); > +extern struct kvm_ext kvm_req_ext[]; > > static char kvm_dir[PATH_MAX]; > > @@ -127,7 +101,9 @@ static int kvm__check_extensions(struct kvm *kvm) > { > unsigned int i; > > - for (i = 0; i < ARRAY_SIZE(kvm_req_ext); i++) { > + for (i = 0; ; i++) { > + if (!kvm_req_ext[i].name) > + break; > if (!kvm__supports_extension(kvm, kvm_req_ext[i].code)) { > pr_error("Unsuppored KVM extension detected: %s", > kvm_req_ext[i].name); > @@ -261,48 +237,6 @@ void kvm__delete(struct kvm *kvm) > free(kvm); > } > > -static bool kvm__cpu_supports_vm(void) > -{ > - struct cpuid_regs regs; > - u32 eax_base; > - int feature; > - > - regs = (struct cpuid_regs) { > - .eax = 0x00, > - }; > - host_cpuid(&regs); > - > - switch (regs.ebx) { > - case CPUID_VENDOR_INTEL_1: > - eax_base = 0x00; > - feature = KVM__X86_FEATURE_VMX; > - break; > - > - case CPUID_VENDOR_AMD_1: > - eax_base = 0x80000000; > - feature = KVM__X86_FEATURE_SVM; > - break; > - > - default: > - return false; > - } > - > - regs = (struct cpuid_regs) { > - .eax = eax_base, > - }; > - host_cpuid(&regs); > - > - if (regs.eax < eax_base + 0x01) > - return false; > - > - regs = (struct cpuid_regs) { > - .eax = eax_base + 0x01 > - }; > - host_cpuid(&regs); > - > - return regs.ecx & (1 << feature); > -} > - > /* > * Note: KVM_SET_USER_MEMORY_REGION assumes that we don't pass overlapping > * memory regions to it. 
Therefore, be careful if you use this function for > @@ -325,47 +259,6 @@ void kvm__register_mem(struct kvm *kvm, u64 guest_phys, u64 size, void *userspac > die_perror("KVM_SET_USER_MEMORY_REGION ioctl"); > } > > -/* > - * Allocating RAM size bigger than 4GB requires us to leave a gap > - * in the RAM which is used for PCI MMIO, hotplug, and unconfigured > - * devices (see documentation of e820_setup_gap() for details). > - * > - * If we're required to initialize RAM bigger than 4GB, we will create > - * a gap between 0xe0000000 and 0x100000000 in the guest virtual mem space. > - */ > - > -void kvm__init_ram(struct kvm *kvm) > -{ > - u64 phys_start, phys_size; > - void *host_mem; > - > - if (kvm->ram_size < KVM_32BIT_GAP_START) { > - /* Use a single block of RAM for 32bit RAM */ > - > - phys_start = 0; > - phys_size = kvm->ram_size; > - host_mem = kvm->ram_start; > - > - kvm__register_mem(kvm, phys_start, phys_size, host_mem); > - } else { > - /* First RAM range from zero to the PCI gap: */ > - > - phys_start = 0; > - phys_size = KVM_32BIT_GAP_START; > - host_mem = kvm->ram_start; > - > - kvm__register_mem(kvm, phys_start, phys_size, host_mem); > - > - /* Second RAM range from 4GB to the end of RAM: */ > - > - phys_start = 0x100000000ULL; > - phys_size = kvm->ram_size - phys_size; > - host_mem = kvm->ram_start + phys_start; > - > - kvm__register_mem(kvm, phys_start, phys_size, host_mem); > - } > -} > - > int kvm__recommended_cpus(struct kvm *kvm) > { > int ret; > @@ -410,11 +303,10 @@ int kvm__max_cpus(struct kvm *kvm) > > struct kvm *kvm__init(const char *kvm_dev, u64 ram_size, const char *name) > { > - struct kvm_pit_config pit_config = { .flags = 0, }; > struct kvm *kvm; > int ret; > > - if (!kvm__cpu_supports_vm()) > + if (!kvm__arch_cpu_supports_vm()) > die("Your CPU does not support hardware virtualization"); > > kvm = kvm__new(); > @@ -442,36 +334,7 @@ struct kvm *kvm__init(const char *kvm_dev, u64 ram_size, const char *name) > if 
(kvm__check_extensions(kvm)) > die("A required KVM extention is not supported by OS"); > > - ret = ioctl(kvm->vm_fd, KVM_SET_TSS_ADDR, 0xfffbd000); > - if (ret < 0) > - die_perror("KVM_SET_TSS_ADDR ioctl"); > - > - ret = ioctl(kvm->vm_fd, KVM_CREATE_PIT2, &pit_config); > - if (ret < 0) > - die_perror("KVM_CREATE_PIT2 ioctl"); > - > - kvm->ram_size = ram_size; > - > - if (kvm->ram_size < KVM_32BIT_GAP_START) { > - kvm->ram_start = mmap(NULL, ram_size, PROT_RW, MAP_ANON_NORESERVE, -1, 0); > - } else { > - kvm->ram_start = mmap(NULL, ram_size + KVM_32BIT_GAP_SIZE, PROT_RW, MAP_ANON_NORESERVE, -1, 0); > - if (kvm->ram_start != MAP_FAILED) { > - /* > - * We mprotect the gap (see kvm__init_ram() for details) PROT_NONE so that > - * if we accidently write to it, we will know. > - */ > - mprotect(kvm->ram_start + KVM_32BIT_GAP_START, KVM_32BIT_GAP_SIZE, PROT_NONE); > - } > - } > - if (kvm->ram_start == MAP_FAILED) > - die("out of memory"); > - > - madvise(kvm->ram_start, kvm->ram_size, MADV_MERGEABLE); > - > - ret = ioctl(kvm->vm_fd, KVM_CREATE_IRQCHIP); > - if (ret < 0) > - die_perror("KVM_CREATE_IRQCHIP ioctl"); > + kvm__arch_init(kvm, kvm_dev, ram_size, name); > > kvm->name = name; > > @@ -480,141 +343,6 @@ struct kvm *kvm__init(const char *kvm_dev, u64 ram_size, const char *name) > return kvm; > } > > -#define BOOT_LOADER_SELECTOR 0x1000 > -#define BOOT_LOADER_IP 0x0000 > -#define BOOT_LOADER_SP 0x8000 > -#define BOOT_CMDLINE_OFFSET 0x20000 > - > -#define BOOT_PROTOCOL_REQUIRED 0x206 > -#define LOAD_HIGH 0x01 > - > -static int load_flat_binary(struct kvm *kvm, int fd) > -{ > - void *p; > - int nr; > - > - if (lseek(fd, 0, SEEK_SET) < 0) > - die_perror("lseek"); > - > - p = guest_real_to_host(kvm, BOOT_LOADER_SELECTOR, BOOT_LOADER_IP); > - > - while ((nr = read(fd, p, 65536)) > 0) > - p += nr; > - > - kvm->boot_selector = BOOT_LOADER_SELECTOR; > - kvm->boot_ip = BOOT_LOADER_IP; > - kvm->boot_sp = BOOT_LOADER_SP; > - > - return true; > -} > - > -static const char 
*BZIMAGE_MAGIC = "HdrS"; > - > -static bool load_bzimage(struct kvm *kvm, int fd_kernel, > - int fd_initrd, const char *kernel_cmdline, u16 vidmode) > -{ > - struct boot_params *kern_boot; > - unsigned long setup_sects; > - struct boot_params boot; > - size_t cmdline_size; > - ssize_t setup_size; > - void *p; > - int nr; > - > - /* > - * See Documentation/x86/boot.txt for details no bzImage on-disk and > - * memory layout. > - */ > - > - if (lseek(fd_kernel, 0, SEEK_SET) < 0) > - die_perror("lseek"); > - > - if (read(fd_kernel, &boot, sizeof(boot)) != sizeof(boot)) > - return false; > - > - if (memcmp(&boot.hdr.header, BZIMAGE_MAGIC, strlen(BZIMAGE_MAGIC))) > - return false; > - > - if (boot.hdr.version < BOOT_PROTOCOL_REQUIRED) > - die("Too old kernel"); > - > - if (lseek(fd_kernel, 0, SEEK_SET) < 0) > - die_perror("lseek"); > - > - if (!boot.hdr.setup_sects) > - boot.hdr.setup_sects = BZ_DEFAULT_SETUP_SECTS; > - setup_sects = boot.hdr.setup_sects + 1; > - > - setup_size = setup_sects << 9; > - p = guest_real_to_host(kvm, BOOT_LOADER_SELECTOR, BOOT_LOADER_IP); > - > - /* copy setup.bin to mem*/ > - if (read(fd_kernel, p, setup_size) != setup_size) > - die_perror("read"); > - > - /* copy vmlinux.bin to BZ_KERNEL_START*/ > - p = guest_flat_to_host(kvm, BZ_KERNEL_START); > - > - while ((nr = read(fd_kernel, p, 65536)) > 0) > - p += nr; > - > - p = guest_flat_to_host(kvm, BOOT_CMDLINE_OFFSET); > - if (kernel_cmdline) { > - cmdline_size = strlen(kernel_cmdline) + 1; > - if (cmdline_size > boot.hdr.cmdline_size) > - cmdline_size = boot.hdr.cmdline_size; > - > - memset(p, 0, boot.hdr.cmdline_size); > - memcpy(p, kernel_cmdline, cmdline_size - 1); > - } > - > - kern_boot = guest_real_to_host(kvm, BOOT_LOADER_SELECTOR, 0x00); > - > - kern_boot->hdr.cmd_line_ptr = BOOT_CMDLINE_OFFSET; > - kern_boot->hdr.type_of_loader = 0xff; > - kern_boot->hdr.heap_end_ptr = 0xfe00; > - kern_boot->hdr.loadflags |= CAN_USE_HEAP; > - kern_boot->hdr.vid_mode = vidmode; > - > - /* > - * Read 
initrd image into guest memory > - */ > - if (fd_initrd >= 0) { > - struct stat initrd_stat; > - unsigned long addr; > - > - if (fstat(fd_initrd, &initrd_stat)) > - die_perror("fstat"); > - > - addr = boot.hdr.initrd_addr_max & ~0xfffff; > - for (;;) { > - if (addr < BZ_KERNEL_START) > - die("Not enough memory for initrd"); > - else if (addr < (kvm->ram_size - initrd_stat.st_size)) > - break; > - addr -= 0x100000; > - } > - > - p = guest_flat_to_host(kvm, addr); > - nr = read(fd_initrd, p, initrd_stat.st_size); > - if (nr != initrd_stat.st_size) > - die("Failed to read initrd"); > - > - kern_boot->hdr.ramdisk_image = addr; > - kern_boot->hdr.ramdisk_size = initrd_stat.st_size; > - } > - > - kvm->boot_selector = BOOT_LOADER_SELECTOR; > - /* > - * The real-mode setup code starts at offset 0x200 of a bzImage. See > - * Documentation/x86/boot.txt for details. > - */ > - kvm->boot_ip = BOOT_LOADER_IP + 0x200; > - kvm->boot_sp = BOOT_LOADER_SP; > - > - return true; > -} > - > /* RFC 1952 */ > #define GZIP_ID1 0x1f > #define GZIP_ID2 0x8b > @@ -675,24 +403,6 @@ found_kernel: > return ret; > } > > -/** > - * kvm__setup_bios - inject BIOS into guest system memory > - * @kvm - guest system descriptor > - * > - * This function is a main routine where we poke guest memory > - * and install BIOS there. 
> - */ > -void kvm__setup_bios(struct kvm *kvm) > -{ > - /* standart minimal configuration */ > - setup_bios(kvm); > - > - /* FIXME: SMP, ACPI and friends here */ > - > - /* MP table */ > - mptable_setup(kvm, kvm->nrcpus); > -} > - > #define TIMER_INTERVAL_NS 1000000 /* 1 msec */ > > /* > @@ -732,27 +442,6 @@ void kvm__stop_timer(struct kvm *kvm) > kvm->timerid = 0; > } > > -void kvm__irq_line(struct kvm *kvm, int irq, int level) > -{ > - struct kvm_irq_level irq_level; > - > - irq_level = (struct kvm_irq_level) { > - { > - .irq = irq, > - }, > - .level = level, > - }; > - > - if (ioctl(kvm->vm_fd, KVM_IRQ_LINE, &irq_level) < 0) > - die_perror("KVM_IRQ_LINE failed"); > -} > - > -void kvm__irq_trigger(struct kvm *kvm, int irq) > -{ > - kvm__irq_line(kvm, irq, 1); > - kvm__irq_line(kvm, irq, 0); > -} > - > void kvm__dump_mem(struct kvm *kvm, unsigned long addr, unsigned long size) > { > unsigned char *p; > diff --git a/tools/kvm/bios.c b/tools/kvm/x86/bios.c > similarity index 100% > rename from tools/kvm/bios.c > rename to tools/kvm/x86/bios.c > diff --git a/tools/kvm/bios/.gitignore b/tools/kvm/x86/bios/.gitignore > similarity index 100% > rename from tools/kvm/bios/.gitignore > rename to tools/kvm/x86/bios/.gitignore > diff --git a/tools/kvm/bios/bios-rom.S b/tools/kvm/x86/bios/bios-rom.S > similarity index 80% > rename from tools/kvm/bios/bios-rom.S > rename to tools/kvm/x86/bios/bios-rom.S > index dc52b1e..3269ce9 100644 > --- a/tools/kvm/bios/bios-rom.S > +++ b/tools/kvm/x86/bios/bios-rom.S > @@ -8,5 +8,5 @@ > #endif > > GLOBAL(bios_rom) > - .incbin "bios/bios.bin" > + .incbin "x86/bios/bios.bin" > END(bios_rom) > diff --git a/tools/kvm/bios/e820.c b/tools/kvm/x86/bios/e820.c > similarity index 100% > rename from tools/kvm/bios/e820.c > rename to tools/kvm/x86/bios/e820.c > diff --git a/tools/kvm/bios/entry.S b/tools/kvm/x86/bios/entry.S > similarity index 100% > rename from tools/kvm/bios/entry.S > rename to tools/kvm/x86/bios/entry.S > diff --git 
a/tools/kvm/bios/gen-offsets.sh b/tools/kvm/x86/bios/gen-offsets.sh > similarity index 100% > rename from tools/kvm/bios/gen-offsets.sh > rename to tools/kvm/x86/bios/gen-offsets.sh > diff --git a/tools/kvm/bios/int10.c b/tools/kvm/x86/bios/int10.c > similarity index 100% > rename from tools/kvm/bios/int10.c > rename to tools/kvm/x86/bios/int10.c > diff --git a/tools/kvm/bios/int15.c b/tools/kvm/x86/bios/int15.c > similarity index 100% > rename from tools/kvm/bios/int15.c > rename to tools/kvm/x86/bios/int15.c > diff --git a/tools/kvm/bios/local.S b/tools/kvm/x86/bios/local.S > similarity index 100% > rename from tools/kvm/bios/local.S > rename to tools/kvm/x86/bios/local.S > diff --git a/tools/kvm/bios/macro.S b/tools/kvm/x86/bios/macro.S > similarity index 100% > rename from tools/kvm/bios/macro.S > rename to tools/kvm/x86/bios/macro.S > diff --git a/tools/kvm/bios/memcpy.c b/tools/kvm/x86/bios/memcpy.c > similarity index 100% > rename from tools/kvm/bios/memcpy.c > rename to tools/kvm/x86/bios/memcpy.c > diff --git a/tools/kvm/bios/rom.ld.S b/tools/kvm/x86/bios/rom.ld.S > similarity index 100% > rename from tools/kvm/bios/rom.ld.S > rename to tools/kvm/x86/bios/rom.ld.S > diff --git a/tools/kvm/cpuid.c b/tools/kvm/x86/cpuid.c > similarity index 100% > rename from tools/kvm/cpuid.c > rename to tools/kvm/x86/cpuid.c > diff --git a/tools/kvm/include/kvm/assembly.h b/tools/kvm/x86/include/kvm/assembly.h > similarity index 100% > rename from tools/kvm/include/kvm/assembly.h > rename to tools/kvm/x86/include/kvm/assembly.h > diff --git a/tools/kvm/include/kvm/barrier.h b/tools/kvm/x86/include/kvm/barrier.h > similarity index 100% > rename from tools/kvm/include/kvm/barrier.h > rename to tools/kvm/x86/include/kvm/barrier.h > diff --git a/tools/kvm/include/kvm/bios-export.h b/tools/kvm/x86/include/kvm/bios-export.h > similarity index 100% > rename from tools/kvm/include/kvm/bios-export.h > rename to tools/kvm/x86/include/kvm/bios-export.h > diff --git 
a/tools/kvm/include/kvm/bios.h b/tools/kvm/x86/include/kvm/bios.h > similarity index 100% > rename from tools/kvm/include/kvm/bios.h > rename to tools/kvm/x86/include/kvm/bios.h > diff --git a/tools/kvm/include/kvm/boot-protocol.h b/tools/kvm/x86/include/kvm/boot-protocol.h > similarity index 100% > rename from tools/kvm/include/kvm/boot-protocol.h > rename to tools/kvm/x86/include/kvm/boot-protocol.h > diff --git a/tools/kvm/include/kvm/cpufeature.h b/tools/kvm/x86/include/kvm/cpufeature.h > similarity index 100% > rename from tools/kvm/include/kvm/cpufeature.h > rename to tools/kvm/x86/include/kvm/cpufeature.h > diff --git a/tools/kvm/include/kvm/interrupt.h b/tools/kvm/x86/include/kvm/interrupt.h > similarity index 100% > rename from tools/kvm/include/kvm/interrupt.h > rename to tools/kvm/x86/include/kvm/interrupt.h > diff --git a/tools/kvm/x86/include/kvm/kvm-arch.h b/tools/kvm/x86/include/kvm/kvm-arch.h > new file mode 100644 > index 0000000..02aa8b9 > --- /dev/null > +++ b/tools/kvm/x86/include/kvm/kvm-arch.h > @@ -0,0 +1,59 @@ > +#ifndef KVM__KVM_ARCH_H > +#define KVM__KVM_ARCH_H > + > +#include "kvm/interrupt.h" > +#include "kvm/segment.h" > + > +#include <stdbool.h> > +#include <linux/types.h> > +#include <time.h> > + > +#define KVM_NR_CPUS (255) > + > +/* > + * The hole includes VESA framebuffer and PCI memory. > + */ > +#define KVM_32BIT_GAP_SIZE (768 << 20) > +#define KVM_32BIT_GAP_START ((1ULL << 32) - KVM_32BIT_GAP_SIZE) > + > +#define KVM_MMIO_START KVM_32BIT_GAP_START > + > +struct kvm { > + int sys_fd; /* For system ioctls(), i.e. 
/dev/kvm */ > + int vm_fd; /* For VM ioctls() */ > + timer_t timerid; /* Posix timer for interrupts */ > + > + int nrcpus; /* Number of cpus to run */ > + > + u32 mem_slots; /* for KVM_SET_USER_MEMORY_REGION */ > + > + u64 ram_size; > + void *ram_start; > + > + bool nmi_disabled; > + > + bool single_step; > + > + u16 boot_selector; > + u16 boot_ip; > + u16 boot_sp; > + > + struct interrupt_table interrupt_table; > + > + const char *vmlinux; > + struct disk_image **disks; > + int nr_disks; > + > + const char *name; > +}; > + > +static inline void *guest_flat_to_host(struct kvm *kvm, unsigned long offset); /* In kvm.h */ > + > +static inline void *guest_real_to_host(struct kvm *kvm, u16 selector, u16 offset) > +{ > + unsigned long flat = segment_to_flat(selector, offset); > + > + return guest_flat_to_host(kvm, flat); > +} > + > +#endif /* KVM__KVM_ARCH_H */ > diff --git a/tools/kvm/x86/include/kvm/kvm-cpu-arch.h b/tools/kvm/x86/include/kvm/kvm-cpu-arch.h > new file mode 100644 > index 0000000..ed1c727 > --- /dev/null > +++ b/tools/kvm/x86/include/kvm/kvm-cpu-arch.h > @@ -0,0 +1,33 @@ > +#ifndef KVM__KVM_CPU_ARCH_H > +#define KVM__KVM_CPU_ARCH_H > + > +/* Architecture-specific kvm_cpu definitions. 
*/ > + > +#include <linux/kvm.h> /* for struct kvm_regs */ > + > +#include <pthread.h> > + > +struct kvm; > + > +struct kvm_cpu { > + pthread_t thread; /* VCPU thread */ > + > + unsigned long cpu_id; > + > + struct kvm *kvm; /* parent KVM */ > + int vcpu_fd; /* For VCPU ioctls() */ > + struct kvm_run *kvm_run; > + > + struct kvm_regs regs; > + struct kvm_sregs sregs; > + struct kvm_fpu fpu; > + > + struct kvm_msrs *msrs; /* dynamically allocated */ > + > + u8 is_running; > + u8 paused; > + > + struct kvm_coalesced_mmio_ring *ring; > +}; > + > +#endif /* KVM__KVM_CPU_ARCH_H */ > diff --git a/tools/kvm/include/kvm/mptable.h b/tools/kvm/x86/include/kvm/mptable.h > similarity index 100% > rename from tools/kvm/include/kvm/mptable.h > rename to tools/kvm/x86/include/kvm/mptable.h > diff --git a/tools/kvm/interrupt.c b/tools/kvm/x86/interrupt.c > similarity index 100% > rename from tools/kvm/interrupt.c > rename to tools/kvm/x86/interrupt.c > diff --git a/tools/kvm/x86/ioport.c b/tools/kvm/x86/ioport.c > new file mode 100644 > index 0000000..8a91bf2 > --- /dev/null > +++ b/tools/kvm/x86/ioport.c > @@ -0,0 +1,59 @@ > +#include "kvm/ioport.h" > + > +#include <stdlib.h> > + > +static bool debug_io_out(struct ioport *ioport, struct kvm *kvm, u16 port, void *data, int size) > +{ > + exit(EXIT_SUCCESS); > +} > + > +static struct ioport_operations debug_ops = { > + .io_out = debug_io_out, > +}; > + > +static bool dummy_io_in(struct ioport *ioport, struct kvm *kvm, u16 port, void *data, int size) > +{ > + return true; > +} > + > +static bool dummy_io_out(struct ioport *ioport, struct kvm *kvm, u16 port, void *data, int size) > +{ > + return true; > +} > + > +static struct ioport_operations dummy_read_write_ioport_ops = { > + .io_in = dummy_io_in, > + .io_out = dummy_io_out, > +}; > + > +static struct ioport_operations dummy_write_only_ioport_ops = { > + .io_out = dummy_io_out, > +}; > + > +void ioport__setup_arch(void) > +{ > + /* Legacy ioport setup */ > + > + /* 0x0020 - 
0x003F - 8259A PIC 1 */ > + ioport__register(0x0020, &dummy_read_write_ioport_ops, 2, NULL); > + > + /* PORT 0040-005F - PIT - PROGRAMMABLE INTERVAL TIMER (8253, 8254) */ > + ioport__register(0x0040, &dummy_read_write_ioport_ops, 4, NULL); > + > + /* 0x00A0 - 0x00AF - 8259A PIC 2 */ > + ioport__register(0x00A0, &dummy_read_write_ioport_ops, 2, NULL); > + > + /* PORT 00E0-00EF are 'motherboard specific' so we use them for our > + internal debugging purposes. */ > + ioport__register(IOPORT_DBG, &debug_ops, 1, NULL); > + > + /* PORT 00ED - DUMMY PORT FOR DELAY??? */ > + ioport__register(0x00ED, &dummy_write_only_ioport_ops, 1, NULL); > + > + /* 0x00F0 - 0x00FF - Math co-processor */ > + ioport__register(0x00F0, &dummy_write_only_ioport_ops, 2, NULL); > + > + /* PORT 03D4-03D5 - COLOR VIDEO - CRT CONTROL REGISTERS */ > + ioport__register(0x03D4, &dummy_read_write_ioport_ops, 1, NULL); > + ioport__register(0x03D5, &dummy_write_only_ioport_ops, 1, NULL); > +} > diff --git a/tools/kvm/irq.c b/tools/kvm/x86/irq.c > similarity index 100% > rename from tools/kvm/irq.c > rename to tools/kvm/x86/irq.c > diff --git a/tools/kvm/x86/kvm-cpu.c b/tools/kvm/x86/kvm-cpu.c > new file mode 100644 > index 0000000..b26b208 > --- /dev/null > +++ b/tools/kvm/x86/kvm-cpu.c > @@ -0,0 +1,383 @@ > +#include "kvm/kvm-cpu.h" > + > +#include "kvm/symbol.h" > +#include "kvm/util.h" > +#include "kvm/kvm.h" > + > +#include <asm/msr-index.h> > + > +#include <sys/ioctl.h> > +#include <sys/mman.h> > +#include <signal.h> > +#include <stdlib.h> > +#include <string.h> > +#include <errno.h> > +#include <stdio.h> > + > +static int debug_fd; > + > +void kvm_cpu__set_debug_fd(int fd) > +{ > + debug_fd = fd; > +} > + > +int kvm_cpu__get_debug_fd(void) > +{ > + return debug_fd; > +} > + > +static inline bool is_in_protected_mode(struct kvm_cpu *vcpu) > +{ > + return vcpu->sregs.cr0 & 0x01; > +} > + > +static inline u64 ip_to_flat(struct kvm_cpu *vcpu, u64 ip) > +{ > + u64 cs; > + > + /* > + * NOTE! 
We should take code segment base address into account here. > + * Luckily it's usually zero because Linux uses flat memory model. > + */ > + if (is_in_protected_mode(vcpu)) > + return ip; > + > + cs = vcpu->sregs.cs.selector; > + > + return ip + (cs << 4); > +} > + > +static inline u32 selector_to_base(u16 selector) > +{ > + /* > + * KVM on Intel requires 'base' to be 'selector * 16' in real mode. > + */ > + return (u32)selector * 16; > +} > + > +static struct kvm_cpu *kvm_cpu__new(struct kvm *kvm) > +{ > + struct kvm_cpu *vcpu; > + > + vcpu = calloc(1, sizeof *vcpu); > + if (!vcpu) > + return NULL; > + > + vcpu->kvm = kvm; > + > + return vcpu; > +} > + > +void kvm_cpu__delete(struct kvm_cpu *vcpu) > +{ > + if (vcpu->msrs) > + free(vcpu->msrs); > + > + free(vcpu); > +} > + > +struct kvm_cpu *kvm_cpu__init(struct kvm *kvm, unsigned long cpu_id) > +{ > + struct kvm_cpu *vcpu; > + int mmap_size; > + int coalesced_offset; > + > + vcpu = kvm_cpu__new(kvm); > + if (!vcpu) > + return NULL; > + > + vcpu->cpu_id = cpu_id; > + > + vcpu->vcpu_fd = ioctl(vcpu->kvm->vm_fd, KVM_CREATE_VCPU, cpu_id); > + if (vcpu->vcpu_fd < 0) > + die_perror("KVM_CREATE_VCPU ioctl"); > + > + mmap_size = ioctl(vcpu->kvm->sys_fd, KVM_GET_VCPU_MMAP_SIZE, 0); > + if (mmap_size < 0) > + die_perror("KVM_GET_VCPU_MMAP_SIZE ioctl"); > + > + vcpu->kvm_run = mmap(NULL, mmap_size, PROT_RW, MAP_SHARED, vcpu->vcpu_fd, 0); > + if (vcpu->kvm_run == MAP_FAILED) > + die("unable to mmap vcpu fd"); > + > + coalesced_offset = ioctl(kvm->sys_fd, KVM_CHECK_EXTENSION, KVM_CAP_COALESCED_MMIO); > + if (coalesced_offset) > + vcpu->ring = (void *)vcpu->kvm_run + (coalesced_offset * PAGE_SIZE); > + > + vcpu->is_running = true; > + > + return vcpu; > +} > + > +static struct kvm_msrs *kvm_msrs__new(size_t nmsrs) > +{ > + struct kvm_msrs *vcpu = calloc(1, sizeof(*vcpu) + (sizeof(struct kvm_msr_entry) * nmsrs)); > + > + if (!vcpu) > + die("out of memory"); > + > + return vcpu; > +} > + > +#define KVM_MSR_ENTRY(_index, _data) \ 
> + (struct kvm_msr_entry) { .index = _index, .data = _data } > + > +static void kvm_cpu__setup_msrs(struct kvm_cpu *vcpu) > +{ > + unsigned long ndx = 0; > + > + vcpu->msrs = kvm_msrs__new(100); > + > + vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_CS, 0x0); > + vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_ESP, 0x0); > + vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_EIP, 0x0); > +#ifdef CONFIG_X86_64 > + vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_STAR, 0x0); > + vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_CSTAR, 0x0); > + vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_KERNEL_GS_BASE, 0x0); > + vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_SYSCALL_MASK, 0x0); > + vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_LSTAR, 0x0); > +#endif > + vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_TSC, 0x0); > + vcpu->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_MISC_ENABLE, > + MSR_IA32_MISC_ENABLE_FAST_STRING); > + > + vcpu->msrs->nmsrs = ndx; > + > + if (ioctl(vcpu->vcpu_fd, KVM_SET_MSRS, vcpu->msrs) < 0) > + die_perror("KVM_SET_MSRS failed"); > +} > + > +static void kvm_cpu__setup_fpu(struct kvm_cpu *vcpu) > +{ > + vcpu->fpu = (struct kvm_fpu) { > + .fcw = 0x37f, > + .mxcsr = 0x1f80, > + }; > + > + if (ioctl(vcpu->vcpu_fd, KVM_SET_FPU, &vcpu->fpu) < 0) > + die_perror("KVM_SET_FPU failed"); > +} > + > +static void kvm_cpu__setup_regs(struct kvm_cpu *vcpu) > +{ > + vcpu->regs = (struct kvm_regs) { > + /* We start the guest in 16-bit real mode */ > + .rflags = 0x0000000000000002ULL, > + > + .rip = vcpu->kvm->boot_ip, > + .rsp = vcpu->kvm->boot_sp, > + .rbp = vcpu->kvm->boot_sp, > + }; > + > + if (vcpu->regs.rip > USHRT_MAX) > + die("ip 0x%llx is too high for real mode", (u64) vcpu->regs.rip); > + > + if (ioctl(vcpu->vcpu_fd, KVM_SET_REGS, &vcpu->regs) < 0) > + die_perror("KVM_SET_REGS failed"); > +} > + > +static void kvm_cpu__setup_sregs(struct kvm_cpu *vcpu) > +{ > + > + if (ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, 
&vcpu->sregs) < 0) > + die_perror("KVM_GET_SREGS failed"); > + > + vcpu->sregs.cs.selector = vcpu->kvm->boot_selector; > + vcpu->sregs.cs.base = selector_to_base(vcpu->kvm->boot_selector); > + vcpu->sregs.ss.selector = vcpu->kvm->boot_selector; > + vcpu->sregs.ss.base = selector_to_base(vcpu->kvm->boot_selector); > + vcpu->sregs.ds.selector = vcpu->kvm->boot_selector; > + vcpu->sregs.ds.base = selector_to_base(vcpu->kvm->boot_selector); > + vcpu->sregs.es.selector = vcpu->kvm->boot_selector; > + vcpu->sregs.es.base = selector_to_base(vcpu->kvm->boot_selector); > + vcpu->sregs.fs.selector = vcpu->kvm->boot_selector; > + vcpu->sregs.fs.base = selector_to_base(vcpu->kvm->boot_selector); > + vcpu->sregs.gs.selector = vcpu->kvm->boot_selector; > + vcpu->sregs.gs.base = selector_to_base(vcpu->kvm->boot_selector); > + > + if (ioctl(vcpu->vcpu_fd, KVM_SET_SREGS, &vcpu->sregs) < 0) > + die_perror("KVM_SET_SREGS failed"); > +} > + > +/** > + * kvm_cpu__reset_vcpu - reset virtual CPU to a known state > + */ > +void kvm_cpu__reset_vcpu(struct kvm_cpu *vcpu) > +{ > + kvm_cpu__setup_cpuid(vcpu); > + kvm_cpu__setup_sregs(vcpu); > + kvm_cpu__setup_regs(vcpu); > + kvm_cpu__setup_fpu(vcpu); > + kvm_cpu__setup_msrs(vcpu); > +} > + > +static void print_dtable(const char *name, struct kvm_dtable *dtable) > +{ > + dprintf(debug_fd, " %s %016llx %08hx\n", > + name, (u64) dtable->base, (u16) dtable->limit); > +} > + > +static void print_segment(const char *name, struct kvm_segment *seg) > +{ > + dprintf(debug_fd, " %s %04hx %016llx %08x %02hhx %x %x %x %x %x %x %x\n", > + name, (u16) seg->selector, (u64) seg->base, (u32) seg->limit, > + (u8) seg->type, seg->present, seg->dpl, seg->db, seg->s, seg->l, seg->g, seg->avl); > +} > + > +void kvm_cpu__show_registers(struct kvm_cpu *vcpu) > +{ > + unsigned long cr0, cr2, cr3; > + unsigned long cr4, cr8; > + unsigned long rax, rbx, rcx; > + unsigned long rdx, rsi, rdi; > + unsigned long rbp, r8, r9; > + unsigned long r10, r11, r12; > + unsigned 
long r13, r14, r15; > + unsigned long rip, rsp; > + struct kvm_sregs sregs; > + unsigned long rflags; > + struct kvm_regs regs; > + int i; > + > + if (ioctl(vcpu->vcpu_fd, KVM_GET_REGS, &regs) < 0) > + die("KVM_GET_REGS failed"); > + > + rflags = regs.rflags; > + > + rip = regs.rip; rsp = regs.rsp; > + rax = regs.rax; rbx = regs.rbx; rcx = regs.rcx; > + rdx = regs.rdx; rsi = regs.rsi; rdi = regs.rdi; > + rbp = regs.rbp; r8 = regs.r8; r9 = regs.r9; > + r10 = regs.r10; r11 = regs.r11; r12 = regs.r12; > + r13 = regs.r13; r14 = regs.r14; r15 = regs.r15; > + > + dprintf(debug_fd, "\n Registers:\n"); > + dprintf(debug_fd, " ----------\n"); > + dprintf(debug_fd, " rip: %016lx rsp: %016lx flags: %016lx\n", rip, rsp, rflags); > + dprintf(debug_fd, " rax: %016lx rbx: %016lx rcx: %016lx\n", rax, rbx, rcx); > + dprintf(debug_fd, " rdx: %016lx rsi: %016lx rdi: %016lx\n", rdx, rsi, rdi); > + dprintf(debug_fd, " rbp: %016lx r8: %016lx r9: %016lx\n", rbp, r8, r9); > + dprintf(debug_fd, " r10: %016lx r11: %016lx r12: %016lx\n", r10, r11, r12); > + dprintf(debug_fd, " r13: %016lx r14: %016lx r15: %016lx\n", r13, r14, r15); > + > + if (ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, &sregs) < 0) > + die("KVM_GET_REGS failed"); > + > + cr0 = sregs.cr0; cr2 = sregs.cr2; cr3 = sregs.cr3; > + cr4 = sregs.cr4; cr8 = sregs.cr8; > + > + dprintf(debug_fd, " cr0: %016lx cr2: %016lx cr3: %016lx\n", cr0, cr2, cr3); > + dprintf(debug_fd, " cr4: %016lx cr8: %016lx\n", cr4, cr8); > + dprintf(debug_fd, "\n Segment registers:\n"); > + dprintf(debug_fd, " ------------------\n"); > + dprintf(debug_fd, " register selector base limit type p dpl db s l g avl\n"); > + print_segment("cs ", &sregs.cs); > + print_segment("ss ", &sregs.ss); > + print_segment("ds ", &sregs.ds); > + print_segment("es ", &sregs.es); > + print_segment("fs ", &sregs.fs); > + print_segment("gs ", &sregs.gs); > + print_segment("tr ", &sregs.tr); > + print_segment("ldt", &sregs.ldt); > + print_dtable("gdt", &sregs.gdt); > + print_dtable("idt", 
&sregs.idt); > + > + dprintf(debug_fd, "\n APIC:\n"); > + dprintf(debug_fd, " -----\n"); > + dprintf(debug_fd, " efer: %016llx apic base: %016llx nmi: %s\n", > + (u64) sregs.efer, (u64) sregs.apic_base, > + (vcpu->kvm->nmi_disabled ? "disabled" : "enabled")); > + > + dprintf(debug_fd, "\n Interrupt bitmap:\n"); > + dprintf(debug_fd, " -----------------\n"); > + for (i = 0; i < (KVM_NR_INTERRUPTS + 63) / 64; i++) > + dprintf(debug_fd, " %016llx", (u64) sregs.interrupt_bitmap[i]); > + dprintf(debug_fd, "\n"); > +} > + > +#define MAX_SYM_LEN 128 > + > +void kvm_cpu__show_code(struct kvm_cpu *vcpu) > +{ > + unsigned int code_bytes = 64; > + unsigned int code_prologue = code_bytes * 43 / 64; > + unsigned int code_len = code_bytes; > + char sym[MAX_SYM_LEN]; > + unsigned char c; > + unsigned int i; > + u8 *ip; > + > + if (ioctl(vcpu->vcpu_fd, KVM_GET_REGS, &vcpu->regs) < 0) > + die("KVM_GET_REGS failed"); > + > + if (ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, &vcpu->sregs) < 0) > + die("KVM_GET_SREGS failed"); > + > + ip = guest_flat_to_host(vcpu->kvm, ip_to_flat(vcpu, vcpu->regs.rip) - code_prologue); > + > + dprintf(debug_fd, "\n Code:\n"); > + dprintf(debug_fd, " -----\n"); > + > + symbol__lookup(vcpu->kvm, vcpu->regs.rip, sym, MAX_SYM_LEN); > + > + dprintf(debug_fd, " rip: [<%016lx>] %s\n\n", (unsigned long) vcpu->regs.rip, sym); > + > + for (i = 0; i < code_len; i++, ip++) { > + if (!host_ptr_in_ram(vcpu->kvm, ip)) > + break; > + > + c = *ip; > + > + if (ip == guest_flat_to_host(vcpu->kvm, ip_to_flat(vcpu, vcpu->regs.rip))) > + dprintf(debug_fd, " <%02x>", c); > + else > + dprintf(debug_fd, " %02x", c); > + } > + > + dprintf(debug_fd, "\n"); > + > + dprintf(debug_fd, "\n Stack:\n"); > + dprintf(debug_fd, " ------\n"); > + kvm__dump_mem(vcpu->kvm, vcpu->regs.rsp, 32); > +} > + > +void kvm_cpu__show_page_tables(struct kvm_cpu *vcpu) > +{ > + u64 *pte1; > + u64 *pte2; > + u64 *pte3; > + u64 *pte4; > + > + if (!is_in_protected_mode(vcpu)) > + return; > + > + if 
(ioctl(vcpu->vcpu_fd, KVM_GET_SREGS, &vcpu->sregs) < 0) > + die("KVM_GET_SREGS failed"); > + > + pte4 = guest_flat_to_host(vcpu->kvm, vcpu->sregs.cr3); > + if (!host_ptr_in_ram(vcpu->kvm, pte4)) > + return; > + > + pte3 = guest_flat_to_host(vcpu->kvm, (*pte4 & ~0xfff)); > + if (!host_ptr_in_ram(vcpu->kvm, pte3)) > + return; > + > + pte2 = guest_flat_to_host(vcpu->kvm, (*pte3 & ~0xfff)); > + if (!host_ptr_in_ram(vcpu->kvm, pte2)) > + return; > + > + pte1 = guest_flat_to_host(vcpu->kvm, (*pte2 & ~0xfff)); > + if (!host_ptr_in_ram(vcpu->kvm, pte1)) > + return; > + > + dprintf(debug_fd, "Page Tables:\n"); > + if (*pte2 & (1 << 7)) > + dprintf(debug_fd, " pte4: %016llx pte3: %016llx" > + " pte2: %016llx\n", > + *pte4, *pte3, *pte2); > + else > + dprintf(debug_fd, " pte4: %016llx pte3: %016llx pte2: %016" > + "llx pte1: %016llx\n", > + *pte4, *pte3, *pte2, *pte1); > +} > diff --git a/tools/kvm/x86/kvm.c b/tools/kvm/x86/kvm.c > new file mode 100644 > index 0000000..ac6c91e > --- /dev/null > +++ b/tools/kvm/x86/kvm.c > @@ -0,0 +1,330 @@ > +#include "kvm/kvm.h" > +#include "kvm/boot-protocol.h" > +#include "kvm/cpufeature.h" > +#include "kvm/interrupt.h" > +#include "kvm/mptable.h" > +#include "kvm/util.h" > + > +#include <asm/bootparam.h> > +#include <linux/kvm.h> > + > +#include <sys/types.h> > +#include <sys/ioctl.h> > +#include <sys/mman.h> > +#include <sys/stat.h> > +#include <stdbool.h> > +#include <assert.h> > +#include <stdlib.h> > +#include <string.h> > +#include <unistd.h> > +#include <stdio.h> > +#include <fcntl.h> > +#include <asm/unistd.h> > + > +struct kvm_ext kvm_req_ext[] = { > + { DEFINE_KVM_EXT(KVM_CAP_COALESCED_MMIO) }, > + { DEFINE_KVM_EXT(KVM_CAP_SET_TSS_ADDR) }, > + { DEFINE_KVM_EXT(KVM_CAP_PIT2) }, > + { DEFINE_KVM_EXT(KVM_CAP_USER_MEMORY) }, > + { DEFINE_KVM_EXT(KVM_CAP_IRQ_ROUTING) }, > + { DEFINE_KVM_EXT(KVM_CAP_IRQCHIP) }, > + { DEFINE_KVM_EXT(KVM_CAP_HLT) }, > + { DEFINE_KVM_EXT(KVM_CAP_IRQ_INJECT_STATUS) }, > + { 
DEFINE_KVM_EXT(KVM_CAP_EXT_CPUID) }, > + { 0, 0 } > +}; > + > +bool kvm__arch_cpu_supports_vm(void) > +{ > + struct cpuid_regs regs; > + u32 eax_base; > + int feature; > + > + regs = (struct cpuid_regs) { > + .eax = 0x00, > + }; > + host_cpuid(&regs); > + > + switch (regs.ebx) { > + case CPUID_VENDOR_INTEL_1: > + eax_base = 0x00; > + feature = KVM__X86_FEATURE_VMX; > + break; > + > + case CPUID_VENDOR_AMD_1: > + eax_base = 0x80000000; > + feature = KVM__X86_FEATURE_SVM; > + break; > + > + default: > + return false; > + } > + > + regs = (struct cpuid_regs) { > + .eax = eax_base, > + }; > + host_cpuid(&regs); > + > + if (regs.eax < eax_base + 0x01) > + return false; > + > + regs = (struct cpuid_regs) { > + .eax = eax_base + 0x01 > + }; > + host_cpuid(&regs); > + > + return regs.ecx & (1 << feature); > +} > + > +/* > + * Allocating RAM size bigger than 4GB requires us to leave a gap > + * in the RAM which is used for PCI MMIO, hotplug, and unconfigured > + * devices (see documentation of e820_setup_gap() for details). > + * > + * If we're required to initialize RAM bigger than 4GB, we will create > + * a gap between 0xe0000000 and 0x100000000 in the guest virtual mem space.
> + */ > + > +void kvm__init_ram(struct kvm *kvm) > +{ > + u64 phys_start, phys_size; > + void *host_mem; > + > + if (kvm->ram_size < KVM_32BIT_GAP_START) { > + /* Use a single block of RAM for 32bit RAM */ > + > + phys_start = 0; > + phys_size = kvm->ram_size; > + host_mem = kvm->ram_start; > + > + kvm__register_mem(kvm, phys_start, phys_size, host_mem); > + } else { > + /* First RAM range from zero to the PCI gap: */ > + > + phys_start = 0; > + phys_size = KVM_32BIT_GAP_START; > + host_mem = kvm->ram_start; > + > + kvm__register_mem(kvm, phys_start, phys_size, host_mem); > + > + /* Second RAM range from 4GB to the end of RAM: */ > + > + phys_start = 0x100000000ULL; > + phys_size = kvm->ram_size - phys_size; > + host_mem = kvm->ram_start + phys_start; > + > + kvm__register_mem(kvm, phys_start, phys_size, host_mem); > + } > +} > + > +/* Architecture-specific KVM init */ > +void kvm__arch_init(struct kvm *kvm, const char *kvm_dev, u64 ram_size, const char *name) > +{ > + struct kvm_pit_config pit_config = { .flags = 0, }; > + int ret; > + > + ret = ioctl(kvm->vm_fd, KVM_SET_TSS_ADDR, 0xfffbd000); > + if (ret < 0) > + die_perror("KVM_SET_TSS_ADDR ioctl"); > + > + ret = ioctl(kvm->vm_fd, KVM_CREATE_PIT2, &pit_config); > + if (ret < 0) > + die_perror("KVM_CREATE_PIT2 ioctl"); > + > + kvm->ram_size = ram_size; > + > + if (kvm->ram_size < KVM_32BIT_GAP_START) { > + kvm->ram_start = mmap(NULL, ram_size, PROT_RW, MAP_ANON_NORESERVE, -1, 0); > + } else { > + kvm->ram_start = mmap(NULL, ram_size + KVM_32BIT_GAP_SIZE, PROT_RW, MAP_ANON_NORESERVE, -1, 0); > + if (kvm->ram_start != MAP_FAILED) { > + /* > + * We mprotect the gap (see kvm__init_ram() for details) PROT_NONE so that > + * if we accidently write to it, we will know. 
> + */ > + mprotect(kvm->ram_start + KVM_32BIT_GAP_START, KVM_32BIT_GAP_SIZE, PROT_NONE); > + } > + } > + if (kvm->ram_start == MAP_FAILED) > + die("out of memory"); > + > + madvise(kvm->ram_start, kvm->ram_size, MADV_MERGEABLE); > + > + ret = ioctl(kvm->vm_fd, KVM_CREATE_IRQCHIP); > + if (ret < 0) > + die_perror("KVM_CREATE_IRQCHIP ioctl"); > +} > + > +void kvm__irq_line(struct kvm *kvm, int irq, int level) > +{ > + struct kvm_irq_level irq_level; > + > + irq_level = (struct kvm_irq_level) { > + { > + .irq = irq, > + }, > + .level = level, > + }; > + > + if (ioctl(kvm->vm_fd, KVM_IRQ_LINE, &irq_level) < 0) > + die_perror("KVM_IRQ_LINE failed"); > +} > + > +void kvm__irq_trigger(struct kvm *kvm, int irq) > +{ > + kvm__irq_line(kvm, irq, 1); > + kvm__irq_line(kvm, irq, 0); > +} > + > +#define BOOT_LOADER_SELECTOR 0x1000 > +#define BOOT_LOADER_IP 0x0000 > +#define BOOT_LOADER_SP 0x8000 > +#define BOOT_CMDLINE_OFFSET 0x20000 > + > +#define BOOT_PROTOCOL_REQUIRED 0x206 > +#define LOAD_HIGH 0x01 > + > +int load_flat_binary(struct kvm *kvm, int fd) > +{ > + void *p; > + int nr; > + > + if (lseek(fd, 0, SEEK_SET) < 0) > + die_perror("lseek"); > + > + p = guest_real_to_host(kvm, BOOT_LOADER_SELECTOR, BOOT_LOADER_IP); > + > + while ((nr = read(fd, p, 65536)) > 0) > + p += nr; > + > + kvm->boot_selector = BOOT_LOADER_SELECTOR; > + kvm->boot_ip = BOOT_LOADER_IP; > + kvm->boot_sp = BOOT_LOADER_SP; > + > + return true; > +} > + > +static const char *BZIMAGE_MAGIC = "HdrS"; > + > +bool load_bzimage(struct kvm *kvm, int fd_kernel, > + int fd_initrd, const char *kernel_cmdline, u16 vidmode) > +{ > + struct boot_params *kern_boot; > + unsigned long setup_sects; > + struct boot_params boot; > + size_t cmdline_size; > + ssize_t setup_size; > + void *p; > + int nr; > + > + /* > + * See Documentation/x86/boot.txt for details no bzImage on-disk and > + * memory layout. 
> + */ > + > + if (lseek(fd_kernel, 0, SEEK_SET) < 0) > + die_perror("lseek"); > + > + if (read(fd_kernel, &boot, sizeof(boot)) != sizeof(boot)) > + return false; > + > + if (memcmp(&boot.hdr.header, BZIMAGE_MAGIC, strlen(BZIMAGE_MAGIC))) > + return false; > + > + if (boot.hdr.version < BOOT_PROTOCOL_REQUIRED) > + die("Too old kernel"); > + > + if (lseek(fd_kernel, 0, SEEK_SET) < 0) > + die_perror("lseek"); > + > + if (!boot.hdr.setup_sects) > + boot.hdr.setup_sects = BZ_DEFAULT_SETUP_SECTS; > + setup_sects = boot.hdr.setup_sects + 1; > + > + setup_size = setup_sects << 9; > + p = guest_real_to_host(kvm, BOOT_LOADER_SELECTOR, BOOT_LOADER_IP); > + > + /* copy setup.bin to mem*/ > + if (read(fd_kernel, p, setup_size) != setup_size) > + die_perror("read"); > + > + /* copy vmlinux.bin to BZ_KERNEL_START*/ > + p = guest_flat_to_host(kvm, BZ_KERNEL_START); > + > + while ((nr = read(fd_kernel, p, 65536)) > 0) > + p += nr; > + > + p = guest_flat_to_host(kvm, BOOT_CMDLINE_OFFSET); > + if (kernel_cmdline) { > + cmdline_size = strlen(kernel_cmdline) + 1; > + if (cmdline_size > boot.hdr.cmdline_size) > + cmdline_size = boot.hdr.cmdline_size; > + > + memset(p, 0, boot.hdr.cmdline_size); > + memcpy(p, kernel_cmdline, cmdline_size - 1); > + } > + > + kern_boot = guest_real_to_host(kvm, BOOT_LOADER_SELECTOR, 0x00); > + > + kern_boot->hdr.cmd_line_ptr = BOOT_CMDLINE_OFFSET; > + kern_boot->hdr.type_of_loader = 0xff; > + kern_boot->hdr.heap_end_ptr = 0xfe00; > + kern_boot->hdr.loadflags |= CAN_USE_HEAP; > + kern_boot->hdr.vid_mode = vidmode; > + > + /* > + * Read initrd image into guest memory > + */ > + if (fd_initrd >= 0) { > + struct stat initrd_stat; > + unsigned long addr; > + > + if (fstat(fd_initrd, &initrd_stat)) > + die_perror("fstat"); > + > + addr = boot.hdr.initrd_addr_max & ~0xfffff; > + for (;;) { > + if (addr < BZ_KERNEL_START) > + die("Not enough memory for initrd"); > + else if (addr < (kvm->ram_size - initrd_stat.st_size)) > + break; > + addr -= 0x100000; > + } > + 
> + p = guest_flat_to_host(kvm, addr); > + nr = read(fd_initrd, p, initrd_stat.st_size); > + if (nr != initrd_stat.st_size) > + die("Failed to read initrd"); > + > + kern_boot->hdr.ramdisk_image = addr; > + kern_boot->hdr.ramdisk_size = initrd_stat.st_size; > + } > + > + kvm->boot_selector = BOOT_LOADER_SELECTOR; > + /* > + * The real-mode setup code starts at offset 0x200 of a bzImage. See > + * Documentation/x86/boot.txt for details. > + */ > + kvm->boot_ip = BOOT_LOADER_IP + 0x200; > + kvm->boot_sp = BOOT_LOADER_SP; > + > + return true; > +} > + > +/** > + * kvm__arch_setup_firmware - inject BIOS into guest system memory > + * @kvm - guest system descriptor > + * > + * This function is a main routine where we poke guest memory > + * and install BIOS there. > + */ > +void kvm__arch_setup_firmware(struct kvm *kvm) > +{ > + /* standart minimal configuration */ > + setup_bios(kvm); > + > + /* FIXME: SMP, ACPI and friends here */ > + > + /* MP table */ > + mptable_setup(kvm, kvm->nrcpus); > +} > diff --git a/tools/kvm/mptable.c b/tools/kvm/x86/mptable.c > similarity index 100% > rename from tools/kvm/mptable.c > rename to tools/kvm/x86/mptable.c > -- > To unsubscribe from this list: send the line "unsubscribe kvm" in > the body of a message to majordomo@xxxxxxxxxxxxxxx > More majordomo info at http://vger.kernel.org/majordomo-info.html -- Sasha. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html