[PATCH] kvm tools: Introduce KVM VCPU data structure

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



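In preparation for a threaded execution model, this patch introduces a KVM VCPU
data structure, 'struct kvm_cpu'. Per-VCPU state (the VCPU file descriptor, the
kvm_run mapping, registers and MSRs) and the functions that operate on it move
out of 'struct kvm' and kvm.c into the new structure and kvm-cpu.c.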

Cc: Asias He <asias.hejun@xxxxxxxxx>
Cc: Cyrill Gorcunov <gorcunov@xxxxxxxxx>
Cc: Ingo Molnar <mingo@xxxxxxx>
Signed-off-by: Pekka Enberg <penberg@xxxxxxxxxx>
---
 tools/kvm/Makefile              |    1 +
 tools/kvm/cpuid.c               |    6 +-
 tools/kvm/include/kvm/kvm-cpu.h |   33 ++++
 tools/kvm/include/kvm/kvm.h     |   16 --
 tools/kvm/ioport.c              |    2 +
 tools/kvm/kvm-cpu.c             |  370 +++++++++++++++++++++++++++++++++++++++
 tools/kvm/kvm-run.c             |   62 ++++---
 tools/kvm/kvm.c                 |  338 -----------------------------------
 8 files changed, 445 insertions(+), 383 deletions(-)
 create mode 100644 tools/kvm/include/kvm/kvm-cpu.h
 create mode 100644 tools/kvm/kvm-cpu.c

diff --git a/tools/kvm/Makefile b/tools/kvm/Makefile
index 4bcfd74..141cdec 100644
--- a/tools/kvm/Makefile
+++ b/tools/kvm/Makefile
@@ -21,6 +21,7 @@ OBJS	+= disk-image.o
 OBJS	+= interrupt.o
 OBJS	+= ioport.o
 OBJS	+= kvm.o
+OBJS	+= kvm-cpu.o
 OBJS	+= main.o
 OBJS	+= mmio.o
 OBJS	+= pci.o
diff --git a/tools/kvm/cpuid.c b/tools/kvm/cpuid.c
index f7cc930..0b26eb1 100644
--- a/tools/kvm/cpuid.c
+++ b/tools/kvm/cpuid.c
@@ -1,3 +1,5 @@
+#include "kvm/kvm-cpu.h"
+
 #include "kvm/kvm.h"
 #include "kvm/util.h"
 
@@ -30,14 +32,14 @@ static void filter_cpuid(struct kvm_cpuid2 *kvm_cpuid)
 	}
 }
 
-void kvm__setup_cpuid(struct kvm *self)
+void kvm_cpu__setup_cpuid(struct kvm_cpu *self)
 {
 	struct kvm_cpuid2 *kvm_cpuid;
 
 	kvm_cpuid = calloc(1, sizeof(*kvm_cpuid) + MAX_KVM_CPUID_ENTRIES * sizeof(*kvm_cpuid->entries));
 
 	kvm_cpuid->nent = MAX_KVM_CPUID_ENTRIES;
-	if (ioctl(self->sys_fd, KVM_GET_SUPPORTED_CPUID, kvm_cpuid) < 0)
+	if (ioctl(self->kvm->sys_fd, KVM_GET_SUPPORTED_CPUID, kvm_cpuid) < 0)
 		die_perror("KVM_GET_SUPPORTED_CPUID failed");
 
 	filter_cpuid(kvm_cpuid);
diff --git a/tools/kvm/include/kvm/kvm-cpu.h b/tools/kvm/include/kvm/kvm-cpu.h
new file mode 100644
index 0000000..d36dadf
--- /dev/null
+++ b/tools/kvm/include/kvm/kvm-cpu.h
@@ -0,0 +1,33 @@
+#ifndef KVM__KVM_CPU_H
+#define KVM__KVM_CPU_H
+
+#include <linux/kvm.h>	/* for struct kvm_regs */
+
+#include <stdint.h>
+
+struct kvm;
+
+struct kvm_cpu {
+	struct kvm		*kvm;		/* parent KVM */
+	int			vcpu_fd;	/* For VCPU ioctls() */
+	struct kvm_run		*kvm_run;
+
+	struct kvm_regs		regs;
+	struct kvm_sregs	sregs;
+	struct kvm_fpu		fpu;
+
+	struct kvm_msrs		*msrs;		/* dynamically allocated */
+};
+
+struct kvm_cpu *kvm_cpu__init(struct kvm *kvm);
+void kvm_cpu__delete(struct kvm_cpu *self);
+void kvm_cpu__reset_vcpu(struct kvm_cpu *self);
+void kvm_cpu__setup_cpuid(struct kvm_cpu *self);
+void kvm_cpu__enable_singlestep(struct kvm_cpu *self);
+void kvm_cpu__run(struct kvm_cpu *self);
+
+void kvm_cpu__show_code(struct kvm_cpu *self);
+void kvm_cpu__show_registers(struct kvm_cpu *self);
+void kvm_cpu__show_page_tables(struct kvm_cpu *self);
+
+#endif /* KVM__KVM_CPU_H */
diff --git a/tools/kvm/include/kvm/kvm.h b/tools/kvm/include/kvm/kvm.h
index a099307..7af98f9 100644
--- a/tools/kvm/include/kvm/kvm.h
+++ b/tools/kvm/include/kvm/kvm.h
@@ -3,8 +3,6 @@
 
 #include "kvm/interrupt.h"
 
-#include <linux/kvm.h>	/* for struct kvm_regs */
-
 #include <stdbool.h>
 #include <stdint.h>
 #include <time.h>
@@ -12,9 +10,7 @@
 struct kvm {
 	int			sys_fd;		/* For system ioctls(), i.e. /dev/kvm */
 	int			vm_fd;		/* For VM ioctls() */
-	int			vcpu_fd;	/* For VCPU ioctls() */
 	timer_t			timerid;	/* Posix timer for interrupts */
-	struct kvm_run		*kvm_run;
 
 	struct disk_image	*disk_image;
 	uint64_t		ram_size;
@@ -26,25 +22,16 @@ struct kvm {
 	uint16_t		boot_ip;
 	uint16_t		boot_sp;
 
-	struct kvm_regs		regs;
-	struct kvm_sregs	sregs;
-	struct kvm_fpu		fpu;
-	struct kvm_msrs		*msrs;	/* dynamically allocated */
-
 	struct interrupt_table	interrupt_table;
 };
 
 struct kvm *kvm__init(const char *kvm_dev, unsigned long ram_size);
 void kvm__delete(struct kvm *self);
-void kvm__setup_cpuid(struct kvm *self);
-void kvm__enable_singlestep(struct kvm *self);
 bool kvm__load_kernel(struct kvm *kvm, const char *kernel_filename,
 			const char *initrd_filename, const char *kernel_cmdline);
-void kvm__reset_vcpu(struct kvm *self);
 void kvm__setup_bios(struct kvm *self);
 void kvm__start_timer(struct kvm *self);
 void kvm__stop_timer(struct kvm *self);
-void kvm__run(struct kvm *self);
 void kvm__irq_line(struct kvm *self, int irq, int level);
 bool kvm__emulate_io(struct kvm *self, uint16_t port, void *data, int direction, int size, uint32_t count);
 bool kvm__emulate_mmio(struct kvm *self, uint64_t phys_addr, uint8_t *data, uint32_t len, uint8_t is_write);
@@ -52,9 +39,6 @@ bool kvm__emulate_mmio(struct kvm *self, uint64_t phys_addr, uint8_t *data, uint
 /*
  * Debugging
  */
-void kvm__show_code(struct kvm *self);
-void kvm__show_registers(struct kvm *self);
-void kvm__show_page_tables(struct kvm *self);
 void kvm__dump_mem(struct kvm *self, unsigned long addr, unsigned long size);
 
 extern const char *kvm_exit_reasons[];
diff --git a/tools/kvm/ioport.c b/tools/kvm/ioport.c
index 4579e89..6303571 100644
--- a/tools/kvm/ioport.c
+++ b/tools/kvm/ioport.c
@@ -2,6 +2,8 @@
 
 #include "kvm/kvm.h"
 
+#include <linux/kvm.h>	/* for KVM_EXIT_* */
+
 #include <stdbool.h>
 #include <assert.h>
 #include <limits.h>
diff --git a/tools/kvm/kvm-cpu.c b/tools/kvm/kvm-cpu.c
new file mode 100644
index 0000000..374adb2
--- /dev/null
+++ b/tools/kvm/kvm-cpu.c
@@ -0,0 +1,370 @@
+#include "kvm/kvm-cpu.h"
+
+#include "kvm/util.h"
+#include "kvm/kvm.h"
+
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <stdio.h>
+
+static inline bool is_in_protected_mode(struct kvm_cpu *self)
+{
+	return self->sregs.cr0 & 0x01;
+}
+
+static inline uint64_t ip_to_flat(struct kvm_cpu *self, uint64_t ip)
+{
+	uint64_t cs;
+
+	/*
+	 * NOTE! We should take code segment base address into account here.
+	 * Luckily it's usually zero because Linux uses flat memory model.
+	 */
+	if (is_in_protected_mode(self))
+		return ip;
+
+	cs = self->sregs.cs.selector;
+
+	return ip + (cs << 4);
+}
+
+static inline uint32_t selector_to_base(uint16_t selector)
+{
+	/*
+	 * KVM on Intel requires 'base' to be 'selector * 16' in real mode.
+	 */
+	return (uint32_t)selector * 16;
+}
+
+static struct kvm_cpu *kvm_cpu__new(struct kvm *kvm)
+{
+	struct kvm_cpu *self;
+
+	self		= calloc(1, sizeof *self);
+	if (!self)
+		return NULL;
+
+	self->kvm	= kvm;
+
+	return self;
+}
+
+void kvm_cpu__delete(struct kvm_cpu *self)
+{
+	/* free(NULL) is a no-op, so the MSR table needs no NULL guard. */
+	free(self->msrs);
+
+	free(self);
+}
+
+struct kvm_cpu *kvm_cpu__init(struct kvm *kvm)
+{
+	struct kvm_cpu *self;
+	int mmap_size;
+
+	self		= kvm_cpu__new(kvm);
+	if (!self)
+		return NULL;
+
+	self->vcpu_fd = ioctl(self->kvm->vm_fd, KVM_CREATE_VCPU, 0);
+	if (self->vcpu_fd < 0)
+		die_perror("KVM_CREATE_VCPU ioctl");
+
+	mmap_size = ioctl(self->kvm->sys_fd, KVM_GET_VCPU_MMAP_SIZE, 0);
+	if (mmap_size < 0)
+		die_perror("KVM_GET_VCPU_MMAP_SIZE ioctl");
+
+	self->kvm_run = mmap(NULL, mmap_size, PROT_READ|PROT_WRITE, MAP_SHARED, self->vcpu_fd, 0);
+	if (self->kvm_run == MAP_FAILED)
+		die("unable to mmap vcpu fd");
+
+	return self;
+}
+
+void kvm_cpu__enable_singlestep(struct kvm_cpu *self)
+{
+	struct kvm_guest_debug debug = {
+		.control	= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP,
+	};
+
+	if (ioctl(self->vcpu_fd, KVM_SET_GUEST_DEBUG, &debug) < 0)
+		warning("KVM_SET_GUEST_DEBUG failed");
+}
+
+static struct kvm_msrs *kvm_msrs__new(size_t nmsrs)
+{
+	struct kvm_msrs *self = calloc(1, sizeof(*self) + (sizeof(struct kvm_msr_entry) * nmsrs));
+
+	if (!self)
+		die("out of memory");
+
+	return self;
+}
+
+#define MSR_IA32_TIME_STAMP_COUNTER	0x10
+
+#define MSR_IA32_SYSENTER_CS		0x174
+#define MSR_IA32_SYSENTER_ESP		0x175
+#define MSR_IA32_SYSENTER_EIP		0x176
+
+#define MSR_IA32_STAR			0xc0000081
+#define MSR_IA32_LSTAR			0xc0000082
+#define MSR_IA32_CSTAR			0xc0000083
+#define MSR_IA32_FMASK			0xc0000084
+#define MSR_IA32_KERNEL_GS_BASE		0xc0000102
+
+#define KVM_MSR_ENTRY(_index, _data)	\
+	(struct kvm_msr_entry) { .index = _index, .data = _data }
+
+static void kvm_cpu__setup_msrs(struct kvm_cpu *self)
+{
+	unsigned long ndx = 0;
+
+	self->msrs = kvm_msrs__new(100);
+
+	self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_CS,	0x0);
+	self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_ESP,	0x0);
+	self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_EIP,	0x0);
+#ifdef CONFIG_X86_64
+	self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_STAR,		0x0);
+	self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_CSTAR,		0x0);
+	self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_KERNEL_GS_BASE,	0x0);
+	self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_FMASK,		0x0);
+	self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_LSTAR,		0x0);
+#endif
+	self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_TIME_STAMP_COUNTER,	0x0);
+
+	self->msrs->nmsrs	= ndx;
+
+	if (ioctl(self->vcpu_fd, KVM_SET_MSRS, self->msrs) < 0)
+		die_perror("KVM_SET_MSRS failed");
+}
+
+static void kvm_cpu__setup_fpu(struct kvm_cpu *self)
+{
+	self->fpu = (struct kvm_fpu) {
+		.fcw		= 0x37f,
+		.mxcsr		= 0x1f80,
+	};
+
+	if (ioctl(self->vcpu_fd, KVM_SET_FPU, &self->fpu) < 0)
+		die_perror("KVM_SET_FPU failed");
+}
+
+static void kvm_cpu__setup_regs(struct kvm_cpu *self)
+{
+	self->regs = (struct kvm_regs) {
+		/* We start the guest in 16-bit real mode  */
+		.rflags		= 0x0000000000000002ULL,
+
+		.rip		= self->kvm->boot_ip,
+		.rsp		= self->kvm->boot_sp,
+		.rbp		= self->kvm->boot_sp,
+	};
+
+	if (self->regs.rip > USHRT_MAX)
+		die("ip 0x%" PRIx64 " is too high for real mode", (uint64_t) self->regs.rip);
+
+	if (ioctl(self->vcpu_fd, KVM_SET_REGS, &self->regs) < 0)
+		die_perror("KVM_SET_REGS failed");
+}
+
+static void kvm_cpu__setup_sregs(struct kvm_cpu *self)
+{
+
+	if (ioctl(self->vcpu_fd, KVM_GET_SREGS, &self->sregs) < 0)
+		die_perror("KVM_GET_SREGS failed");
+
+	self->sregs.cs.selector	= self->kvm->boot_selector;
+	self->sregs.cs.base	= selector_to_base(self->kvm->boot_selector);
+	self->sregs.ss.selector	= self->kvm->boot_selector;
+	self->sregs.ss.base	= selector_to_base(self->kvm->boot_selector);
+	self->sregs.ds.selector	= self->kvm->boot_selector;
+	self->sregs.ds.base	= selector_to_base(self->kvm->boot_selector);
+	self->sregs.es.selector	= self->kvm->boot_selector;
+	self->sregs.es.base	= selector_to_base(self->kvm->boot_selector);
+	self->sregs.fs.selector	= self->kvm->boot_selector;
+	self->sregs.fs.base	= selector_to_base(self->kvm->boot_selector);
+	self->sregs.gs.selector	= self->kvm->boot_selector;
+	self->sregs.gs.base	= selector_to_base(self->kvm->boot_selector);
+
+	if (ioctl(self->vcpu_fd, KVM_SET_SREGS, &self->sregs) < 0)
+		die_perror("KVM_SET_SREGS failed");
+}
+
+/**
+ * kvm_cpu__reset_vcpu - reset virtual CPU to a known state
+ */
+void kvm_cpu__reset_vcpu(struct kvm_cpu *self)
+{
+	kvm_cpu__setup_sregs(self);
+	kvm_cpu__setup_regs(self);
+	kvm_cpu__setup_fpu(self);
+	kvm_cpu__setup_msrs(self);
+}
+
+static void print_dtable(const char *name, struct kvm_dtable *dtable)
+{
+	printf(" %s                 %016" PRIx64 "  %08" PRIx16 "\n",
+		name, (uint64_t) dtable->base, (uint16_t) dtable->limit);
+}
+
+static void print_segment(const char *name, struct kvm_segment *seg)
+{
+	printf(" %s       %04" PRIx16 "      %016" PRIx64 "  %08" PRIx32 "  %02" PRIx8 "    %x %x   %x  %x %x %x %x\n",
+		name, (uint16_t) seg->selector, (uint64_t) seg->base, (uint32_t) seg->limit,
+		(uint8_t) seg->type, seg->present, seg->dpl, seg->db, seg->s, seg->l, seg->g, seg->avl);
+}
+
+void kvm_cpu__show_registers(struct kvm_cpu *self)
+{
+	unsigned long cr0, cr2, cr3;
+	unsigned long cr4, cr8;
+	unsigned long rax, rbx, rcx;
+	unsigned long rdx, rsi, rdi;
+	unsigned long rbp,  r8,  r9;
+	unsigned long r10, r11, r12;
+	unsigned long r13, r14, r15;
+	unsigned long rip, rsp;
+	struct kvm_sregs sregs;
+	unsigned long rflags;
+	struct kvm_regs regs;
+	int i;
+
+	if (ioctl(self->vcpu_fd, KVM_GET_REGS, &regs) < 0)
+		die("KVM_GET_REGS failed");
+
+	rflags = regs.rflags;
+
+	rip = regs.rip; rsp = regs.rsp;
+	rax = regs.rax; rbx = regs.rbx; rcx = regs.rcx;
+	rdx = regs.rdx; rsi = regs.rsi; rdi = regs.rdi;
+	rbp = regs.rbp; r8  = regs.r8;  r9  = regs.r9;
+	r10 = regs.r10; r11 = regs.r11; r12 = regs.r12;
+	r13 = regs.r13; r14 = regs.r14; r15 = regs.r15;
+
+	printf("Registers:\n");
+	printf(" rip: %016lx   rsp: %016lx flags: %016lx\n", rip, rsp, rflags);
+	printf(" rax: %016lx   rbx: %016lx   rcx: %016lx\n", rax, rbx, rcx);
+	printf(" rdx: %016lx   rsi: %016lx   rdi: %016lx\n", rdx, rsi, rdi);
+	printf(" rbp: %016lx   r8:  %016lx   r9:  %016lx\n", rbp, r8,  r9);
+	printf(" r10: %016lx   r11: %016lx   r12: %016lx\n", r10, r11, r12);
+	printf(" r13: %016lx   r14: %016lx   r15: %016lx\n", r13, r14, r15);
+	/* Fetch the sregs: control registers, segments and descriptor tables. */
+	if (ioctl(self->vcpu_fd, KVM_GET_SREGS, &sregs) < 0)
+		die("KVM_GET_SREGS failed");
+
+	cr0 = sregs.cr0; cr2 = sregs.cr2; cr3 = sregs.cr3;
+	cr4 = sregs.cr4; cr8 = sregs.cr8;
+
+	printf(" cr0: %016lx   cr2: %016lx   cr3: %016lx\n", cr0, cr2, cr3);
+	printf(" cr4: %016lx   cr8: %016lx\n", cr4, cr8);
+	printf("Segment registers:\n");
+	printf(" register  selector  base              limit     type  p dpl db s l g avl\n");
+	print_segment("cs ", &sregs.cs);
+	print_segment("ss ", &sregs.ss);
+	print_segment("ds ", &sregs.ds);
+	print_segment("es ", &sregs.es);
+	print_segment("fs ", &sregs.fs);
+	print_segment("gs ", &sregs.gs);
+	print_segment("tr ", &sregs.tr);
+	print_segment("ldt", &sregs.ldt);
+	print_dtable("gdt", &sregs.gdt);
+	print_dtable("idt", &sregs.idt);
+	printf(" [ efer: %016" PRIx64 "  apic base: %016" PRIx64 "  nmi: %s ]\n",
+		(uint64_t) sregs.efer, (uint64_t) sregs.apic_base,
+		(self->kvm->nmi_disabled ? "disabled" : "enabled"));
+	printf("Interrupt bitmap:\n");
+	printf(" ");
+	for (i = 0; i < (KVM_NR_INTERRUPTS + 63) / 64; i++)
+		printf("%016" PRIx64 " ", (uint64_t) sregs.interrupt_bitmap[i]);
+	printf("\n");
+}
+
+void kvm_cpu__show_code(struct kvm_cpu *self)
+{
+	unsigned int code_bytes = 64;
+	unsigned int code_prologue = code_bytes * 43 / 64;
+	unsigned int code_len = code_bytes;
+	unsigned char c;
+	unsigned int i;
+	uint8_t *ip;
+
+	if (ioctl(self->vcpu_fd, KVM_GET_REGS, &self->regs) < 0)
+		die("KVM_GET_REGS failed");
+
+	if (ioctl(self->vcpu_fd, KVM_GET_SREGS, &self->sregs) < 0)
+		die("KVM_GET_SREGS failed");
+
+	ip = guest_flat_to_host(self->kvm, ip_to_flat(self, self->regs.rip) - code_prologue);
+
+	printf("Code: ");
+
+	for (i = 0; i < code_len; i++, ip++) {
+		if (!host_ptr_in_ram(self->kvm, ip))
+			break;
+
+		c = *ip;
+
+		if (ip == guest_flat_to_host(self->kvm, ip_to_flat(self, self->regs.rip)))
+			printf("<%02x> ", c);
+		else
+			printf("%02x ", c);
+	}
+
+	printf("\n");
+
+	printf("Stack:\n");
+	kvm__dump_mem(self->kvm, self->regs.rsp, 32);
+}
+
+void kvm_cpu__show_page_tables(struct kvm_cpu *self)
+{
+	uint64_t *pte1;
+	uint64_t *pte2;
+	uint64_t *pte3;
+	uint64_t *pte4;
+
+	/* Refresh sregs first: the mode check below reads self->sregs.cr0. */
+	if (ioctl(self->vcpu_fd, KVM_GET_SREGS, &self->sregs) < 0)
+		die("KVM_GET_SREGS failed");
+	if (!is_in_protected_mode(self))
+		return;
+
+	pte4	= guest_flat_to_host(self->kvm, self->sregs.cr3);
+	if (!host_ptr_in_ram(self->kvm, pte4))
+		return;
+
+	pte3	= guest_flat_to_host(self->kvm, (*pte4 & ~0xfff));
+	if (!host_ptr_in_ram(self->kvm, pte3))
+		return;
+
+	pte2	= guest_flat_to_host(self->kvm, (*pte3 & ~0xfff));
+	if (!host_ptr_in_ram(self->kvm, pte2))
+		return;
+
+	pte1	= guest_flat_to_host(self->kvm, (*pte2 & ~0xfff));
+	if (!host_ptr_in_ram(self->kvm, pte1))
+		return;
+
+	printf("Page Tables:\n");
+	if (*pte2 & (1 << 7))
+		printf(" pte4: %016" PRIx64 "   pte3: %016" PRIx64
+			"   pte2: %016" PRIx64 "\n",
+			*pte4, *pte3, *pte2);
+	else
+		printf(" pte4: %016" PRIx64 "   pte3: %016" PRIx64 "   pte2: %016"
+			PRIx64 "   pte1: %016" PRIx64 "\n",
+			*pte4, *pte3, *pte2, *pte1);
+}
+
+void kvm_cpu__run(struct kvm_cpu *self)
+{
+	int err;
+
+	err = ioctl(self->vcpu_fd, KVM_RUN, 0);
+	if (err && (errno != EINTR && errno != EAGAIN))
+		die_perror("KVM_RUN failed");
+}
diff --git a/tools/kvm/kvm-run.c b/tools/kvm/kvm-run.c
index 9b0786a..9392818 100644
--- a/tools/kvm/kvm-run.c
+++ b/tools/kvm/kvm-run.c
@@ -10,6 +10,7 @@
 /* user defined header files */
 #include <linux/types.h>
 #include <kvm/kvm.h>
+#include <kvm/kvm-cpu.h>
 #include <kvm/8250-serial.h>
 #include <kvm/virtio-blk.h>
 #include <kvm/virtio-console.h>
@@ -29,6 +30,7 @@
 #define MIN_RAM_SIZE_BYTE	(MIN_RAM_SIZE_MB << MB_SHIFT)
 
 static struct kvm *kvm;
+static struct kvm_cpu *cpu;
 
 static void handle_sigint(int sig)
 {
@@ -37,10 +39,11 @@ static void handle_sigint(int sig)
 
 static void handle_sigquit(int sig)
 {
-	kvm__show_registers(kvm);
-	kvm__show_code(kvm);
-	kvm__show_page_tables(kvm);
+	kvm_cpu__show_registers(cpu);
+	kvm_cpu__show_code(cpu);
+	kvm_cpu__show_page_tables(cpu);
 
+	kvm_cpu__delete(cpu);
 	kvm__delete(kvm);
 
 	exit(1);
@@ -130,13 +133,17 @@ int kvm_cmd_run(int argc, const char **argv, const char *prefix)
 
 	kvm = kvm__init(kvm_dev, ram_size);
 
+	cpu = kvm_cpu__init(kvm);
+	if (!cpu)
+		die("unable to initialize KVM VCPU");
+
 	if (image_filename) {
 		kvm->disk_image	= disk_image__open(image_filename);
 		if (!kvm->disk_image)
 			die("unable to load disk image %s", image_filename);
 	}
 
-	kvm__setup_cpuid(kvm);
+	kvm_cpu__setup_cpuid(cpu);
 
 	strcpy(real_cmdline, "notsc nolapic noacpi pci=conf1 console=ttyS0 ");
 	if (!kernel_cmdline || !strstr(kernel_cmdline, "root=")) {
@@ -153,12 +160,12 @@ int kvm_cmd_run(int argc, const char **argv, const char *prefix)
 				real_cmdline))
 		die("unable to load kernel %s", kernel_filename);
 
-	kvm__reset_vcpu(kvm);
+	kvm_cpu__reset_vcpu(cpu);
 
 	kvm__setup_bios(kvm);
 
 	if (single_step)
-		kvm__enable_singlestep(kvm);
+		kvm_cpu__enable_singlestep(cpu);
 
 	serial8250__init(kvm);
 
@@ -171,23 +178,23 @@ int kvm_cmd_run(int argc, const char **argv, const char *prefix)
 	kvm__start_timer(kvm);
 
 	for (;;) {
-		kvm__run(kvm);
+		kvm_cpu__run(cpu);
 
-		switch (kvm->kvm_run->exit_reason) {
+		switch (cpu->kvm_run->exit_reason) {
 		case KVM_EXIT_DEBUG:
-			kvm__show_registers(kvm);
-			kvm__show_code(kvm);
+			kvm_cpu__show_registers(cpu);
+			kvm_cpu__show_code(cpu);
 			break;
 		case KVM_EXIT_IO: {
 			bool ret;
 
 			ret = kvm__emulate_io(kvm,
-					kvm->kvm_run->io.port,
-					(uint8_t *)kvm->kvm_run +
-					kvm->kvm_run->io.data_offset,
-					kvm->kvm_run->io.direction,
-					kvm->kvm_run->io.size,
-					kvm->kvm_run->io.count);
+					cpu->kvm_run->io.port,
+					(uint8_t *)cpu->kvm_run +
+					cpu->kvm_run->io.data_offset,
+					cpu->kvm_run->io.direction,
+					cpu->kvm_run->io.size,
+					cpu->kvm_run->io.count);
 
 			if (!ret)
 				goto panic_kvm;
@@ -197,10 +204,10 @@ int kvm_cmd_run(int argc, const char **argv, const char *prefix)
 			bool ret;
 
 			ret = kvm__emulate_mmio(kvm,
-					kvm->kvm_run->mmio.phys_addr,
-					kvm->kvm_run->mmio.data,
-					kvm->kvm_run->mmio.len,
-					kvm->kvm_run->mmio.is_write);
+					cpu->kvm_run->mmio.phys_addr,
+					cpu->kvm_run->mmio.data,
+					cpu->kvm_run->mmio.len,
+					cpu->kvm_run->mmio.is_write);
 
 			if (!ret)
 				goto panic_kvm;
@@ -227,15 +234,16 @@ exit_kvm:
 
 panic_kvm:
 	fprintf(stderr, "KVM exit reason: %" PRIu32 " (\"%s\")\n",
-		kvm->kvm_run->exit_reason,
-		kvm_exit_reasons[kvm->kvm_run->exit_reason]);
-	if (kvm->kvm_run->exit_reason == KVM_EXIT_UNKNOWN)
+		cpu->kvm_run->exit_reason,
+		kvm_exit_reasons[cpu->kvm_run->exit_reason]);
+	if (cpu->kvm_run->exit_reason == KVM_EXIT_UNKNOWN)
 		fprintf(stderr, "KVM exit code: 0x%" PRIu64 "\n",
-			kvm->kvm_run->hw.hardware_exit_reason);
+			cpu->kvm_run->hw.hardware_exit_reason);
 	disk_image__close(kvm->disk_image);
-	kvm__show_registers(kvm);
-	kvm__show_code(kvm);
-	kvm__show_page_tables(kvm);
+	kvm_cpu__show_registers(cpu);
+	kvm_cpu__show_code(cpu);
+	kvm_cpu__show_page_tables(cpu);
+	kvm_cpu__delete(cpu);
 	kvm__delete(kvm);
 
 	return 1;
diff --git a/tools/kvm/kvm.c b/tools/kvm/kvm.c
index 2cd206d..af1f65f 100644
--- a/tools/kvm/kvm.c
+++ b/tools/kvm/kvm.c
@@ -107,9 +107,6 @@ void kvm__delete(struct kvm *self)
 {
 	kvm__stop_timer(self);
 
-	if (self->msrs)
-		free(self->msrs);
-
 	free(self->ram_start);
 	free(self);
 }
@@ -162,7 +159,6 @@ struct kvm *kvm__init(const char *kvm_dev, unsigned long ram_size)
 	struct kvm_pit_config pit_config = { .flags = 0, };
 	struct kvm *self;
 	long page_size;
-	int mmap_size;
 	int ret;
 
 	if (!kvm__cpu_supports_vm())
@@ -222,31 +218,9 @@ struct kvm *kvm__init(const char *kvm_dev, unsigned long ram_size)
 	if (ret < 0)
 		die_perror("KVM_CREATE_IRQCHIP ioctl");
 
-	self->vcpu_fd = ioctl(self->vm_fd, KVM_CREATE_VCPU, 0);
-	if (self->vcpu_fd < 0)
-		die_perror("KVM_CREATE_VCPU ioctl");
-
-	mmap_size = ioctl(self->sys_fd, KVM_GET_VCPU_MMAP_SIZE, 0);
-	if (mmap_size < 0)
-		die_perror("KVM_GET_VCPU_MMAP_SIZE ioctl");
-
-	self->kvm_run = mmap(NULL, mmap_size, PROT_READ|PROT_WRITE, MAP_SHARED, self->vcpu_fd, 0);
-	if (self->kvm_run == MAP_FAILED)
-		die("unable to mmap vcpu fd");
-
 	return self;
 }
 
-void kvm__enable_singlestep(struct kvm *self)
-{
-	struct kvm_guest_debug debug = {
-		.control	= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP,
-	};
-
-	if (ioctl(self->vcpu_fd, KVM_SET_GUEST_DEBUG, &debug) < 0)
-		warning("KVM_SET_GUEST_DEBUG failed");
-}
-
 #define BOOT_LOADER_SELECTOR	0x1000
 #define BOOT_LOADER_IP		0x0000
 #define BOOT_LOADER_SP		0x8000
@@ -417,154 +391,6 @@ found_kernel:
 	return ret;
 }
 
-static inline uint64_t ip_flat_to_real(struct kvm *self, uint64_t ip)
-{
-	uint64_t cs = self->sregs.cs.selector;
-
-	return ip - (cs << 4);
-}
-
-static inline bool is_in_protected_mode(struct kvm *self)
-{
-	return self->sregs.cr0 & 0x01;
-}
-
-static inline uint64_t ip_to_flat(struct kvm *self, uint64_t ip)
-{
-	uint64_t cs;
-
-	/*
-	 * NOTE! We should take code segment base address into account here.
-	 * Luckily it's usually zero because Linux uses flat memory model.
-	 */
-	if (is_in_protected_mode(self))
-		return ip;
-
-	cs = self->sregs.cs.selector;
-
-	return ip + (cs << 4);
-}
-
-static inline uint32_t selector_to_base(uint16_t selector)
-{
-	/*
-	 * KVM on Intel requires 'base' to be 'selector * 16' in real mode.
-	 */
-	return (uint32_t)selector * 16;
-}
-
-static struct kvm_msrs *kvm_msrs__new(size_t nmsrs)
-{
-	struct kvm_msrs *self = calloc(1, sizeof(*self) + (sizeof(struct kvm_msr_entry) * nmsrs));
-
-	if (!self)
-		die("out of memory");
-
-	return self;
-}
-
-#define MSR_IA32_TIME_STAMP_COUNTER	0x10
-
-#define MSR_IA32_SYSENTER_CS		0x174
-#define MSR_IA32_SYSENTER_ESP		0x175
-#define MSR_IA32_SYSENTER_EIP		0x176
-
-#define MSR_IA32_STAR			0xc0000081
-#define MSR_IA32_LSTAR			0xc0000082
-#define MSR_IA32_CSTAR			0xc0000083
-#define MSR_IA32_FMASK			0xc0000084
-#define MSR_IA32_KERNEL_GS_BASE		0xc0000102
-
-#define KVM_MSR_ENTRY(_index, _data)	\
-	(struct kvm_msr_entry) { .index = _index, .data = _data }
-
-static void kvm__setup_msrs(struct kvm *self)
-{
-	unsigned long ndx = 0;
-
-	self->msrs = kvm_msrs__new(100);
-
-	self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_CS,	0x0);
-	self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_ESP,	0x0);
-	self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_SYSENTER_EIP,	0x0);
-#ifdef CONFIG_X86_64
-	self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_STAR,		0x0);
-	self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_CSTAR,		0x0);
-	self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_KERNEL_GS_BASE,	0x0);
-	self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_FMASK,		0x0);
-	self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_LSTAR,		0x0);
-#endif
-	self->msrs->entries[ndx++] = KVM_MSR_ENTRY(MSR_IA32_TIME_STAMP_COUNTER,	0x0);
-
-	self->msrs->nmsrs	= ndx;
-
-	if (ioctl(self->vcpu_fd, KVM_SET_MSRS, self->msrs) < 0)
-		die_perror("KVM_SET_MSRS failed");
-}
-
-static void kvm__setup_fpu(struct kvm *self)
-{
-	self->fpu = (struct kvm_fpu) {
-		.fcw		= 0x37f,
-		.mxcsr		= 0x1f80,
-	};
-
-	if (ioctl(self->vcpu_fd, KVM_SET_FPU, &self->fpu) < 0)
-		die_perror("KVM_SET_FPU failed");
-}
-
-static void kvm__setup_regs(struct kvm *self)
-{
-	self->regs = (struct kvm_regs) {
-		/* We start the guest in 16-bit real mode  */
-		.rflags		= 0x0000000000000002ULL,
-
-		.rip		= self->boot_ip,
-		.rsp		= self->boot_sp,
-		.rbp		= self->boot_sp,
-	};
-
-	if (self->regs.rip > USHRT_MAX)
-		die("ip 0x%" PRIx64 " is too high for real mode", (uint64_t) self->regs.rip);
-
-	if (ioctl(self->vcpu_fd, KVM_SET_REGS, &self->regs) < 0)
-		die_perror("KVM_SET_REGS failed");
-}
-
-static void kvm__setup_sregs(struct kvm *self)
-{
-
-	if (ioctl(self->vcpu_fd, KVM_GET_SREGS, &self->sregs) < 0)
-		die_perror("KVM_GET_SREGS failed");
-
-	self->sregs.cs.selector	= self->boot_selector;
-	self->sregs.cs.base	= selector_to_base(self->boot_selector);
-	self->sregs.ss.selector	= self->boot_selector;
-	self->sregs.ss.base	= selector_to_base(self->boot_selector);
-	self->sregs.ds.selector	= self->boot_selector;
-	self->sregs.ds.base	= selector_to_base(self->boot_selector);
-	self->sregs.es.selector	= self->boot_selector;
-	self->sregs.es.base	= selector_to_base(self->boot_selector);
-	self->sregs.fs.selector	= self->boot_selector;
-	self->sregs.fs.base	= selector_to_base(self->boot_selector);
-	self->sregs.gs.selector	= self->boot_selector;
-	self->sregs.gs.base	= selector_to_base(self->boot_selector);
-
-	if (ioctl(self->vcpu_fd, KVM_SET_SREGS, &self->sregs) < 0)
-		die_perror("KVM_SET_SREGS failed");
-}
-
-/**
- * kvm__reset_vcpu - reset virtual CPU to a known state
- */
-void kvm__reset_vcpu(struct kvm *self)
-{
-	kvm__setup_sregs(self);
-	kvm__setup_regs(self);
-	kvm__setup_fpu(self);
-	kvm__setup_msrs(self);
-}
-
 /**
  * kvm__setup_bios - inject BIOS into guest system memory
  * @self - guest system descriptor
@@ -629,15 +455,6 @@ void kvm__stop_timer(struct kvm *self)
 	self->timerid = 0;
 }
 
-void kvm__run(struct kvm *self)
-{
-	int err;
-
-	err = ioctl(self->vcpu_fd, KVM_RUN, 0);
-	if (err && (errno != EINTR && errno != EAGAIN))
-		die_perror("KVM_RUN failed");
-}
-
 void kvm__irq_line(struct kvm *self, int irq, int level)
 {
 	struct kvm_irq_level irq_level;
@@ -653,161 +470,6 @@ void kvm__irq_line(struct kvm *self, int irq, int level)
 		die_perror("KVM_IRQ_LINE failed");
 }
 
-static void print_dtable(const char *name, struct kvm_dtable *dtable)
-{
-	printf(" %s                 %016" PRIx64 "  %08" PRIx16 "\n",
-		name, (uint64_t) dtable->base, (uint16_t) dtable->limit);
-}
-
-static void print_segment(const char *name, struct kvm_segment *seg)
-{
-	printf(" %s       %04" PRIx16 "      %016" PRIx64 "  %08" PRIx32 "  %02" PRIx8 "    %x %x   %x  %x %x %x %x\n",
-		name, (uint16_t) seg->selector, (uint64_t) seg->base, (uint32_t) seg->limit,
-		(uint8_t) seg->type, seg->present, seg->dpl, seg->db, seg->s, seg->l, seg->g, seg->avl);
-}
-
-void kvm__show_registers(struct kvm *self)
-{
-	unsigned long cr0, cr2, cr3;
-	unsigned long cr4, cr8;
-	unsigned long rax, rbx, rcx;
-	unsigned long rdx, rsi, rdi;
-	unsigned long rbp,  r8,  r9;
-	unsigned long r10, r11, r12;
-	unsigned long r13, r14, r15;
-	unsigned long rip, rsp;
-	struct kvm_sregs sregs;
-	unsigned long rflags;
-	struct kvm_regs regs;
-	int i;
-
-	if (ioctl(self->vcpu_fd, KVM_GET_REGS, &regs) < 0)
-		die("KVM_GET_REGS failed");
-
-	rflags = regs.rflags;
-
-	rip = regs.rip; rsp = regs.rsp;
-	rax = regs.rax; rbx = regs.rbx; rcx = regs.rcx;
-	rdx = regs.rdx; rsi = regs.rsi; rdi = regs.rdi;
-	rbp = regs.rbp; r8  = regs.r8;  r9  = regs.r9;
-	r10 = regs.r10; r11 = regs.r11; r12 = regs.r12;
-	r13 = regs.r13; r14 = regs.r14; r15 = regs.r15;
-
-	printf("Registers:\n");
-	printf(" rip: %016lx   rsp: %016lx flags: %016lx\n", rip, rsp, rflags);
-	printf(" rax: %016lx   rbx: %016lx   rcx: %016lx\n", rax, rbx, rcx);
-	printf(" rdx: %016lx   rsi: %016lx   rdi: %016lx\n", rdx, rsi, rdi);
-	printf(" rbp: %016lx   r8:  %016lx   r9:  %016lx\n", rbp, r8,  r9);
-	printf(" r10: %016lx   r11: %016lx   r12: %016lx\n", r10, r11, r12);
-	printf(" r13: %016lx   r14: %016lx   r15: %016lx\n", r13, r14, r15);
-
-	if (ioctl(self->vcpu_fd, KVM_GET_SREGS, &sregs) < 0)
-		die("KVM_GET_REGS failed");
-
-	cr0 = sregs.cr0; cr2 = sregs.cr2; cr3 = sregs.cr3;
-	cr4 = sregs.cr4; cr8 = sregs.cr8;
-
-	printf(" cr0: %016lx   cr2: %016lx   cr3: %016lx\n", cr0, cr2, cr3);
-	printf(" cr4: %016lx   cr8: %016lx\n", cr4, cr8);
-	printf("Segment registers:\n");
-	printf(" register  selector  base              limit     type  p dpl db s l g avl\n");
-	print_segment("cs ", &sregs.cs);
-	print_segment("ss ", &sregs.ss);
-	print_segment("ds ", &sregs.ds);
-	print_segment("es ", &sregs.es);
-	print_segment("fs ", &sregs.fs);
-	print_segment("gs ", &sregs.gs);
-	print_segment("tr ", &sregs.tr);
-	print_segment("ldt", &sregs.ldt);
-	print_dtable("gdt", &sregs.gdt);
-	print_dtable("idt", &sregs.idt);
-	printf(" [ efer: %016" PRIx64 "  apic base: %016" PRIx64 "  nmi: %s ]\n",
-		(uint64_t) sregs.efer, (uint64_t) sregs.apic_base,
-		(self->nmi_disabled ? "disabled" : "enabled"));
-	printf("Interrupt bitmap:\n");
-	printf(" ");
-	for (i = 0; i < (KVM_NR_INTERRUPTS + 63) / 64; i++)
-		printf("%016" PRIx64 " ", (uint64_t) sregs.interrupt_bitmap[i]);
-	printf("\n");
-}
-
-void kvm__show_code(struct kvm *self)
-{
-	unsigned int code_bytes = 64;
-	unsigned int code_prologue = code_bytes * 43 / 64;
-	unsigned int code_len = code_bytes;
-	unsigned char c;
-	unsigned int i;
-	uint8_t *ip;
-
-	if (ioctl(self->vcpu_fd, KVM_GET_REGS, &self->regs) < 0)
-		die("KVM_GET_REGS failed");
-
-	if (ioctl(self->vcpu_fd, KVM_GET_SREGS, &self->sregs) < 0)
-		die("KVM_GET_SREGS failed");
-
-	ip = guest_flat_to_host(self, ip_to_flat(self, self->regs.rip) - code_prologue);
-
-	printf("Code: ");
-
-	for (i = 0; i < code_len; i++, ip++) {
-		if (!host_ptr_in_ram(self, ip))
-			break;
-
-		c = *ip;
-
-		if (ip == guest_flat_to_host(self, ip_to_flat(self, self->regs.rip)))
-			printf("<%02x> ", c);
-		else
-			printf("%02x ", c);
-	}
-
-	printf("\n");
-
-	printf("Stack:\n");
-	kvm__dump_mem(self, self->regs.rsp, 32);
-}
-
-void kvm__show_page_tables(struct kvm *self)
-{
-	uint64_t *pte1;
-	uint64_t *pte2;
-	uint64_t *pte3;
-	uint64_t *pte4;
-
-	if (!is_in_protected_mode(self))
-		return;
-
-	if (ioctl(self->vcpu_fd, KVM_GET_SREGS, &self->sregs) < 0)
-		die("KVM_GET_SREGS failed");
-
-	pte4	= guest_flat_to_host(self, self->sregs.cr3);
-	if (!host_ptr_in_ram(self, pte4))
-		return;
-
-	pte3	= guest_flat_to_host(self, (*pte4 & ~0xfff));
-	if (!host_ptr_in_ram(self, pte3))
-		return;
-
-	pte2	= guest_flat_to_host(self, (*pte3 & ~0xfff));
-	if (!host_ptr_in_ram(self, pte2))
-		return;
-
-	pte1	= guest_flat_to_host(self, (*pte2 & ~0xfff));
-	if (!host_ptr_in_ram(self, pte1))
-		return;
-
-	printf("Page Tables:\n");
-	if (*pte2 & (1 << 7))
-		printf(" pte4: %016" PRIx64 "   pte3: %016" PRIx64
-			"   pte2: %016" PRIx64 "\n",
-			*pte4, *pte3, *pte2);
-	else
-		printf(" pte4: %016" PRIx64 "   pte3: %016" PRIx64 "   pte2: %016"
-			PRIx64 "   pte1: %016" PRIx64 "\n",
-			*pte4, *pte3, *pte2, *pte1);
-}
-
 void kvm__dump_mem(struct kvm *self, unsigned long addr, unsigned long size)
 {
 	unsigned char *p;
-- 
1.7.0.4

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux