[PATCH 4/4] kvm: x86: export TSC offset to user-space

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



We need to retrieve a VM's TSC offset in order to use
the host's TSC to merge host and guest traces. This is
explained in detail in this thread:

  [Qemu-devel] [RFC] host and guest kernel trace merging
  https://lists.nongnu.org/archive/html/qemu-devel/2016-03/msg00887.html

Today, the only way to retrieve a VM's TSC offset is
by using the kvm_write_tsc_offset tracepoint. This has
two problems. First, the tracepoint is only emitted
when the VM boots, so a VM that is already running has
to be rebooted to obtain its offset. Second, tracepoints
are not supposed to be a stable ABI, so user-space tools
should not have to depend on them.

This commit exports a VM's TSC offset to user-space via
debugfs. A new file called "tsc-offset" is created in
the VM's debugfs directory. For example:

  /sys/kernel/debug/kvm/51696-10/tsc-offset

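This file contains one TSC offset per line, for each
vCPU. Each offset is printed as an unsigned 64-bit
decimal number, so a negative offset shows up as a very
large value. For example: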

  vcpu0: 18446742405270834952
  vcpu1: 18446742405270834952
  vcpu2: 18446742405270834952
  vcpu3: 18446742405270834952

There are some important observations about this
solution:

 - While all vCPUs' TSC offsets should be equal for the
   cases we care about (i.e. stable TSC and no write to
   the TSC MSR), I chose to follow the spec and export
   each vCPU's TSC offset (this might also be helpful
   for debugging)

 - The TSC offset is only useful after the VM has booted

 - We'll probably need to export the TSC multiplier too.
   However, I've been using only the TSC offset for now.
   So, let's get this merged first and do the TSC multiplier
   as a second step

Signed-off-by: Luiz Capitulino <lcapitulino@xxxxxxxxxx>
---
 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/svm.c              |  1 +
 arch/x86/kvm/vmx.c              |  8 ++++++++
 arch/x86/kvm/x86.c              | 30 ++++++++++++++++++++++++++++++
 4 files changed, 40 insertions(+)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 33ae3a4..5714bbd 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -952,6 +952,7 @@ struct kvm_x86_ops {
 	bool (*has_wbinvd_exit)(void);
 
 	u64 (*read_tsc_offset)(struct kvm_vcpu *vcpu);
+	u64 (*read_cached_tsc_offset)(struct kvm_vcpu *vcpu);
 	void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset);
 
 	u64 (*read_l1_tsc)(struct kvm_vcpu *vcpu, u64 host_tsc);
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index af523d8..c851477 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -5065,6 +5065,7 @@ static struct kvm_x86_ops svm_x86_ops = {
 	.has_wbinvd_exit = svm_has_wbinvd_exit,
 
 	.read_tsc_offset = svm_read_tsc_offset,
+	.read_cached_tsc_offset = svm_read_tsc_offset,
 	.write_tsc_offset = svm_write_tsc_offset,
 	.adjust_tsc_offset_guest = svm_adjust_tsc_offset_guest,
 	.read_l1_tsc = svm_read_l1_tsc,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 5cede40..82dfe42 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -616,6 +616,7 @@ struct vcpu_vmx {
 	u64 hv_deadline_tsc;
 
 	u64 current_tsc_ratio;
+	u64 cached_tsc_offset;
 
 	bool guest_pkru_valid;
 	u32 guest_pkru;
@@ -2608,6 +2609,11 @@ static u64 vmx_read_tsc_offset(struct kvm_vcpu *vcpu)
 	return vmcs_read64(TSC_OFFSET);
 }
 
+static u64 vmx_read_cached_tsc_offset(struct kvm_vcpu *vcpu)
+{
+	return to_vmx(vcpu)->cached_tsc_offset;
+}
+
 /*
  * writes 'offset' into guest's timestamp counter offset register
  */
@@ -2632,6 +2638,7 @@ static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
 					   vmcs_read64(TSC_OFFSET), offset);
 		vmcs_write64(TSC_OFFSET, offset);
 	}
+	to_vmx(vcpu)->cached_tsc_offset = offset;
 }
 
 static void vmx_adjust_tsc_offset_guest(struct kvm_vcpu *vcpu, s64 adjustment)
@@ -11275,6 +11282,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
 	.has_wbinvd_exit = cpu_has_vmx_wbinvd_exit,
 
 	.read_tsc_offset = vmx_read_tsc_offset,
+	.read_cached_tsc_offset = vmx_read_cached_tsc_offset,
 	.write_tsc_offset = vmx_write_tsc_offset,
 	.adjust_tsc_offset_guest = vmx_adjust_tsc_offset_guest,
 	.read_l1_tsc = vmx_read_l1_tsc,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 18dfbac..75a8e23 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -54,6 +54,7 @@
 #include <linux/pvclock_gtod.h>
 #include <linux/kvm_irqfd.h>
 #include <linux/irqbypass.h>
+#include <linux/debugfs.h>
 #include <trace/events/kvm.h>
 
 #include <asm/debugreg.h>
@@ -7779,8 +7780,37 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
 	return 0;
 }
 
+static int tsc_offset_show(struct seq_file *m, void *data)
+{
+	struct kvm *kvm = m->private;
+	struct kvm_vcpu *vcpu;
+	int i;
+
+	kvm_for_each_vcpu(i, vcpu, kvm)
+		seq_printf(m, "vcpu%d: %llu\n",
+				vcpu->vcpu_id, kvm_x86_ops->read_cached_tsc_offset(vcpu));
+
+	return 0;
+}
+
+static int tsc_offset_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, tsc_offset_show, inode->i_private);
+}
+
+static const struct file_operations tsc_offset_fops = {
+	.owner		=	THIS_MODULE,
+	.open		=	tsc_offset_open,
+	.read		=	seq_read,
+	.llseek		=	seq_lseek,
+	.release	=	single_release,
+};
+
 int kvm_arch_create_vm_debugfs(struct kvm *kvm)
 {
+	if (!debugfs_create_file("tsc-offset", 0444,
+				kvm->debugfs_dentry, kvm, &tsc_offset_fops))
+		return -ENOMEM;
 	return 0;
 }
 
-- 
2.5.5

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux