Re: x86: kvmclock: abstract save/restore sched_clock_state (v2)

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 02/13/2012 02:07 PM, Marcelo Tosatti wrote:

Upon resume from hibernation, CPU 0's hvclock area contains the old
values for system_time and tsc_timestamp. It is necessary for the
hypervisor to update these values with up-to-date ones before the CPU uses
them.

Abstract TSC's save/restore sched_clock_state functions and use
restore_state to write to KVM_SYSTEM_TIME MSR, forcing an update.

Also move restore_sched_clock_state before __restore_processor_state,
since the latter calls CONFIG_LOCK_STAT's lockstat_clock (also for TSC).
Thanks to Igor Mammedov for tracking it down.

Fixes suspend-to-disk with kvmclock.

Signed-off-by: Marcelo Tosatti<mtosatti@xxxxxxxxxx>

diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h
index 15d9915..c91e8b9 100644
--- a/arch/x86/include/asm/tsc.h
+++ b/arch/x86/include/asm/tsc.h
@@ -61,7 +61,7 @@ extern void check_tsc_sync_source(int cpu);
  extern void check_tsc_sync_target(void);

  extern int notsc_setup(char *);
-extern void save_sched_clock_state(void);
-extern void restore_sched_clock_state(void);
+extern void tsc_save_sched_clock_state(void);
+extern void tsc_restore_sched_clock_state(void);

  #endif /* _ASM_X86_TSC_H */
diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h
index 5d0afac..baaca8d 100644
--- a/arch/x86/include/asm/x86_init.h
+++ b/arch/x86/include/asm/x86_init.h
@@ -162,6 +162,8 @@ struct x86_cpuinit_ops {
   * @is_untracked_pat_range	exclude from PAT logic
   * @nmi_init			enable NMI on cpus
   * @i8042_detect		pre-detect if i8042 controller exists
+ * @save_sched_clock_state:	save state for sched_clock() on suspend
+ * @restore_sched_clock_state:	restore state for sched_clock() on resume
   */
  struct x86_platform_ops {
  	unsigned long (*calibrate_tsc)(void);
@@ -173,6 +175,8 @@ struct x86_platform_ops {
  	void (*nmi_init)(void);
  	unsigned char (*get_nmi_reason)(void);
  	int (*i8042_detect)(void);
+	void (*save_sched_clock_state)(void);
+	void (*restore_sched_clock_state)(void);
  };

  struct pci_dev;
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index ca4e735..57e6b78 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -136,6 +136,15 @@ int kvm_register_clock(char *txt)
  	return ret;
  }

+void kvm_save_sched_clock_state(void)
+{
+}
+
+void kvm_restore_sched_clock_state(void)
+{
+	kvm_register_clock("primary cpu clock, resume");
+}
+
  #ifdef CONFIG_X86_LOCAL_APIC
  static void __cpuinit kvm_setup_secondary_clock(void)
  {
@@ -195,6 +204,8 @@ void __init kvmclock_init(void)
  	x86_cpuinit.early_percpu_clock_init =
  		kvm_setup_secondary_clock;
  #endif
+	x86_platform.save_sched_clock_state = kvm_save_sched_clock_state;
+	x86_platform.restore_sched_clock_state = kvm_restore_sched_clock_state;
  	machine_ops.shutdown  = kvm_shutdown;
  #ifdef CONFIG_KEXEC
  	machine_ops.crash_shutdown  = kvm_crash_shutdown;
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index a62c201..aed2aa1 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -629,7 +629,7 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)

  static unsigned long long cyc2ns_suspend;

-void save_sched_clock_state(void)
+void tsc_save_sched_clock_state(void)
  {
  	if (!sched_clock_stable)
  		return;
@@ -645,7 +645,7 @@ void save_sched_clock_state(void)
   * that sched_clock() continues from the point where it was left off during
   * suspend.
   */
-void restore_sched_clock_state(void)
+void tsc_restore_sched_clock_state(void)
  {
  	unsigned long long offset;
  	unsigned long flags;
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c
index 6f2ec53..e9f265f 100644
--- a/arch/x86/kernel/x86_init.c
+++ b/arch/x86/kernel/x86_init.c
@@ -108,7 +108,9 @@ struct x86_platform_ops x86_platform = {
  	.is_untracked_pat_range		= is_ISA_range,
  	.nmi_init			= default_nmi_init,
  	.get_nmi_reason			= default_get_nmi_reason,
-	.i8042_detect			= default_i8042_detect
+	.i8042_detect			= default_i8042_detect,
+	.save_sched_clock_state 	= tsc_save_sched_clock_state,
+	.restore_sched_clock_state 	= tsc_restore_sched_clock_state,
  };

  EXPORT_SYMBOL_GPL(x86_platform);
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index f10c0af..0e76a28 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -114,7 +114,7 @@ static void __save_processor_state(struct saved_context *ctxt)
  void save_processor_state(void)
  {
  	__save_processor_state(&saved_context);
-	save_sched_clock_state();
+	x86_platform.save_sched_clock_state();
  }
  #ifdef CONFIG_X86_32
  EXPORT_SYMBOL(save_processor_state);
@@ -230,8 +230,8 @@ static void __restore_processor_state(struct saved_context *ctxt)
  /* Needed by apm.c */
  void restore_processor_state(void)
  {
+	x86_platform.restore_sched_clock_state();
Isn't it too early? It is scary to tell the hypervisor to write to some
memory location and then completely replace the page tables and half of
the CPU state in __restore_processor_state. Wouldn't that have the potential
of writing into a place that is not the restored hv_clock, so that the
restored hv_clock might still be stale?

  	__restore_processor_state(&saved_context);
-	restore_sched_clock_state();
  }
  #ifdef CONFIG_X86_32
  EXPORT_SYMBOL(restore_processor_state);

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
Thanks,
 Igor
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux