On Mon, Feb 13, 2012 at 04:20:24PM +0100, Igor Mammedov wrote: > On 02/13/2012 02:07 PM, Marcelo Tosatti wrote: > > > >Upon resume from hibernation, CPU 0's hvclock area contains the old > >values for system_time and tsc_timestamp. It is necessary for the > >hypervisor to update these values with uptodate ones before the CPU uses > >them. > > > >Abstract TSC's save/restore sched_clock_state functions and use > >restore_state to write to KVM_SYSTEM_TIME MSR, forcing an update. > > > >Also move restore_sched_clock_state before __restore_processor_state, > >since the later calls CONFIG_LOCK_STAT's lockstat_clock (also for TSC). > >Thanks to Igor Mammedov for tracking it down. > > > >Fixes suspend-to-disk with kvmclock. > > > >Signed-off-by: Marcelo Tosatti<mtosatti@xxxxxxxxxx> > > > >diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h > >index 15d9915..c91e8b9 100644 > >--- a/arch/x86/include/asm/tsc.h > >+++ b/arch/x86/include/asm/tsc.h > >@@ -61,7 +61,7 @@ extern void check_tsc_sync_source(int cpu); > > extern void check_tsc_sync_target(void); > > > > extern int notsc_setup(char *); > >-extern void save_sched_clock_state(void); > >-extern void restore_sched_clock_state(void); > >+extern void tsc_save_sched_clock_state(void); > >+extern void tsc_restore_sched_clock_state(void); > > > > #endif /* _ASM_X86_TSC_H */ > >diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h > >index 5d0afac..baaca8d 100644 > >--- a/arch/x86/include/asm/x86_init.h > >+++ b/arch/x86/include/asm/x86_init.h > >@@ -162,6 +162,8 @@ struct x86_cpuinit_ops { > > * @is_untracked_pat_range exclude from PAT logic > > * @nmi_init enable NMI on cpus > > * @i8042_detect pre-detect if i8042 controller exists > >+ * @save_sched_clock_state: save state for sched_clock() on suspend > >+ * @restore_sched_clock_state: restore state for sched_clock() on resume > > */ > > struct x86_platform_ops { > > unsigned long (*calibrate_tsc)(void); > >@@ -173,6 +175,8 @@ struct x86_platform_ops { > > void (*nmi_init)(void); > > unsigned char (*get_nmi_reason)(void); > > int (*i8042_detect)(void); > >+ void (*save_sched_clock_state)(void); > >+ void (*restore_sched_clock_state)(void); > > }; > > > > struct pci_dev; > >diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c > >index ca4e735..57e6b78 100644 > >--- a/arch/x86/kernel/kvmclock.c > >+++ b/arch/x86/kernel/kvmclock.c > >@@ -136,6 +136,15 @@ int kvm_register_clock(char *txt) > > return ret; > > } > > > >+void kvm_save_sched_clock_state(void) > >+{ > >+} > >+ > >+void kvm_restore_sched_clock_state(void) > >+{ > >+ kvm_register_clock("primary cpu clock, resume"); > >+} > >+ > > #ifdef CONFIG_X86_LOCAL_APIC > > static void __cpuinit kvm_setup_secondary_clock(void) > > { > >@@ -195,6 +204,8 @@ void __init kvmclock_init(void) > > x86_cpuinit.early_percpu_clock_init = > > kvm_setup_secondary_clock; > > #endif > >+ x86_platform.save_sched_clock_state = kvm_save_sched_clock_state; > >+ x86_platform.restore_sched_clock_state = kvm_restore_sched_clock_state; > > machine_ops.shutdown = kvm_shutdown; > > #ifdef CONFIG_KEXEC > > machine_ops.crash_shutdown = kvm_crash_shutdown; > >diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c > >index a62c201..aed2aa1 100644 > >--- a/arch/x86/kernel/tsc.c > >+++ b/arch/x86/kernel/tsc.c > >@@ -629,7 +629,7 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu) > > > > static unsigned long long cyc2ns_suspend; > > > >-void save_sched_clock_state(void) > >+void tsc_save_sched_clock_state(void) > > { > > if (!sched_clock_stable) > > return; > >@@ -645,7 +645,7 @@ void save_sched_clock_state(void) > > * that sched_clock() continues from the point where it was left off during > > * suspend. > > */ > >-void restore_sched_clock_state(void) > >+void tsc_restore_sched_clock_state(void) > > { > > unsigned long long offset; > > unsigned long flags; > >diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c > >index 6f2ec53..e9f265f 100644 > >--- a/arch/x86/kernel/x86_init.c > >+++ b/arch/x86/kernel/x86_init.c > >@@ -108,7 +108,9 @@ struct x86_platform_ops x86_platform = { > > .is_untracked_pat_range = is_ISA_range, > > .nmi_init = default_nmi_init, > > .get_nmi_reason = default_get_nmi_reason, > >- .i8042_detect = default_i8042_detect > >+ .i8042_detect = default_i8042_detect, > >+ .save_sched_clock_state = tsc_save_sched_clock_state, > >+ .restore_sched_clock_state = tsc_restore_sched_clock_state, > > }; > > > > EXPORT_SYMBOL_GPL(x86_platform); > >diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c > >index f10c0af..0e76a28 100644 > >--- a/arch/x86/power/cpu.c > >+++ b/arch/x86/power/cpu.c > >@@ -114,7 +114,7 @@ static void __save_processor_state(struct saved_context *ctxt) > > void save_processor_state(void) > > { > > __save_processor_state(&saved_context); > >- save_sched_clock_state(); > >+ x86_platform.save_sched_clock_state(); > > } > > #ifdef CONFIG_X86_32 > > EXPORT_SYMBOL(save_processor_state); > >@@ -230,8 +230,8 @@ static void __restore_processor_state(struct saved_context *ctxt) > > /* Needed by apm.c */ > > void restore_processor_state(void) > > { > >+ x86_platform.restore_sched_clock_state(); > Isn't it too early? It is scarry to say hypervisor to write to some > memory location and than completely replace page-tables and half of > cpu state in __restore_processor_state. Wouldn't that have a potential > of writing into a place that is not restored hv_clock and restored > hv_clock might still be stale? No, memory is copied in swsusp_arch_resume(), which happens before restore_processor_state. restore_processor_state() is only setting up registers and MTRR. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html