Hello, On Wed, Jul 10, 2024 at 4:44 PM Suleiman Souhlal <suleiman@xxxxxxxxxx> wrote: > > When the host resumes from a suspend, the guest thinks any task > that was running during the suspend ran for a long time, even though > the effective run time was much shorter, which can end up having > negative effects with scheduling. This can be particularly noticeable > if the guest task was RT, as it can end up getting throttled for a > long time. > > To mitigate this issue, we include the time that the host was > suspended in steal time, which lets the guest subtract the > duration from the tasks' runtime. > > Signed-off-by: Suleiman Souhlal <suleiman@xxxxxxxxxx> > --- > arch/x86/kvm/x86.c | 23 ++++++++++++++++++++++- > include/linux/kvm_host.h | 4 ++++ > 2 files changed, 26 insertions(+), 1 deletion(-) > > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > index 0763a0f72a067f..94bbdeef843863 100644 > --- a/arch/x86/kvm/x86.c > +++ b/arch/x86/kvm/x86.c > @@ -3669,7 +3669,7 @@ static void record_steal_time(struct kvm_vcpu *vcpu) > struct kvm_steal_time __user *st; > struct kvm_memslots *slots; > gpa_t gpa = vcpu->arch.st.msr_val & KVM_STEAL_VALID_BITS; > - u64 steal; > + u64 steal, suspend_duration; > u32 version; > > if (kvm_xen_msr_enabled(vcpu->kvm)) { > @@ -3696,6 +3696,12 @@ static void record_steal_time(struct kvm_vcpu *vcpu) > return; > } > > + suspend_duration = 0; > + if (READ_ONCE(vcpu->suspended)) { > + suspend_duration = vcpu->kvm->last_suspend_duration; > + vcpu->suspended = 0; > + } > + > st = (struct kvm_steal_time __user *)ghc->hva; > /* > * Doing a TLB flush here, on the guest's behalf, can avoid > @@ -3749,6 +3755,7 @@ static void record_steal_time(struct kvm_vcpu *vcpu) > unsafe_get_user(steal, &st->steal, out); > steal += current->sched_info.run_delay - > vcpu->arch.st.last_steal; > + steal += suspend_duration; > vcpu->arch.st.last_steal = current->sched_info.run_delay; > unsafe_put_user(steal, &st->steal, out); > > @@ -6920,6 +6927,7 @@ 
static int kvm_arch_suspend_notifier(struct kvm *kvm) > > mutex_lock(&kvm->lock); > kvm_for_each_vcpu(i, vcpu, kvm) { > + WRITE_ONCE(vcpu->suspended, 1); > if (!vcpu->arch.pv_time.active) > continue; > > @@ -6932,15 +6940,28 @@ static int kvm_arch_suspend_notifier(struct kvm *kvm) > } > mutex_unlock(&kvm->lock); > > + kvm->suspended_time = ktime_get_boottime_ns(); > + > return ret ? NOTIFY_BAD : NOTIFY_DONE; > } > > +static int > +kvm_arch_resume_notifier(struct kvm *kvm) > +{ > + kvm->last_suspend_duration = ktime_get_boottime_ns() - > + kvm->suspended_time; > + return NOTIFY_DONE; > +} > + > int kvm_arch_pm_notifier(struct kvm *kvm, unsigned long state) > { > switch (state) { > case PM_HIBERNATION_PREPARE: > case PM_SUSPEND_PREPARE: > return kvm_arch_suspend_notifier(kvm); > + case PM_POST_HIBERNATION: > + case PM_POST_SUSPEND: > + return kvm_arch_resume_notifier(kvm); > } > > return NOTIFY_DONE; > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h > index 692c01e41a18ef..2d37af9a348648 100644 > --- a/include/linux/kvm_host.h > +++ b/include/linux/kvm_host.h > @@ -366,6 +366,8 @@ struct kvm_vcpu { > } async_pf; > #endif > > + bool suspended; > + > #ifdef CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT > /* > * Cpu relax intercept or pause loop exit optimization > @@ -840,6 +842,8 @@ struct kvm { > struct xarray mem_attr_array; > #endif > char stats_id[KVM_STATS_NAME_SIZE]; > + u64 last_suspend_duration; > + u64 suspended_time; > }; > > #define kvm_err(fmt, ...) \ > -- > 2.45.2.993.g49e7a77208-goog > Gentle ping. Thanks, -- Suleiman