On 11/14/24 00:56, Sean Christopherson wrote:
+static bool kvm_nx_huge_page_recovery_worker(void *data)
+{
+ struct kvm *kvm = data;
long remaining_time;
- while (true) {
- start_time = get_jiffies_64();
- remaining_time = get_nx_huge_page_recovery_timeout(start_time);
+ if (kvm->arch.nx_huge_page_next == NX_HUGE_PAGE_DISABLED)
+ return false;
The "next" concept is broken. Once KVM sees NX_HUGE_PAGE_DISABLED for a given VM,
KVM will never re-evaluate nx_huge_page_next. Similarly, if the recovery period
and/or ratio changes, KVM won't recompute the "next" time until the current timeout
has expired.
I fiddled around with various ideas, but I don't see a better solution that something
along the lines of KVM's request system, e.g. set a bool to indicate the params
changed, and sprinkle smp_{r,w}mb() barriers to ensure the vhost task sees the
new params.
"next" is broken, but there is a much better way to fix it. You just
track the *last* time that the recovery ran. This is also better
behaved when you flip recovery back and forth to disabled and back
to enabled: if your recovery period is 1 minute, it will run the
next recovery after 1 minute independent of how many times you flipped
the parameter.
This also fits much better with the "restart function after
schedule()" model that vhost_task.c requires, and it gets rid of
get_nx_huge_page_recovery_whatever() completely. It's also silly how
close the code is to the broken v1, which is why I'm sending it below
instead of sending again a large patchdd.
Paolo
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 72f3bcfc54d7..e159e44a6a1b 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1444,7 +1444,7 @@ struct kvm_arch {
struct kvm_x86_pmu_event_filter __rcu *pmu_event_filter;
struct vhost_task *nx_huge_page_recovery_thread;
- u64 nx_huge_page_next;
+ u64 nx_huge_page_last;
#ifdef CONFIG_X86_64
/* The number of TDP MMU pages across all roots. */
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index d0c2d9d2588f..22e7ad235123 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -7394,19 +7394,6 @@ static void kvm_recover_nx_huge_pages(struct kvm *kvm)
srcu_read_unlock(&kvm->srcu, rcu_idx);
}
-#define NX_HUGE_PAGE_DISABLED (-1)
-
-static u64 get_nx_huge_page_recovery_next(void)
-{
- bool enabled;
- uint period;
-
- enabled = calc_nx_huge_pages_recovery_period(&period);
-
- return enabled ? get_jiffies_64() + msecs_to_jiffies(period)
- : NX_HUGE_PAGE_DISABLED;
-}
-
static void kvm_nx_huge_page_recovery_worker_kill(void *data)
{
}
@@ -7414,12 +7401,16 @@ static void kvm_nx_huge_page_recovery_worker_kill(void *data)
static bool kvm_nx_huge_page_recovery_worker(void *data)
{
struct kvm *kvm = data;
+ bool enabled;
+ uint period;
long remaining_time;
- if (kvm->arch.nx_huge_page_next == NX_HUGE_PAGE_DISABLED)
+ enabled = calc_nx_huge_pages_recovery_period(&period);
+ if (!enabled)
return false;
- remaining_time = kvm->arch.nx_huge_page_next - get_jiffies_64();
+ remaining_time = kvm->arch.nx_huge_page_last + msecs_to_jiffies(period)
+ - get_jiffies_64();
if (remaining_time > 0) {
schedule_timeout(remaining_time);
/* check for signals and come back */
@@ -7428,7 +7419,7 @@ static bool kvm_nx_huge_page_recovery_worker(void *data)
__set_current_state(TASK_RUNNING);
kvm_recover_nx_huge_pages(kvm);
- kvm->arch.nx_huge_page_next = get_nx_huge_page_recovery_next();
+ kvm->arch.nx_huge_page_last = get_jiffies_64();
return true;
}
@@ -7437,11 +7428,11 @@ int kvm_mmu_post_init_vm(struct kvm *kvm)
if (nx_hugepage_mitigation_hard_disabled)
return 0;
- kvm->arch.nx_huge_page_next = get_nx_huge_page_recovery_next();
+ kvm->arch.nx_huge_page_last = get_jiffies_64();
kvm->arch.nx_huge_page_recovery_thread = vhost_task_create(
kvm_nx_huge_page_recovery_worker, kvm_nx_huge_page_recovery_worker_kill,
kvm, "kvm-nx-lpage-recovery");
-
+
if (!kvm->arch.nx_huge_page_recovery_thread)
return -ENOMEM;