Hi Colton, On Mon, Apr 22, 2024 at 06:17:16PM +0000, Colton Lewis wrote: > Add an early_params to control WFI and WFE trapping. This is to > control the degree guests can wait for interrupts on their own without > being trapped by KVM. Options for each param are trap, notrap, and > default. trap enables the trap. notrap disables the trap. default > preserves current behavior, disabling the trap if only a single task > is running and enabling otherwise. > > Signed-off-by: Colton Lewis <coltonlewis@xxxxxxxxxx> > --- > v4: > > * Fixed inaccurate names that incorrectly implied this controls interrupts > themselves instead of instructions waiting for interrupts and events > * Split into two separate params as interrupts (WFI) and events (WFE) do > different things and may warrant separate controls. > * Document new params in Documentation/admin-guide/kernel-parameters.txt > > > v3: > https://lore.kernel.org/kvmarm/20240410175437.793508-1-coltonlewis@xxxxxxxxxx/ > > v2: > https://lore.kernel.org/kvmarm/20240319164341.1674863-1-coltonlewis@xxxxxxxxxx/ > > v1: > https://lore.kernel.org/kvmarm/20240129213918.3124494-1-coltonlewis@xxxxxxxxxx/ > > .../admin-guide/kernel-parameters.txt | 22 +++++++- > arch/arm64/include/asm/kvm_emulate.h | 24 ++++++++- > arch/arm64/include/asm/kvm_host.h | 7 +++ > arch/arm64/kvm/arm.c | 54 +++++++++++++++++-- > 4 files changed, 101 insertions(+), 6 deletions(-) > > diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt > index 31b3a25680d0..f8d16c792e66 100644 > --- a/Documentation/admin-guide/kernel-parameters.txt > +++ b/Documentation/admin-guide/kernel-parameters.txt > @@ -2653,6 +2653,27 @@ > [KVM,ARM] Allow use of GICv4 for direct injection of > LPIs. > > + kvm-arm.wfe_trap_policy= > + [KVM,ARM] Control when to set wfe instruction trap. nitpick: when referring to the instruction, please capitalize it. Also, it doesn't hurt to be verbose here and say this cmdline option "Controls the WFE instruction trap behavior for KVM VMs" I say this because there is a separate set of trap controls that allow WFE or WFI to execute in EL0 (i.e. host userspace). > + trap: set wfe instruction trap > + > + notrap: clear wfe instruction trap > + > + default: set wfe instruction trap only if multiple > + tasks are running on the CPU I would strongly prefer we not make any default behavior user-visible. The default KVM behavior can (and will) change in the future. Only the absence of an explicit trap / notrap policy should fall back to KVM's default heuristics. > + kvm-arm.wfi_trap_policy= > + [KVM,ARM] Control when to set wfi instruction trap. > + > + trap: set wfi instruction trap > + > + notrap: clear wfi instruction trap > + > + default: set wfi instruction trap only if multiple > + tasks are running on the CPU > + > + > kvm_cma_resv_ratio=n [PPC] > Reserves given percentage from system memory area for > contiguous memory allocation for KVM hash pagetable > @@ -7394,4 +7415,3 @@ > memory, and other data can't be written using > xmon commands. > off xmon is disabled. > - > diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h > index b804fe832184..efd0a3fb6f00 100644 > --- a/arch/arm64/include/asm/kvm_emulate.h > +++ b/arch/arm64/include/asm/kvm_emulate.h > @@ -109,9 +109,13 @@ static inline unsigned long *vcpu_hcr(struct kvm_vcpu *vcpu) > return (unsigned long *)&vcpu->arch.hcr_el2; > } > > -static inline void vcpu_clear_wfx_traps(struct kvm_vcpu *vcpu) > +static inline void vcpu_clear_wfe_trap(struct kvm_vcpu *vcpu) > { > vcpu->arch.hcr_el2 &= ~HCR_TWE; > +} > + > +static inline void vcpu_clear_wfi_trap(struct kvm_vcpu *vcpu) > +{ > if (atomic_read(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe.vlpi_count) || > vcpu->kvm->arch.vgic.nassgireq) > vcpu->arch.hcr_el2 &= ~HCR_TWI; > @@ -119,12 +123,28 @@ static inline void vcpu_clear_wfx_traps(struct kvm_vcpu *vcpu) > vcpu->arch.hcr_el2 |= HCR_TWI; > } This helper definitely does not do as it says on the tin. It ignores the policy requested on the command line and conditionally *sets* TWI. If the operator believes they know best and ask for a particular trap policy KVM should uphold it unconditionally. Even if they've managed to shoot themselves in the foot. > -static inline void vcpu_set_wfx_traps(struct kvm_vcpu *vcpu) > +static inline void vcpu_clear_wfx_traps(struct kvm_vcpu *vcpu) > +{ > + vcpu_clear_wfe_trap(vcpu); > + vcpu_clear_wfi_trap(vcpu); > +} > + > +static inline void vcpu_set_wfe_trap(struct kvm_vcpu *vcpu) > { > vcpu->arch.hcr_el2 |= HCR_TWE; > +} > + > +static inline void vcpu_set_wfi_trap(struct kvm_vcpu *vcpu) > +{ > vcpu->arch.hcr_el2 |= HCR_TWI; > } > > +static inline void vcpu_set_wfx_traps(struct kvm_vcpu *vcpu) > +{ > + vcpu_set_wfe_trap(vcpu); > + vcpu_set_wfi_trap(vcpu); > +} > + > static inline void vcpu_ptrauth_enable(struct kvm_vcpu *vcpu) > { > vcpu->arch.hcr_el2 |= (HCR_API | HCR_APK); > diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h > index 21c57b812569..315ee7bfc1cb 100644 > --- a/arch/arm64/include/asm/kvm_host.h > +++ b/arch/arm64/include/asm/kvm_host.h > @@ -67,6 +67,13 @@ enum kvm_mode { > KVM_MODE_NV, > KVM_MODE_NONE, > }; > + > +enum kvm_wfx_trap_policy { > + KVM_WFX_NOTRAP_SINGLE_TASK, /* Default option */ > + KVM_WFX_NOTRAP, > + KVM_WFX_TRAP, > +}; > + > #ifdef CONFIG_KVM > enum kvm_mode kvm_get_mode(void); > #else > diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c > index a25265aca432..5106ba5a8a39 100644 > --- a/arch/arm64/kvm/arm.c > +++ b/arch/arm64/kvm/arm.c > @@ -46,6 +46,8 @@ > #include <kvm/arm_psci.h> > > static enum kvm_mode kvm_mode = KVM_MODE_DEFAULT; > +static enum kvm_wfx_trap_policy kvm_wfi_trap_policy = KVM_WFX_NOTRAP_SINGLE_TASK; > +static enum kvm_wfx_trap_policy kvm_wfe_trap_policy = KVM_WFX_NOTRAP_SINGLE_TASK; > > DECLARE_KVM_HYP_PER_CPU(unsigned long, kvm_hyp_vector); > > @@ -423,6 +425,12 @@ void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) > > } > > +static bool kvm_should_clear_wfx_trap(enum kvm_wfx_trap_policy p) > +{ > + return (p == KVM_WFX_NOTRAP && kvm_vgic_global_state.has_gicv4) > + || (p == KVM_WFX_NOTRAP_SINGLE_TASK && single_task_running()); > +} style nitpick: operators should always go on the preceding line for a multi-line statement. > void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) > { > struct kvm_s2_mmu *mmu; > @@ -456,10 +464,15 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) > if (kvm_arm_is_pvtime_enabled(&vcpu->arch)) > kvm_make_request(KVM_REQ_RECORD_STEAL, vcpu); > > - if (single_task_running()) > - vcpu_clear_wfx_traps(vcpu); > + if (kvm_should_clear_wfx_trap(kvm_wfi_trap_policy)) > + vcpu_clear_wfi_trap(vcpu); > else > - vcpu_set_wfx_traps(vcpu); > + vcpu_set_wfi_trap(vcpu); > + > + if (kvm_should_clear_wfx_trap(kvm_wfe_trap_policy)) > + vcpu_clear_wfe_trap(vcpu); > + else > + vcpu_set_wfe_trap(vcpu); > > if (vcpu_has_ptrauth(vcpu)) > vcpu_ptrauth_disable(vcpu); I find all of the layering rather hard to follow; we don't need accessors for doing simple bit manipulation. Rough sketch: static bool kvm_vcpu_should_clear_twi(struct kvm_vcpu *vcpu) { if (unlikely(kvm_wfi_trap != KVM_WFX_DEFAULT)) return kvm_wfi_trap == KVM_WFX_NOTRAP; return single_task_running() && (atomic_read(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe.vlpi_count) || vcpu->kvm->arch.vgic.nassgireq); } static bool kvm_vcpu_should_clear_twe(struct kvm_vcpu *vcpu) { if (unlikely(kvm_wfe_trap != KVM_WFX_DEFAULT)) return kvm_wfe_trap == KVM_WFX_NOTRAP; return single_task_running(); } static void kvm_vcpu_load_compute_hcr(struct kvm_vcpu *vcpu) { vcpu->arch.hcr_el2 |= HCR_TWE | HCR_TWI; if (kvm_vcpu_should_clear_twe(vcpu)) vcpu->arch.hcr_el2 &= ~HCR_TWE; if (kvm_vcpu_should_clear_twi(vcpu)) vcpu->arch.hcr_el2 &= ~HCR_TWI; } And if we really wanted to, the non-default trap configuration could be moved to vcpu_reset_hcr() if we cared. -- Thanks, Oliver