This enables guests to lock their CR0 and CR4 registers with a subset of X86_CR0_WP, X86_CR4_SMEP, X86_CR4_SMAP, X86_CR4_UMIP, X86_CR4_FSGSBASE and X86_CR4_CET flags. The new KVM_HC_LOCK_CR_UPDATE hypercall takes two arguments. The first is to identify the control register, and the second is a bit mask to pin (i.e. mark as read-only). These register flags should already be pinned by Linux guests, but once compromised, this self-protection mechanism could be disabled, which is not the case with this dedicated hypercall. Cc: Borislav Petkov <bp@xxxxxxxxx> Cc: Dave Hansen <dave.hansen@xxxxxxxxxxxxxxx> Cc: H. Peter Anvin <hpa@xxxxxxxxx> Cc: Ingo Molnar <mingo@xxxxxxxxxx> Cc: Kees Cook <keescook@xxxxxxxxxxxx> Cc: Madhavan T. Venkataraman <madvenka@xxxxxxxxxxxxxxxxxxx> Cc: Paolo Bonzini <pbonzini@xxxxxxxxxx> Cc: Sean Christopherson <seanjc@xxxxxxxxxx> Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx> Cc: Vitaly Kuznetsov <vkuznets@xxxxxxxxxx> Cc: Wanpeng Li <wanpengli@xxxxxxxxxxx> Signed-off-by: Mickaël Salaün <mic@xxxxxxxxxxx> Link: https://lore.kernel.org/r/20230505152046.6575-6-mic@xxxxxxxxxxx --- Documentation/virt/kvm/x86/hypercalls.rst | 15 +++++ arch/x86/kernel/cpu/common.c | 2 +- arch/x86/kvm/vmx/vmx.c | 10 ++++ arch/x86/kvm/x86.c | 72 +++++++++++++++++++++++ arch/x86/kvm/x86.h | 16 +++++ include/linux/kvm_host.h | 3 + include/uapi/linux/kvm_para.h | 1 + 7 files changed, 118 insertions(+), 1 deletion(-) diff --git a/Documentation/virt/kvm/x86/hypercalls.rst b/Documentation/virt/kvm/x86/hypercalls.rst index 0ec79cc77f53..8aa5d28986e3 100644 --- a/Documentation/virt/kvm/x86/hypercalls.rst +++ b/Documentation/virt/kvm/x86/hypercalls.rst @@ -207,3 +207,18 @@ identified with set of physical page ranges (GFNs). The HEKI_ATTR_MEM_NOWRITE memory page range attribute forbids related modification to the guest. Returns 0 on success or a KVM error code otherwise. + +10. KVM_HC_LOCK_CR_UPDATE +------------------------- + +:Architecture: x86 +:Status: active +:Purpose: Request some control registers to be restricted. + +- a0: identify a control register +- a1: bit mask to make some flags read-only + +The hypercall lets a guest request control register flags to be pinned for +itself. + +Returns 0 on success or a KVM error code otherwise. diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index f3cc7699e1e1..dd89379fe5ac 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -413,7 +413,7 @@ static __always_inline void setup_umip(struct cpuinfo_x86 *c) } /* These bits should not change their value after CPU init is finished. */ -static const unsigned long cr4_pinned_mask = +const unsigned long cr4_pinned_mask = X86_CR4_SMEP | X86_CR4_SMAP | X86_CR4_UMIP | X86_CR4_FSGSBASE | X86_CR4_CET; static DEFINE_STATIC_KEY_FALSE_RO(cr_pinning); diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 9870db887a62..931688edc8eb 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -3162,6 +3162,11 @@ void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) struct vcpu_vmx *vmx = to_vmx(vcpu); unsigned long hw_cr0, old_cr0_pg; u32 tmp; + int res; + + res = heki_check_cr(vcpu->kvm, 0, cr0); + if (res) + return; old_cr0_pg = kvm_read_cr0_bits(vcpu, X86_CR0_PG); @@ -3323,6 +3328,11 @@ void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) * this bit, even if host CR4.MCE == 0. */ unsigned long hw_cr4; + int res; + + res = heki_check_cr(vcpu->kvm, 4, cr4); + if (res) + return; hw_cr4 = (cr4_read_shadow() & X86_CR4_MCE) | (cr4 & ~X86_CR4_MCE); if (is_unrestricted_guest(vcpu)) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ffab64d08de3..a529455359ac 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7927,11 +7927,77 @@ static unsigned long emulator_get_cr(struct x86_emulate_ctxt *ctxt, int cr) return value; } +#ifdef CONFIG_HEKI + +extern unsigned long cr4_pinned_mask; + +static int heki_lock_cr(struct kvm *const kvm, const unsigned long cr, + unsigned long pin) +{ + if (!pin) + return -KVM_EINVAL; + + switch (cr) { + case 0: + /* Cf. arch/x86/kernel/cpu/common.c */ + if (!(pin & X86_CR0_WP)) + return -KVM_EINVAL; + + if ((read_cr0() & pin) != pin) + return -KVM_EINVAL; + + atomic_long_or(pin, &kvm->heki_pinned_cr0); + return 0; + case 4: + /* Checks for irrelevant bits. */ + if ((pin & cr4_pinned_mask) != pin) + return -KVM_EINVAL; + + /* Ignores bits not present in host. */ + pin &= __read_cr4(); + atomic_long_or(pin, &kvm->heki_pinned_cr4); + return 0; + } + return -KVM_EINVAL; +} + +int heki_check_cr(const struct kvm *const kvm, const unsigned long cr, + const unsigned long val) +{ + unsigned long pinned; + + switch (cr) { + case 0: + pinned = atomic_long_read(&kvm->heki_pinned_cr0); + if ((val & pinned) != pinned) { + pr_warn_ratelimited( + "heki-kvm: Blocked CR0 update: 0x%lx\n", val); + return -KVM_EPERM; + } + return 0; + case 4: + pinned = atomic_long_read(&kvm->heki_pinned_cr4); + if ((val & pinned) != pinned) { + pr_warn_ratelimited( + "heki-kvm: Blocked CR4 update: 0x%lx\n", val); + return -KVM_EPERM; + } + return 0; + } + return 0; +} + +#endif /* CONFIG_HEKI */ + static int emulator_set_cr(struct x86_emulate_ctxt *ctxt, int cr, ulong val) { struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt); int res = 0; + res = heki_check_cr(vcpu->kvm, cr, val); + if (res) + return res; + switch (cr) { case 0: res = kvm_set_cr0(vcpu, mk_cr_64(kvm_read_cr0(vcpu), val)); @@ -9858,6 +9924,12 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) else ret = heki_lock_mem_page_ranges(vcpu->kvm, a0, a1); break; + case KVM_HC_LOCK_CR_UPDATE: + if (a0 > U32_MAX) + ret = -KVM_EINVAL; + else + ret = heki_lock_cr(vcpu->kvm, a0, a1); + break; #endif /* CONFIG_HEKI */ default: ret = -KVM_ENOSYS; diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 9de72586f406..3e80a60ecbd8 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -276,6 +276,22 @@ static inline bool kvm_check_has_quirk(struct kvm *kvm, u64 quirk) return !(kvm->arch.disabled_quirks & quirk); } +#ifdef CONFIG_HEKI + +int heki_check_cr(const struct kvm *kvm, unsigned long cr, unsigned long val); + +bool kvm_heki_is_exec_allowed(struct kvm_vcpu *vcpu, gpa_t gpa); + +#else /* CONFIG_HEKI */ + +static inline int heki_check_cr(const struct kvm *const kvm, + const unsigned long cr, const unsigned long val) +{ + return 0; +} + +#endif /* CONFIG_HEKI */ + void kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip); u64 get_kvmclock_ns(struct kvm *kvm); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 39a1bdc2ba42..ab9dc723bc89 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -812,6 +812,9 @@ struct kvm { #define HEKI_GFN_MAX 16 atomic_t heki_gfn_no_write_num; struct heki_gfn_range heki_gfn_no_write[HEKI_GFN_MAX]; + + atomic_long_t heki_pinned_cr0; + atomic_long_t heki_pinned_cr4; #endif /* CONFIG_HEKI */ #ifdef CONFIG_HAVE_KVM_PM_NOTIFIER diff --git a/include/uapi/linux/kvm_para.h b/include/uapi/linux/kvm_para.h index d7512a10880e..9f68d4ba646b 100644 --- a/include/uapi/linux/kvm_para.h +++ b/include/uapi/linux/kvm_para.h @@ -31,6 +31,7 @@ #define KVM_HC_SCHED_YIELD 11 #define KVM_HC_MAP_GPA_RANGE 12 #define KVM_HC_LOCK_MEM_PAGE_RANGES 13 +#define KVM_HC_LOCK_CR_UPDATE 14 /* * hypercalls use architecture specific -- 2.40.1