Finally make CPUs running in host OS deprivileged to VMX non-root mode. Add function pkvm_host_run_vcpu() to prepare host vcpu context and finally run into pkvm_main by entering the loop of vmenter to host VM and vmexit handling in pKVM. Call pkvm_host_deprivilege_cpus() in pkvm_init to make the deprivilege take effect. Signed-off-by: Chuanxiao Dong <chuanxiao.dong@xxxxxxxxx> Signed-off-by: Jason Chen CJ <jason.cj.chen@xxxxxxxxx> --- arch/x86/kvm/vmx/pkvm/include/pkvm.h | 1 + arch/x86/kvm/vmx/pkvm/pkvm_host.c | 91 +++++++++++++++++++++++++++- 2 files changed, 90 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/vmx/pkvm/include/pkvm.h b/arch/x86/kvm/vmx/pkvm/include/pkvm.h index 86a8f5870108..59ef09230700 100644 --- a/arch/x86/kvm/vmx/pkvm/include/pkvm.h +++ b/arch/x86/kvm/vmx/pkvm/include/pkvm.h @@ -49,5 +49,6 @@ struct pkvm_hyp { #define PKVM_HOST_VCPU_PAGES (ALIGN(sizeof(struct pkvm_host_vcpu), PAGE_SIZE) >> PAGE_SHIFT) PKVM_DECLARE(void, __pkvm_vmx_vmexit(void)); +PKVM_DECLARE(int, pkvm_main(struct kvm_vcpu *vcpu)); #endif diff --git a/arch/x86/kvm/vmx/pkvm/pkvm_host.c b/arch/x86/kvm/vmx/pkvm/pkvm_host.c index 8aaacc56734e..1fa273396b9b 100644 --- a/arch/x86/kvm/vmx/pkvm/pkvm_host.c +++ b/arch/x86/kvm/vmx/pkvm/pkvm_host.c @@ -538,6 +538,84 @@ static inline void enable_feature_control(void) wrmsrl(MSR_IA32_FEAT_CTL, old | test_bits); } +#define savegpr(gpr, value) \ + asm("mov %%" #gpr ",%0":"=r" (value) : : "memory") + +static noinline int pkvm_host_run_vcpu(struct pkvm_host_vcpu *vcpu) +{ + u64 host_rsp; + unsigned long *regs = vcpu->vmx.vcpu.arch.regs; + volatile int ret = 0; + + /* + * prepare to RUN vcpu: + * + * - record gprs in vcpu.arch.regs[]: + * + * - record below guest vmcs fields: + * GUEST_RFLAGS - read from native + * + * - record below guest vmcs fields: + * GUEST_RFLAGS - read from native + * GUEST_RIP - vmentry_point + * GUEST_RSP - read from native + * + * - switch RSP to host_rsp + * - push guest_rsp to host stack + */ + savegpr(rax, 
regs[__VCPU_REGS_RAX]); + savegpr(rcx, regs[__VCPU_REGS_RCX]); + savegpr(rdx, regs[__VCPU_REGS_RDX]); + savegpr(rbx, regs[__VCPU_REGS_RBX]); + savegpr(rbp, regs[__VCPU_REGS_RBP]); + savegpr(rsi, regs[__VCPU_REGS_RSI]); + savegpr(rdi, regs[__VCPU_REGS_RDI]); + savegpr(r8, regs[__VCPU_REGS_R8]); + savegpr(r9, regs[__VCPU_REGS_R9]); + savegpr(r10, regs[__VCPU_REGS_R10]); + savegpr(r11, regs[__VCPU_REGS_R11]); + savegpr(r12, regs[__VCPU_REGS_R12]); + savegpr(r13, regs[__VCPU_REGS_R13]); + savegpr(r14, regs[__VCPU_REGS_R14]); + savegpr(r15, regs[__VCPU_REGS_R15]); + host_rsp = (u64)vcpu->pcpu->stack + STACK_SIZE; + asm volatile( + "pushfq\n" + "popq %%rax\n" + "movq %0, %%rdx\n" + "vmwrite %%rax, %%rdx\n" + "movq $vmentry_point, %%rax\n" + "movq %1, %%rdx\n" + "vmwrite %%rax, %%rdx\n" + "movq %%rsp, %%rax\n" + "movq %2, %%rdx\n" + "vmwrite %%rax, %%rdx\n" + "movq %3, %%rsp\n" + "pushq %%rax\n" + : + : "i"(GUEST_RFLAGS), "i"(GUEST_RIP), "i"(GUEST_RSP), "m"(host_rsp) + : "rax", "rdx", "memory"); + + /* + * call pkvm_main to do vmlaunch. + * + * if pkvm_main return - vmlaunch fail: + * pop back guest_rsp, ret = -EINVAL + * if pkvm_main not return - vmlaunch success: + * guest ret to vmentry_point, ret = 0 + */ + pkvm_sym(pkvm_main)(&vcpu->vmx.vcpu); + asm volatile( + "popq %%rdx\n" + "movq %%rdx, %%rsp\n" + "movq %1, %%rdx\n" + "movq %%rdx, %0\n" + "vmentry_point:\n" + : "=m"(ret) : "i"(-EINVAL) : "rdx", "memory"); + + return ret; +} + static __init void pkvm_host_deprivilege_cpu(void *data) { struct pkvm_deprivilege_param *p = data; @@ -556,13 +634,18 @@ static __init void pkvm_host_deprivilege_cpu(void *data) goto out; } - /* TODO:KICK to RUN vcpu. 
let's directly go with out(return failure) now */ + ret = pkvm_host_run_vcpu(vcpu); + if (ret == 0) { + pr_info("%s: CPU%d in guest mode\n", __func__, cpu); + goto ok; + } out: - p->ret = -ENOTSUPP; + p->ret = ret; pkvm_host_deinit_vmx(vcpu); pr_err("%s: failed to deprivilege CPU%d\n", __func__, cpu); +ok: local_irq_restore(flags); put_cpu(); @@ -619,6 +702,10 @@ __init int pkvm_init(void) goto out_free_cpu; } + ret = pkvm_host_deprivilege_cpus(pkvm); + if (ret) + goto out_free_cpu; + pkvm->num_cpus = num_possible_cpus(); return 0; -- 2.25.1