Introduce the pkvm_pcpu data structure; pKVM can support up to CONFIG_NR_CPUS pCPUs. A pkvm_pcpu contains the environment needed to run a physical CPU: stack, GDT/IDT/TSS and CR3. As pKVM is isolated from the host OS, it needs its own running environment for each physical CPU. Initialize a dedicated GDT/IDT/TSS for each pKVM pCPU. The GDT/TSS settings are derived from host Linux, while the IDT is configured to jump to noop_handler for all exceptions, as pKVM has no exception support yet (NMI is a separate matter and will be added in the future). At this moment, pKVM still shares the MMU page table with host Linux, so its CR3 is set up from the host setting. In the future, pKVM shall create its own MMU page table. One pkvm_pcpu entry is allocated for each possible CPU by the newly added pcpu setup logic in pkvm_init. Signed-off-by: Chuanxiao Dong <chuanxiao.dong@xxxxxxxxx> Signed-off-by: Jason Chen CJ <jason.cj.chen@xxxxxxxxx> --- arch/x86/kvm/vmx/pkvm/include/pkvm.h | 17 ++++++ arch/x86/kvm/vmx/pkvm/pkvm_host.c | 91 +++++++++++++++++++++++++++- 2 files changed, 107 insertions(+), 1 deletion(-) diff --git a/arch/x86/kvm/vmx/pkvm/include/pkvm.h b/arch/x86/kvm/vmx/pkvm/include/pkvm.h index cda599194588..3adcebd31ca6 100644 --- a/arch/x86/kvm/vmx/pkvm/include/pkvm.h +++ b/arch/x86/kvm/vmx/pkvm/include/pkvm.h @@ -8,13 +8,30 @@ #include <vmx/vmx.h> +#define STACK_SIZE SZ_16K + +struct idt_page { + gate_desc idt[IDT_ENTRIES]; +} __aligned(PAGE_SIZE); + +struct pkvm_pcpu { + u8 stack[STACK_SIZE] __aligned(16); + unsigned long cr3; + struct gdt_page gdt_page; + struct idt_page idt_page; + struct tss_struct tss; +}; + struct pkvm_hyp { int num_cpus; struct vmx_capability vmx_cap; struct vmcs_config vmcs_config; + + struct pkvm_pcpu *pcpus[CONFIG_NR_CPUS]; }; #define PKVM_PAGES (ALIGN(sizeof(struct pkvm_hyp), PAGE_SIZE) >> PAGE_SHIFT) +#define PKVM_PCPU_PAGES (ALIGN(sizeof(struct pkvm_pcpu), PAGE_SIZE) >> PAGE_SHIFT) #endif diff --git 
a/arch/x86/kvm/vmx/pkvm/pkvm_host.c b/arch/x86/kvm/vmx/pkvm/pkvm_host.c index 8fd31360faf8..a076f023c582 100644 --- a/arch/x86/kvm/vmx/pkvm/pkvm_host.c +++ b/arch/x86/kvm/vmx/pkvm/pkvm_host.c @@ -5,6 +5,7 @@ #include <linux/kernel.h> #include <linux/module.h> +#include <asm/trapnr.h> #include <pkvm.h> @@ -12,6 +13,14 @@ MODULE_LICENSE("GPL"); static struct pkvm_hyp *pkvm; +/* only need GDT entries for KERNEL_CS & KERNEL_DS as pKVM only use these two */ +static struct gdt_page pkvm_gdt_page = { + .gdt = { + [GDT_ENTRY_KERNEL_CS] = GDT_ENTRY_INIT(0xa09b, 0, 0xfffff), + [GDT_ENTRY_KERNEL_DS] = GDT_ENTRY_INIT(0xc093, 0, 0xfffff), + }, +}; + static void *pkvm_early_alloc_contig(int pages) { return alloc_pages_exact(pages << PAGE_SHIFT, GFP_KERNEL | __GFP_ZERO); @@ -75,9 +84,76 @@ static __init int pkvm_host_check_and_setup_vmx_cap(struct pkvm_hyp *pkvm) return ret; } +static __init void init_gdt(struct pkvm_pcpu *pcpu) +{ + pcpu->gdt_page = pkvm_gdt_page; +} + +void noop_handler(void) +{ + /* To be added */ +} + +static __init void init_idt(struct pkvm_pcpu *pcpu) +{ + gate_desc *idt = pcpu->idt_page.idt; + struct idt_data d = { + .segment = __KERNEL_CS, + .bits.ist = 0, + .bits.zero = 0, + .bits.type = GATE_INTERRUPT, + .bits.dpl = 0, + .bits.p = 1, + }; + gate_desc desc; + int i; + + for (i = 0; i <= X86_TRAP_IRET; i++) { + d.vector = i; + d.bits.ist = 0; + d.addr = (const void *)noop_handler; + idt_init_desc(&desc, &d); + write_idt_entry(idt, i, &desc); + } +} + +static __init void init_tss(struct pkvm_pcpu *pcpu) +{ + struct desc_struct *d = pcpu->gdt_page.gdt; + tss_desc tss; + + set_tssldt_descriptor(&tss, (unsigned long)&pcpu->tss, DESC_TSS, + __KERNEL_TSS_LIMIT); + + write_gdt_entry(d, GDT_ENTRY_TSS, &tss, DESC_TSS); +} + +static __init int pkvm_setup_pcpu(struct pkvm_hyp *pkvm, int cpu) +{ + struct pkvm_pcpu *pcpu; + + if (cpu >= CONFIG_NR_CPUS) + return -ENOMEM; + + pcpu = pkvm_early_alloc_contig(PKVM_PCPU_PAGES); + if (!pcpu) + return -ENOMEM; + + /* tmp use 
host cr3, switch to pkvm owned cr3 after de-privilege */ + pcpu->cr3 = __read_cr3(); + + init_gdt(pcpu); + init_idt(pcpu); + init_tss(pcpu); + + pkvm->pcpus[cpu] = pcpu; + + return 0; +} + __init int pkvm_init(void) { - int ret = 0; + int ret = 0, cpu; pkvm = pkvm_early_alloc_contig(PKVM_PAGES); if (!pkvm) { @@ -89,10 +165,23 @@ __init int pkvm_init(void) if (ret) goto out_free_pkvm; + for_each_possible_cpu(cpu) { + ret = pkvm_setup_pcpu(pkvm, cpu); + if (ret) + goto out_free_cpu; + } + pkvm->num_cpus = num_possible_cpus(); return 0; +out_free_cpu: + for_each_possible_cpu(cpu) { + if (pkvm->pcpus[cpu]) { + pkvm_early_free(pkvm->pcpus[cpu], PKVM_PCPU_PAGES); + pkvm->pcpus[cpu] = NULL; + } + } out_free_pkvm: pkvm_early_free(pkvm, PKVM_PAGES); out: -- 2.25.1