On Wed, Jul 7, 2021 at 11:38 AM Brijesh Singh <brijesh.singh@xxxxxxx> wrote: > > When SEV-SNP is globally enabled on a system, the VMRUN instruction > performs additional security checks on AVIC backing, VMSA, and VMCB page. > On a successful VMRUN, these pages are marked "in-use" by the > hardware in the RMP entry, and any attempt to modify the RMP entry for > these pages will result in page-fault (RMP violation check). > > While performing the RMP check, hardware will try to create a 2MB TLB > entry for the large page accesses. When it does this, it first reads > the RMP for the base of 2MB region and verifies that all this memory is > safe. If AVIC backing, VMSA, and VMCB memory happen to be the base of > 2MB region, then RMP check will fail because of the "in-use" marking for > the base entry of this 2MB region. > > e.g. > > 1. A VMCB was allocated on 2MB-aligned address. > 2. The VMRUN instruction marks this RMP entry as "in-use". > 3. Another process allocated some other page of memory that happened to be > within the same 2MB region. > 4. That process tried to write its page using physmap. > > If the physmap entry in step #4 uses a large (1G/2M) page, then the > hardware will attempt to create a 2M TLB entry. The hardware will find > that the "in-use" bit is set in the RMP entry (because it was a > VMCB page) and will cause an RMP violation check. > > See APM2 section 15.36.12 for more information on VMRUN checks when > SEV-SNP is globally active. > > A generic allocator can return a page which are 2M aligned and will not > be safe to be used when SEV-SNP is globally enabled. Add a > snp_safe_alloc_page() helper that can be used for allocating the > SNP safe memory. The helper allocated 2 pages and splits them into order-1 > allocation. It frees one page and keeps one of the page which is not > 2M aligned. > > Signed-off-by: Brijesh Singh <brijesh.singh@xxxxxxx> Co-developed-by: Marc Orr <marcorr@xxxxxxxxxx> The original version of this patch had this tag. I think it got dropped on accident. > --- > arch/x86/include/asm/kvm_host.h | 1 + > arch/x86/kvm/lapic.c | 5 ++++- > arch/x86/kvm/svm/sev.c | 27 +++++++++++++++++++++++++++ > arch/x86/kvm/svm/svm.c | 16 ++++++++++++++-- > arch/x86/kvm/svm/svm.h | 1 + > 5 files changed, 47 insertions(+), 3 deletions(-) > > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > index 55efbacfc244..188110ab2c02 100644 > --- a/arch/x86/include/asm/kvm_host.h > +++ b/arch/x86/include/asm/kvm_host.h > @@ -1383,6 +1383,7 @@ struct kvm_x86_ops { > int (*complete_emulated_msr)(struct kvm_vcpu *vcpu, int err); > > void (*vcpu_deliver_sipi_vector)(struct kvm_vcpu *vcpu, u8 vector); > + void *(*alloc_apic_backing_page)(struct kvm_vcpu *vcpu); > }; > > struct kvm_x86_nested_ops { > diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c > index c0ebef560bd1..d4c77f66d7d5 100644 > --- a/arch/x86/kvm/lapic.c > +++ b/arch/x86/kvm/lapic.c > @@ -2441,7 +2441,10 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns) > > vcpu->arch.apic = apic; > > - apic->regs = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT); > + if (kvm_x86_ops.alloc_apic_backing_page) > + apic->regs = kvm_x86_ops.alloc_apic_backing_page(vcpu); > + else > + apic->regs = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT); > if (!apic->regs) { > printk(KERN_ERR "malloc apic regs error for vcpu %x\n", > vcpu->vcpu_id); > diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c > index b8505710c36b..411ed72f63af 100644 > --- a/arch/x86/kvm/svm/sev.c > +++ b/arch/x86/kvm/svm/sev.c > @@ -2692,3 +2692,30 @@ void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector) > break; > } > } > + > +struct page *snp_safe_alloc_page(struct kvm_vcpu *vcpu) > +{ > + unsigned long pfn; > + struct page *p; > + > + if (!cpu_feature_enabled(X86_FEATURE_SEV_SNP)) > + return alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO); > + > + p = alloc_pages(GFP_KERNEL_ACCOUNT | __GFP_ZERO, 1); > + if (!p) > + return NULL; > + > + /* split the page order */ > + split_page(p, 1); > + > + /* Find a non-2M aligned page */ > + pfn = page_to_pfn(p); > + if (IS_ALIGNED(__pfn_to_phys(pfn), PMD_SIZE)) { > + pfn++; > + __free_page(p); > + } else { > + __free_page(pfn_to_page(pfn + 1)); > + } > + > + return pfn_to_page(pfn); > +} > diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c > index 2acf187a3100..a7adf6ca1713 100644 > --- a/arch/x86/kvm/svm/svm.c > +++ b/arch/x86/kvm/svm/svm.c > @@ -1336,7 +1336,7 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu) > svm = to_svm(vcpu); > > err = -ENOMEM; > - vmcb01_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO); > + vmcb01_page = snp_safe_alloc_page(vcpu); > if (!vmcb01_page) > goto out; > > @@ -1345,7 +1345,7 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu) > * SEV-ES guests require a separate VMSA page used to contain > * the encrypted register state of the guest. > */ > - vmsa_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO); > + vmsa_page = snp_safe_alloc_page(vcpu); > if (!vmsa_page) > goto error_free_vmcb_page; > > @@ -4439,6 +4439,16 @@ static int svm_vm_init(struct kvm *kvm) > return 0; > } > > +static void *svm_alloc_apic_backing_page(struct kvm_vcpu *vcpu) > +{ > + struct page *page = snp_safe_alloc_page(vcpu); > + > + if (!page) > + return NULL; > + > + return page_address(page); > +} > + > static struct kvm_x86_ops svm_x86_ops __initdata = { > .hardware_unsetup = svm_hardware_teardown, > .hardware_enable = svm_hardware_enable, > @@ -4564,6 +4574,8 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { > .complete_emulated_msr = svm_complete_emulated_msr, > > .vcpu_deliver_sipi_vector = svm_vcpu_deliver_sipi_vector, > + > + .alloc_apic_backing_page = svm_alloc_apic_backing_page, > }; > > static struct kvm_x86_init_ops svm_init_ops __initdata = { > diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h > index 5f874168551b..1175edb02d33 100644 > --- a/arch/x86/kvm/svm/svm.h > +++ b/arch/x86/kvm/svm/svm.h > @@ -554,6 +554,7 @@ void sev_es_create_vcpu(struct vcpu_svm *svm); > void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector); > void sev_es_prepare_guest_switch(struct vcpu_svm *svm, unsigned int cpu); > void sev_es_unmap_ghcb(struct vcpu_svm *svm); > +struct page *snp_safe_alloc_page(struct kvm_vcpu *vcpu); > > /* vmenter.S */ > > -- > 2.17.1 > >