On Fri, Apr 03, 2020 at 01:57:48AM +0000, Ashish Kalra wrote: > On Thu, Apr 02, 2020 at 06:31:54PM -0700, Krish Sadhukhan wrote: > > > > On 3/29/20 11:22 PM, Ashish Kalra wrote: > > > From: Brijesh Singh <Brijesh.Singh@xxxxxxx> > > > > > > This hypercall is used by the SEV guest to notify a change in the page > > > encryption status to the hypervisor. The hypercall should be invoked > > > only when the encryption attribute is changed from encrypted -> decrypted > > > and vice versa. By default all guest pages are considered encrypted. > > > > > > Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx> > > > Cc: Ingo Molnar <mingo@xxxxxxxxxx> > > > Cc: "H. Peter Anvin" <hpa@xxxxxxxxx> > > > Cc: Paolo Bonzini <pbonzini@xxxxxxxxxx> > > > Cc: "Radim Krčmář" <rkrcmar@xxxxxxxxxx> > > > Cc: Joerg Roedel <joro@xxxxxxxxxx> > > > Cc: Borislav Petkov <bp@xxxxxxx> > > > Cc: Tom Lendacky <thomas.lendacky@xxxxxxx> > > > Cc: x86@xxxxxxxxxx > > > Cc: kvm@xxxxxxxxxxxxxxx > > > Cc: linux-kernel@xxxxxxxxxxxxxxx > > > Signed-off-by: Brijesh Singh <brijesh.singh@xxxxxxx> > > > Signed-off-by: Ashish Kalra <ashish.kalra@xxxxxxx> > > > --- > > > Documentation/virt/kvm/hypercalls.rst | 15 +++++ > > > arch/x86/include/asm/kvm_host.h | 2 + > > > arch/x86/kvm/svm.c | 95 +++++++++++++++++++++++++++ > > > arch/x86/kvm/vmx/vmx.c | 1 + > > > arch/x86/kvm/x86.c | 6 ++ > > > include/uapi/linux/kvm_para.h | 1 + > > > 6 files changed, 120 insertions(+) > > > > > > diff --git a/Documentation/virt/kvm/hypercalls.rst b/Documentation/virt/kvm/hypercalls.rst > > > index dbaf207e560d..ff5287e68e81 100644 > > > --- a/Documentation/virt/kvm/hypercalls.rst > > > +++ b/Documentation/virt/kvm/hypercalls.rst > > > @@ -169,3 +169,18 @@ a0: destination APIC ID > > > :Usage example: When sending a call-function IPI-many to vCPUs, yield if > > > any of the IPI target vCPUs was preempted. > > > + > > > + > > > +8. 
KVM_HC_PAGE_ENC_STATUS > > > +------------------------- > > > +:Architecture: x86 > > > +:Status: active > > > +:Purpose: Notify the encryption status changes in guest page table (SEV guest) > > > + > > > +a0: the guest physical address of the start page > > > +a1: the number of pages > > > +a2: encryption attribute > > > + > > > + Where: > > > + * 1: Encryption attribute is set > > > + * 0: Encryption attribute is cleared > > > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > > > index 98959e8cd448..90718fa3db47 100644 > > > --- a/arch/x86/include/asm/kvm_host.h > > > +++ b/arch/x86/include/asm/kvm_host.h > > > @@ -1267,6 +1267,8 @@ struct kvm_x86_ops { > > > bool (*apic_init_signal_blocked)(struct kvm_vcpu *vcpu); > > > int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu); > > > + int (*page_enc_status_hc)(struct kvm *kvm, unsigned long gpa, > > > + unsigned long sz, unsigned long mode); > > > }; > > > struct kvm_arch_async_pf { > > > diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c > > > index 7c2721e18b06..1d8beaf1bceb 100644 > > > --- a/arch/x86/kvm/svm.c > > > +++ b/arch/x86/kvm/svm.c > > > @@ -136,6 +136,8 @@ struct kvm_sev_info { > > > int fd; /* SEV device fd */ > > > unsigned long pages_locked; /* Number of pages locked */ > > > struct list_head regions_list; /* List of registered regions */ > > > + unsigned long *page_enc_bmap; > > > + unsigned long page_enc_bmap_size; > > > }; > > > struct kvm_svm { > > > @@ -1991,6 +1993,9 @@ static void sev_vm_destroy(struct kvm *kvm) > > > sev_unbind_asid(kvm, sev->handle); > > > sev_asid_free(sev->asid); > > > + > > > + kvfree(sev->page_enc_bmap); > > > + sev->page_enc_bmap = NULL; > > > } > > > static void avic_vm_destroy(struct kvm *kvm) > > > @@ -7593,6 +7598,94 @@ static int sev_receive_finish(struct kvm *kvm, struct kvm_sev_cmd *argp) > > > return ret; > > > } > > > +static int sev_resize_page_enc_bitmap(struct kvm *kvm, unsigned long new_size) > > > +{ > > > + struct 
kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; > > > + unsigned long *map; > > > + unsigned long sz; > > > + > > > + if (sev->page_enc_bmap_size >= new_size) > > > + return 0; > > > + > > > + sz = ALIGN(new_size, BITS_PER_LONG) / 8; > > > + > > > + map = vmalloc(sz); > > > > > > Just wondering why we can't directly modify sev->page_enc_bmap. > > > > Because the page_enc_bitmap needs to be re-sized here, it needs to be > expanded here. > I don't believe there is anything like a realloc() equivalent for the kmalloc() interfaces. Thanks, Ashish > > > + if (!map) { > > > + pr_err_once("Failed to allocate encrypted bitmap size %lx\n", > > > + sz); > > > + return -ENOMEM; > > > + } > > > + > > > + /* mark the page encrypted (by default) */ > > > + memset(map, 0xff, sz); > > > + > > > + bitmap_copy(map, sev->page_enc_bmap, sev->page_enc_bmap_size); > > > + kvfree(sev->page_enc_bmap); > > > + > > > + sev->page_enc_bmap = map; > > > + sev->page_enc_bmap_size = new_size; > > > + > > > + return 0; > > > +} > > > + > > > +static int svm_page_enc_status_hc(struct kvm *kvm, unsigned long gpa, > > > + unsigned long npages, unsigned long enc) > > > +{ > > > + struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info; > > > + kvm_pfn_t pfn_start, pfn_end; > > > + gfn_t gfn_start, gfn_end; > > > + int ret; > > > + > > > + if (!sev_guest(kvm)) > > > + return -EINVAL; > > > + > > > + if (!npages) > > > + return 0; > > > + > > > + gfn_start = gpa_to_gfn(gpa); > > > + gfn_end = gfn_start + npages; > > > + > > > + /* out of bound access error check */ > > > + if (gfn_end <= gfn_start) > > > + return -EINVAL; > > > + > > > + /* lets make sure that gpa exist in our memslot */ > > > + pfn_start = gfn_to_pfn(kvm, gfn_start); > > > + pfn_end = gfn_to_pfn(kvm, gfn_end); > > > + > > > + if (is_error_noslot_pfn(pfn_start) && !is_noslot_pfn(pfn_start)) { > > > + /* > > > + * Allow guest MMIO range(s) to be added > > > + * to the page encryption bitmap. 
> > > + */ > > > + return -EINVAL; > > > + } > > > + > > > + if (is_error_noslot_pfn(pfn_end) && !is_noslot_pfn(pfn_end)) { > > > + /* > > > + * Allow guest MMIO range(s) to be added > > > + * to the page encryption bitmap. > > > + */ > > > + return -EINVAL; > > > + } > > > > > > It seems is_error_noslot_pfn() covers both cases - i) gfn slot is absent, > > ii) failure to translate to pfn. So do we still need is_noslot_pfn() ? > > > > We do need to check for !is_noslot_pfn(..) additionally as the MMIO ranges will not > have a slot allocated. > > Thanks, > Ashish > > > > + > > > + mutex_lock(&kvm->lock); > > > + ret = sev_resize_page_enc_bitmap(kvm, gfn_end); > > > + if (ret) > > > + goto unlock; > > > + > > > + if (enc) > > > + __bitmap_set(sev->page_enc_bmap, gfn_start, > > > + gfn_end - gfn_start); > > > + else > > > + __bitmap_clear(sev->page_enc_bmap, gfn_start, > > > + gfn_end - gfn_start); > > > + > > > +unlock: > > > + mutex_unlock(&kvm->lock); > > > + return ret; > > > +} > > > + > > > static int svm_mem_enc_op(struct kvm *kvm, void __user *argp) > > > { > > > struct kvm_sev_cmd sev_cmd; > > > @@ -7995,6 +8088,8 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { > > > .need_emulation_on_page_fault = svm_need_emulation_on_page_fault, > > > .apic_init_signal_blocked = svm_apic_init_signal_blocked, > > > + > > > + .page_enc_status_hc = svm_page_enc_status_hc, > > > > > > Why not place it where other encryption ops are located ? > > > > ... 
> > > > .mem_enc_unreg_region > > > > + .page_enc_status_hc = svm_page_enc_status_hc > > > > > }; > > > static int __init svm_init(void) > > > diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c > > > index 079d9fbf278e..f68e76ee7f9c 100644 > > > --- a/arch/x86/kvm/vmx/vmx.c > > > +++ b/arch/x86/kvm/vmx/vmx.c > > > @@ -8001,6 +8001,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { > > > .nested_get_evmcs_version = NULL, > > > .need_emulation_on_page_fault = vmx_need_emulation_on_page_fault, > > > .apic_init_signal_blocked = vmx_apic_init_signal_blocked, > > > + .page_enc_status_hc = NULL, > > > }; > > > static void vmx_cleanup_l1d_flush(void) > > > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > > > index cf95c36cb4f4..68428eef2dde 100644 > > > --- a/arch/x86/kvm/x86.c > > > +++ b/arch/x86/kvm/x86.c > > > @@ -7564,6 +7564,12 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) > > > kvm_sched_yield(vcpu->kvm, a0); > > > ret = 0; > > > break; > > > + case KVM_HC_PAGE_ENC_STATUS: > > > + ret = -KVM_ENOSYS; > > > + if (kvm_x86_ops->page_enc_status_hc) > > > + ret = kvm_x86_ops->page_enc_status_hc(vcpu->kvm, > > > + a0, a1, a2); > > > + break; > > > default: > > > ret = -KVM_ENOSYS; > > > break; > > > diff --git a/include/uapi/linux/kvm_para.h b/include/uapi/linux/kvm_para.h > > > index 8b86609849b9..847b83b75dc8 100644 > > > --- a/include/uapi/linux/kvm_para.h > > > +++ b/include/uapi/linux/kvm_para.h > > > @@ -29,6 +29,7 @@ > > > #define KVM_HC_CLOCK_PAIRING 9 > > > #define KVM_HC_SEND_IPI 10 > > > #define KVM_HC_SCHED_YIELD 11 > > > +#define KVM_HC_PAGE_ENC_STATUS 12 > > > /* > > > * hypercalls use architecture specific