From: Brijesh Singh <brijesh.singh@xxxxxxx>

This hypercall is used by the SEV guest to notify the hypervisor of a
change in the encryption status of its pages. The hypercall should be
invoked only when the encryption attribute actually changes, i.e. from
encrypted to decrypted or vice versa. By default, all guest pages are
considered encrypted.

Introduce a new shared pages list, implemented as a sorted linked list,
to track the shared/unencrypted regions marked by the guest through
this hypercall.

Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: Ingo Molnar <mingo@xxxxxxxxxx>
Cc: "H. Peter Anvin" <hpa@xxxxxxxxx>
Cc: Paolo Bonzini <pbonzini@xxxxxxxxxx>
Cc: "Radim Krčmář" <rkrcmar@xxxxxxxxxx>
Cc: Joerg Roedel <joro@xxxxxxxxxx>
Cc: Borislav Petkov <bp@xxxxxxx>
Cc: Tom Lendacky <thomas.lendacky@xxxxxxx>
Cc: x86@xxxxxxxxxx
Cc: kvm@xxxxxxxxxxxxxxx
Cc: linux-kernel@xxxxxxxxxxxxxxx
Signed-off-by: Brijesh Singh <brijesh.singh@xxxxxxx>
Co-developed-by: Ashish Kalra <ashish.kalra@xxxxxxx>
Signed-off-by: Ashish Kalra <ashish.kalra@xxxxxxx>
---
 Documentation/virt/kvm/hypercalls.rst |  15 +++
 arch/x86/include/asm/kvm_host.h       |   2 +
 arch/x86/kvm/svm/sev.c                | 150 ++++++++++++++++++++++++++
 arch/x86/kvm/svm/svm.c                |   2 +
 arch/x86/kvm/svm/svm.h                |   5 +
 arch/x86/kvm/vmx/vmx.c                |   1 +
 arch/x86/kvm/x86.c                    |   6 ++
 include/uapi/linux/kvm_para.h         |   1 +
 8 files changed, 182 insertions(+)

diff --git a/Documentation/virt/kvm/hypercalls.rst b/Documentation/virt/kvm/hypercalls.rst
index ed4fddd364ea..7aff0cebab7c 100644
--- a/Documentation/virt/kvm/hypercalls.rst
+++ b/Documentation/virt/kvm/hypercalls.rst
@@ -169,3 +169,18 @@ a0: destination APIC ID
 
 :Usage example: When sending a call-function IPI-many to vCPUs, yield if
         any of the IPI target vCPUs was preempted.
+
+
+8. KVM_HC_PAGE_ENC_STATUS
+-------------------------
+:Architecture: x86
+:Status: active
+:Purpose: Notify the hypervisor of encryption status changes in the guest page table (SEV guest)
+
+a0: the guest physical address of the start page
+a1: the number of pages
+a2: encryption attribute
+
+   Where:
+        * 1: Encryption attribute is set
+        * 0: Encryption attribute is cleared
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 3d6616f6f6ef..2da5f5e2a10e 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1301,6 +1301,8 @@ struct kvm_x86_ops {
         int (*complete_emulated_msr)(struct kvm_vcpu *vcpu, int err);
 
         void (*vcpu_deliver_sipi_vector)(struct kvm_vcpu *vcpu, u8 vector);
+        int (*page_enc_status_hc)(struct kvm *kvm, unsigned long gpa,
+                                  unsigned long sz, unsigned long mode);
 };
 
 struct kvm_x86_nested_ops {
diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c
index 25eaf35ba51d..55c628df5155 100644
--- a/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@ -45,6 +45,11 @@ struct enc_region {
         unsigned long size;
 };
 
+struct shared_region {
+        struct list_head list;
+        unsigned long gfn_start, gfn_end;
+};
+
 static int sev_flush_asids(void)
 {
         int ret, error = 0;
@@ -196,6 +201,8 @@ static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
         sev->active = true;
         sev->asid = asid;
         INIT_LIST_HEAD(&sev->regions_list);
+        INIT_LIST_HEAD(&sev->shared_pages_list);
+        sev->shared_pages_list_count = 0;
 
         return 0;
 
@@ -1473,6 +1480,148 @@ static int sev_receive_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)
         return ret;
 }
 
+static int remove_shared_region(unsigned long start, unsigned long end,
+                                struct list_head *head)
+{
+        struct shared_region *pos;
+
+        list_for_each_entry(pos, head, list) {
+                if (pos->gfn_start == start &&
+                    pos->gfn_end == end) {
+                        list_del(&pos->list);
+                        kfree(pos);
+                        return -1;
+                } else if (start >= pos->gfn_start && end <= pos->gfn_end) {
+                        if (start == pos->gfn_start)
+                                pos->gfn_start = end + 1;
+                        else if (end == pos->gfn_end)
+                                pos->gfn_end = start - 1;
+                        else {
+                                /* De-merge: split the node into two ranges */
+                                unsigned long tmp;
+                                struct shared_region *shrd_region;
+
+                                tmp = pos->gfn_end;
+                                pos->gfn_end = start - 1;
+                                shrd_region = kzalloc(sizeof(*shrd_region), GFP_KERNEL_ACCOUNT);
+                                if (!shrd_region)
+                                        return -ENOMEM;
+                                shrd_region->gfn_start = end + 1;
+                                shrd_region->gfn_end = tmp;
+                                list_add(&shrd_region->list, &pos->list);
+                                return 1;
+                        }
+                        return 0;
+                }
+        }
+        return 0;
+}
+
+static int add_shared_region(unsigned long start, unsigned long end,
+                             struct list_head *shared_pages_list)
+{
+        struct list_head *head = shared_pages_list;
+        struct shared_region *shrd_region;
+        struct shared_region *pos;
+
+        if (list_empty(head)) {
+                shrd_region = kzalloc(sizeof(*shrd_region), GFP_KERNEL_ACCOUNT);
+                if (!shrd_region)
+                        return -ENOMEM;
+                shrd_region->gfn_start = start;
+                shrd_region->gfn_end = end;
+                list_add_tail(&shrd_region->list, head);
+                return 1;
+        }
+
+        /*
+         * The shared pages list is kept sorted in ascending order of
+         * guest PAs, and overlapping guest PA ranges are merged.
+         */
+        list_for_each_entry(pos, head, list) {
+                if (pos->gfn_end < start)
+                        continue;
+                /* merge overlapping guest PA ranges; never shrink one */
+                if (pos->gfn_start <= start && pos->gfn_end >= start) {
+                        pos->gfn_end = max(pos->gfn_end, end);
+                        return 0;
+                }
+                break;
+        }
+        /*
+         * Add a new node. Allocate nodes with GFP_KERNEL_ACCOUNT so that
+         * the kernel memory consumed can be tracked/throttled in case a
+         * malicious guest makes an unbounded number of hypercalls to
+         * exhaust host kernel memory and mount a DoS attack.
+         */
+        shrd_region = kzalloc(sizeof(*shrd_region), GFP_KERNEL_ACCOUNT);
+        if (!shrd_region)
+                return -ENOMEM;
+        shrd_region->gfn_start = start;
+        shrd_region->gfn_end = end;
+        list_add_tail(&shrd_region->list, &pos->list);
+        return 1;
+}
+
+int svm_page_enc_status_hc(struct kvm *kvm, unsigned long gpa,
+                           unsigned long npages, unsigned long enc)
+{
+        struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+        kvm_pfn_t pfn_start, pfn_end;
+        gfn_t gfn_start, gfn_end;
+        int ret = 0;
+
+        if (!sev_guest(kvm))
+                return -EINVAL;
+
+        if (!npages)
+                return 0;
+
+        gfn_start = gpa_to_gfn(gpa);
+        gfn_end = gfn_start + npages;
+
+        /* overflow check; gfn_end is exclusive */
+        if (gfn_end <= gfn_start)
+                return -EINVAL;
+
+        /* make sure the first and last gfn of the range exist in a memslot */
+        pfn_start = gfn_to_pfn(kvm, gfn_start);
+        pfn_end = gfn_to_pfn(kvm, gfn_end - 1);
+
+        if (is_error_noslot_pfn(pfn_start) && !is_noslot_pfn(pfn_start)) {
+                /*
+                 * Reject error pfns; no-slot (MMIO) pfns are fine so
+                 * that guest MMIO ranges can be in the shared list.
+                 */
+                return -EINVAL;
+        }
+
+        if (is_error_noslot_pfn(pfn_end) && !is_noslot_pfn(pfn_end)) {
+                /*
+                 * Reject error pfns; no-slot (MMIO) pfns are fine so
+                 * that guest MMIO ranges can be in the shared list.
+                 */
+                return -EINVAL;
+        }
+
+        mutex_lock(&kvm->lock);
+
+        if (enc) {
+                ret = remove_shared_region(gfn_start, gfn_end - 1,
+                                           &sev->shared_pages_list);
+                if (ret != -ENOMEM)
+                        sev->shared_pages_list_count += ret;
+        } else {
+                ret = add_shared_region(gfn_start, gfn_end - 1,
+                                        &sev->shared_pages_list);
+                if (ret > 0)
+                        sev->shared_pages_list_count++;
+        }
+
+        mutex_unlock(&kvm->lock);
+        return ret;
+}
+
 int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
 {
         struct kvm_sev_cmd sev_cmd;
@@ -1693,6 +1842,7 @@ void sev_vm_destroy(struct kvm *kvm)
 
         sev_unbind_asid(kvm, sev->handle);
         sev_asid_free(sev->asid);
+        sev->shared_pages_list_count = 0;
 }
 
 void __init sev_hardware_setup(void)
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index f923e14e87df..bb249ec625fc 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4536,6 +4536,8 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
         .complete_emulated_msr = svm_complete_emulated_msr,
 
         .vcpu_deliver_sipi_vector = svm_vcpu_deliver_sipi_vector,
+
+        .page_enc_status_hc = svm_page_enc_status_hc,
 };
 
 static struct kvm_x86_init_ops svm_init_ops __initdata = {
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 0fe874ae5498..6437c1fa1f24 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -79,6 +79,9 @@ struct kvm_sev_info {
         unsigned long pages_locked; /* Number of pages locked */
         struct list_head regions_list;  /* List of registered regions */
         u64 ap_jump_table;      /* SEV-ES AP Jump Table address */
+        /* List and count of shared pages */
+        int shared_pages_list_count;
+        struct list_head shared_pages_list;
 };
 
 struct kvm_svm {
@@ -472,6 +475,8 @@ int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
                                bool has_error_code, u32 error_code);
 int nested_svm_exit_special(struct vcpu_svm *svm);
 void sync_nested_vmcb_control(struct vcpu_svm *svm);
+int svm_page_enc_status_hc(struct kvm *kvm, unsigned long gpa,
+                           unsigned long npages, unsigned long enc);
 
 extern struct kvm_x86_nested_ops svm_nested_ops;
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index cc60b1fc3ee7..bcbf53851612 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -7705,6 +7705,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
         .can_emulate_instruction = vmx_can_emulate_instruction,
         .apic_init_signal_blocked = vmx_apic_init_signal_blocked,
         .migrate_timers = vmx_migrate_timers,
+        .page_enc_status_hc = NULL,
 
         .msr_filter_changed = vmx_msr_filter_changed,
         .complete_emulated_msr = kvm_complete_insn_gp,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 76bce832cade..2f17f0f9ace7 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -8162,6 +8162,12 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
                 kvm_sched_yield(vcpu->kvm, a0);
                 ret = 0;
                 break;
+        case KVM_HC_PAGE_ENC_STATUS:
+                ret = -KVM_ENOSYS;
+                if (kvm_x86_ops.page_enc_status_hc)
+                        ret = kvm_x86_ops.page_enc_status_hc(vcpu->kvm,
+                                        a0, a1, a2);
+                break;
         default:
                 ret = -KVM_ENOSYS;
                 break;
diff --git a/include/uapi/linux/kvm_para.h b/include/uapi/linux/kvm_para.h
index 8b86609849b9..847b83b75dc8 100644
--- a/include/uapi/linux/kvm_para.h
+++ b/include/uapi/linux/kvm_para.h
@@ -29,6 +29,7 @@
 #define KVM_HC_CLOCK_PAIRING            9
 #define KVM_HC_SEND_IPI                 10
 #define KVM_HC_SCHED_YIELD              11
+#define KVM_HC_PAGE_ENC_STATUS          12
 
 /*
  * hypercalls use architecture specific
-- 
2.17.1
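
Usage note (guest side): the guest component of this interface, which is
expected to come from other patches in this series, would issue the
hypercall whenever it flips the C-bit on a range of pages, e.g. from its
set_memory_encrypted()/set_memory_decrypted() paths. Below is a minimal
sketch of such a call site; the helper name notify_page_enc_status() is
illustrative only and not part of this patch, and the sketch assumes the
generic kvm_hypercall3() helper from asm/kvm_para.h (which issues
VMMCALL on AMD hardware via alternatives):

        #include <linux/kvm_para.h>  /* kvm_hypercall3(), KVM_HC_PAGE_ENC_STATUS */

        /*
         * Illustrative helper, not part of this patch: notify the
         * hypervisor that the npages pages starting at physical address
         * paddr changed encryption status.  enc == 1 means the pages
         * are encrypted (private) again, enc == 0 means they are now
         * decrypted (shared).  Returns a negative value such as
         * -KVM_ENOSYS if the host does not implement the hypercall.
         */
        static long notify_page_enc_status(unsigned long paddr,
                                           unsigned long npages,
                                           unsigned long enc)
        {
                return kvm_hypercall3(KVM_HC_PAGE_ENC_STATUS, paddr,
                                      npages, enc);
        }

A guest would typically make this call after updating its page-table
encryption attributes and flushing the TLB, so that the hypervisor's
shared pages list never runs ahead of the guest's actual C-bit state.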