Version 2 of GHCB specification added support for the SNP Extended Guest Request Message NAE event. This event serves a nearly identical purpose to the previously-added SNP_GUEST_REQUEST event, but allows for additional certificate data to be supplied via an additional guest-supplied buffer to be used mainly for verifying the signature of an attestation report as returned by firmware. This certificate data is supplied by userspace, so unlike with SNP_GUEST_REQUEST events, SNP_EXTENDED_GUEST_REQUEST events are first forwarded to userspace via a KVM_EXIT_VMGEXIT exit structure, and then the firmware request is made after the certificate data has been fetched from userspace. Since there is a potential for race conditions where the userspace-supplied certificate data may be out-of-sync relative to the reported TCB or VLEK that firmware will use when signing attestation reports, a hook is also provided so that userspace can be informed once the attestation request is actually completed. See the updates to Documentation/ for more details on these aspects. Signed-off-by: Michael Roth <michael.roth@xxxxxxx> --- Documentation/virt/kvm/api.rst | 87 ++++++++++++++++++++++++++++++++++ arch/x86/kvm/svm/sev.c | 86 +++++++++++++++++++++++++++++++++ arch/x86/kvm/svm/svm.h | 3 ++ include/uapi/linux/kvm.h | 23 +++++++++ 4 files changed, 199 insertions(+) diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index f0b76ff5030d..f3780ac98d56 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -7060,6 +7060,93 @@ Please note that the kernel is allowed to use the kvm_run structure as the primary storage for certain register types. Therefore, the kernel may use the values in kvm_run even if the corresponding bit in kvm_dirty_regs is not set. +:: + + /* KVM_EXIT_VMGEXIT */ + struct kvm_user_vmgexit { + #define KVM_USER_VMGEXIT_REQ_CERTS 1 + __u32 type; /* KVM_USER_VMGEXIT_* type */ + union { + struct { + __u64 data_gpa; + __u64 data_npages; + #define KVM_USER_VMGEXIT_REQ_CERTS_ERROR_INVALID_LEN 1 + #define KVM_USER_VMGEXIT_REQ_CERTS_ERROR_BUSY 2 + #define KVM_USER_VMGEXIT_REQ_CERTS_ERROR_GENERIC (1 << 31) + __u32 ret; + #define KVM_USER_VMGEXIT_REQ_CERTS_FLAGS_NOTIFY_DONE BIT(0) + __u8 flags; + #define KVM_USER_VMGEXIT_REQ_CERTS_STATUS_PENDING 0 + #define KVM_USER_VMGEXIT_REQ_CERTS_STATUS_DONE 1 + __u8 status; + } req_certs; + }; + }; + + +If exit reason is KVM_EXIT_VMGEXIT then it indicates that an SEV-SNP guest +has issued a VMGEXIT instruction (as documented by the AMD Architecture +Programmer's Manual (APM)) to the hypervisor that needs to be serviced by +userspace. These are generally handled by the host kernel, but in some +cases some aspects of handling a VMGEXIT are done in userspace. + +A kvm_user_vmgexit structure is defined to encapsulate the data to be +sent to or returned by userspace. The type field defines the specific type +of exit that needs to be serviced, and that type is used as a discriminator +to determine which union type should be used for input/output. + +KVM_USER_VMGEXIT_REQ_CERTS +-------------------------- + +When an SEV-SNP issues a guest request for an attestation report, it has the +option of issuing it in the form an *extended* guest request when a +certificate blob is returned alongside the attestation report so the guest +can validate the endorsement key used by SNP firmware to sign the report. +These certificates are managed by userspace and are requested via +KVM_EXIT_VMGEXITs using the KVM_USER_VMGEXIT_REQ_CERTS type. + +For the KVM_USER_VMGEXIT_REQ_CERTS type, the req_certs union type +is used. The kernel will supply in 'data_gpa' the value the guest supplies +via the RAX field of the GHCB when issuing extended guest requests. +'data_npages' will similarly contain the value the guest supplies in RBX +denoting the number of shared pages available to write the certificate +data into. + + - If the supplied number of pages is sufficient, userspace should write + the certificate data blob (in the format defined by the GHCB spec) in + the address indicated by 'data_gpa' and set 'ret' to 0. + + - If the number of pages supplied is not sufficient, userspace must write + the required number of pages in 'data_npages' and then set 'ret' to 1. + + - If userspace is temporarily unable to handle the request, 'ret' should + be set to 2 to inform the guest to retry later. + + - If some other error occurred, userspace should set 'ret' to a non-zero + value that is distinct from the specific return values mentioned above. + +Generally some care needs be taken to keep the returned certificate data in +sync with the actual endorsement key in use by firmware at the time the +attestation request is sent to SNP firmware. The recommended scheme to do +this is for the VMM to obtain a shared or exclusive lock on the path the +certificate blob file resides at before reading it and returning it to KVM, +and that it continues to hold the lock until the attestation request is +actually sent to firmware. To facilitate this, the VMM can set the +KVM_USER_VMGEXIT_REQ_CERTS_FLAGS_NOTIFY_DONE flag before returning the +certificate blob, in which case another KVM_EXIT_VMGEXIT of type +KVM_USER_VMGEXIT_REQ_CERTS will be sent to userspace with +KVM_USER_VMGEXIT_REQ_CERTS_STATUS_DONE being set in the status field to +indicate the request is fully-completed and that any associated locks can be +released. + +Tools/libraries that perform updates to SNP firmware TCB values or endorsement +keys (e.g. firmware interfaces such as SNP_COMMIT, SNP_SET_CONFIG, or +SNP_VLEK_LOAD, see Documentation/virt/coco/sev-guest.rst for more details) in +such a way that the certificate blob needs to be updated, should similarly +take an exclusive lock on the certificate blob for the duration of any updates +to firmware or the certificate blob contents to ensure that VMMs using the +above scheme will not return certificate blob data that is out of sync with +firmware. 6. Capabilities that can be enabled on vCPUs ============================================ diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 5c6262f3232f..35f0bd91f92e 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -3297,6 +3297,11 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm) if (!sev_snp_guest(vcpu->kvm)) goto vmgexit_err; break; + case SVM_VMGEXIT_EXT_GUEST_REQUEST: + if (!sev_snp_guest(vcpu->kvm) || !kvm_ghcb_rax_is_valid(svm) || + !kvm_ghcb_rbx_is_valid(svm)) + goto vmgexit_err; + break; default: reason = GHCB_ERR_INVALID_EVENT; goto vmgexit_err; @@ -3988,6 +3993,84 @@ static void snp_handle_guest_req(struct vcpu_svm *svm, gpa_t req_gpa, gpa_t resp ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, SNP_GUEST_ERR(vmm_ret, fw_err)); } +static int snp_complete_ext_guest_req(struct kvm_vcpu *vcpu) +{ + struct vcpu_svm *svm = to_svm(vcpu); + struct vmcb_control_area *control; + struct kvm *kvm = vcpu->kvm; + sev_ret_code fw_err = 0; + int vmm_ret; + + vmm_ret = vcpu->run->vmgexit.req_certs.ret; + if (vmm_ret) { + if (vmm_ret == SNP_GUEST_VMM_ERR_INVALID_LEN) + vcpu->arch.regs[VCPU_REGS_RBX] = + vcpu->run->vmgexit.req_certs.data_npages; + goto out; + } + + /* + * The request was completed on the previous completion callback and + * this completion is only for the STATUS_DONE userspace notification. + */ + if (vcpu->run->vmgexit.req_certs.status == KVM_USER_VMGEXIT_REQ_CERTS_STATUS_DONE) + goto out_resume; + + control = &svm->vmcb->control; + + if (__snp_handle_guest_req(kvm, control->exit_info_1, + control->exit_info_2, &fw_err)) + vmm_ret = SNP_GUEST_VMM_ERR_GENERIC; + +out: + ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, SNP_GUEST_ERR(vmm_ret, fw_err)); + + if (vcpu->run->vmgexit.req_certs.flags & KVM_USER_VMGEXIT_REQ_CERTS_FLAGS_NOTIFY_DONE) { + vcpu->run->vmgexit.req_certs.status = KVM_USER_VMGEXIT_REQ_CERTS_STATUS_DONE; + vcpu->run->vmgexit.req_certs.flags = 0; + return 0; /* notify userspace of completion */ + } + +out_resume: + return 1; /* resume guest */ +} + +static int snp_begin_ext_guest_req(struct kvm_vcpu *vcpu) +{ + int vmm_ret = SNP_GUEST_VMM_ERR_GENERIC; + struct vcpu_svm *svm = to_svm(vcpu); + unsigned long data_npages; + sev_ret_code fw_err; + gpa_t data_gpa; + + if (!sev_snp_guest(vcpu->kvm)) + goto abort_request; + + data_gpa = vcpu->arch.regs[VCPU_REGS_RAX]; + data_npages = vcpu->arch.regs[VCPU_REGS_RBX]; + + if (!IS_ALIGNED(data_gpa, PAGE_SIZE)) + goto abort_request; + + /* + * Grab the certificates from userspace so that can be bundled with + * attestation/guest requests. + */ + vcpu->run->exit_reason = KVM_EXIT_VMGEXIT; + vcpu->run->vmgexit.type = KVM_USER_VMGEXIT_REQ_CERTS; + vcpu->run->vmgexit.req_certs.data_gpa = data_gpa; + vcpu->run->vmgexit.req_certs.data_npages = data_npages; + vcpu->run->vmgexit.req_certs.flags = 0; + vcpu->run->vmgexit.req_certs.status = KVM_USER_VMGEXIT_REQ_CERTS_STATUS_PENDING; + vcpu->arch.complete_userspace_io = snp_complete_ext_guest_req; + + return 0; /* forward request to userspace */ + +abort_request: + ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, SNP_GUEST_ERR(vmm_ret, fw_err)); + return 1; /* resume guest */ +} + static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm) { struct vmcb_control_area *control = &svm->vmcb->control; @@ -4266,6 +4349,9 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu) snp_handle_guest_req(svm, control->exit_info_1, control->exit_info_2); ret = 1; break; + case SVM_VMGEXIT_EXT_GUEST_REQUEST: + ret = snp_begin_ext_guest_req(vcpu); + break; case SVM_VMGEXIT_UNSUPPORTED_EVENT: vcpu_unimpl(vcpu, "vmgexit: unsupported event - exit_info_1=%#llx, exit_info_2=%#llx\n", diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index e325ede0f463..83c562b4712a 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -309,6 +309,9 @@ struct vcpu_svm { /* Guest GIF value, used when vGIF is not enabled */ bool guest_gif; + + /* Transaction ID associated with SNP config updates */ + u64 snp_transaction_id; }; struct svm_cpu_data { diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 2190adbe3002..106367d87189 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -135,6 +135,26 @@ struct kvm_xen_exit { } u; }; +struct kvm_user_vmgexit { +#define KVM_USER_VMGEXIT_REQ_CERTS 1 + __u32 type; /* KVM_USER_VMGEXIT_* type */ + union { + struct { + __u64 data_gpa; + __u64 data_npages; +#define KVM_USER_VMGEXIT_REQ_CERTS_ERROR_INVALID_LEN 1 +#define KVM_USER_VMGEXIT_REQ_CERTS_ERROR_BUSY 2 +#define KVM_USER_VMGEXIT_REQ_CERTS_ERROR_GENERIC (1 << 31) + __u32 ret; +#define KVM_USER_VMGEXIT_REQ_CERTS_FLAGS_NOTIFY_DONE BIT(0) + __u8 flags; +#define KVM_USER_VMGEXIT_REQ_CERTS_STATUS_PENDING 0 +#define KVM_USER_VMGEXIT_REQ_CERTS_STATUS_DONE 1 + __u8 status; + } req_certs; + }; +}; + #define KVM_S390_GET_SKEYS_NONE 1 #define KVM_S390_SKEYS_MAX 1048576 @@ -178,6 +198,7 @@ struct kvm_xen_exit { #define KVM_EXIT_NOTIFY 37 #define KVM_EXIT_LOONGARCH_IOCSR 38 #define KVM_EXIT_MEMORY_FAULT 39 +#define KVM_EXIT_VMGEXIT 40 /* For KVM_EXIT_INTERNAL_ERROR */ /* Emulate instruction failed. */ @@ -433,6 +454,8 @@ struct kvm_run { __u64 gpa; __u64 size; } memory_fault; + /* KVM_EXIT_VMGEXIT */ + struct kvm_user_vmgexit vmgexit; /* Fix the size of the union. */ char padding[256]; }; -- 2.25.1