On Mon, 2021-03-01 at 09:20 -0800, Sean Christopherson wrote: > On Mon, Mar 01, 2021, Kai Huang wrote: > > +static int handle_encls_ecreate(struct kvm_vcpu *vcpu) > > +{ > > + struct kvm_cpuid_entry2 *sgx_12_0, *sgx_12_1; > > + gva_t pageinfo_gva, secs_gva; > > + gva_t metadata_gva, contents_gva; > > + gpa_t metadata_gpa, contents_gpa, secs_gpa; > > + unsigned long metadata_hva, contents_hva, secs_hva; > > + struct sgx_pageinfo pageinfo; > > + struct sgx_secs *contents; > > + u64 attributes, xfrm, size; > > + u32 miscselect; > > + struct x86_exception ex; > > + u8 max_size_log2; > > + int trapnr, r; > > + > > (see below) > > --- cut here --- >8 > > > + sgx_12_0 = kvm_find_cpuid_entry(vcpu, 0x12, 0); > > + sgx_12_1 = kvm_find_cpuid_entry(vcpu, 0x12, 1); > > + if (!sgx_12_0 || !sgx_12_1) { > > + kvm_inject_gp(vcpu, 0); > > This should probably be an emulation failure. This code is reached iff SGX1 is > enabled in the guest, userspace done messed up if they enabled SGX1 without > defining CPUID.0x12.1. That also makes it more obvious that burying this in a > helper after a bunch of other checks isn't wrong, i.e. KVM has already verified > that SGX1 is enabled in the guest. > > > + return 1; > > + } > > --- > > > + > > + if (sgx_get_encls_gva(vcpu, kvm_rbx_read(vcpu), 32, 32, &pageinfo_gva) || > > + sgx_get_encls_gva(vcpu, kvm_rcx_read(vcpu), 4096, 4096, &secs_gva)) > > + return 1; > > + > > + /* > > + * Copy the PAGEINFO to local memory, its pointers need to be > > + * translated, i.e. we need to do a deep copy/translate. > > + */ > > + r = kvm_read_guest_virt(vcpu, pageinfo_gva, &pageinfo, > > + sizeof(pageinfo), &ex); > > + if (r == X86EMUL_PROPAGATE_FAULT) { > > + kvm_inject_emulated_page_fault(vcpu, &ex); > > + return 1; > > + } else if (r != X86EMUL_CONTINUE) { > > + sgx_handle_emulation_failure(vcpu, pageinfo_gva, size); > > + return 0; > > + } > > + > > + if (sgx_get_encls_gva(vcpu, pageinfo.metadata, 64, 64, &metadata_gva) || > > + sgx_get_encls_gva(vcpu, pageinfo.contents, 4096, 4096, > > + &contents_gva)) > > + return 1; > > + > > + /* > > + * Translate the SECINFO, SOURCE and SECS pointers from GVA to GPA. > > + * Resume the guest on failure to inject a #PF. > > + */ > > + if (sgx_gva_to_gpa(vcpu, metadata_gva, false, &metadata_gpa) || > > + sgx_gva_to_gpa(vcpu, contents_gva, false, &contents_gpa) || > > + sgx_gva_to_gpa(vcpu, secs_gva, true, &secs_gpa)) > > + return 1; > > + > > + /* > > + * ...and then to HVA. The order of accesses isn't architectural, i.e. > > + * KVM doesn't have to fully process one address at a time. Exit to > > + * userspace if a GPA is invalid. > > + */ > > + if (sgx_gpa_to_hva(vcpu, metadata_gpa, &metadata_hva) || > > + sgx_gpa_to_hva(vcpu, contents_gpa, &contents_hva) || > > + sgx_gpa_to_hva(vcpu, secs_gpa, &secs_hva)) > > + return 0; > > + /* > > + * Copy contents into kernel memory to prevent TOCTOU attack. E.g. the > > + * guest could do ECREATE w/ SECS.SGX_ATTR_PROVISIONKEY=0, and > > + * simultaneously set SGX_ATTR_PROVISIONKEY to bypass the check to > > + * enforce restriction of access to the PROVISIONKEY. > > + */ > > + contents = (struct sgx_secs *)__get_free_page(GFP_KERNEL); > > + if (!contents) > > + return -ENOMEM; > > --- cut here --- >8 > > > + > > + /* Exit to userspace if copying from a host userspace address fails. */ > > + if (sgx_read_hva(vcpu, contents_hva, (void *)contents, PAGE_SIZE)) > > This, and every failure path below, will leak 'contents'. The easiest thing is > probably to wrap everything in "cut here" in a separate helper. The CPUID > lookups can be , e.g. > > contents = (struct sgx_secs *)__get_free_page(GFP_KERNEL); > if (!contents) > return -ENOMEM; > > r = __handle_encls_ecreate(vcpu, &pageinfo, secs); > > free_page((unsigned long)contents); > return r; > > And then the helper can be everything below, plus the CPUID lookup: > > sgx_12_0 = kvm_find_cpuid_entry(vcpu, 0x12, 0); > sgx_12_1 = kvm_find_cpuid_entry(vcpu, 0x12, 1); > if (!sgx_12_0 || !sgx_12_1) { > vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; > vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; > vcpu->run->internal.ndata = 0; > return 0; > } > > > Hi Sean, I ended up with below: 1) Copy contents is out of __handle_encls_ecreate(), since from my perspective it is more reasonable split in this way logically, that when __handle_encls_ecreate() is called, pageinfo is already ready, but policy enforcement still needs to be checked. 2) Finding sgx_12_0/1 is at first in __handle_encls_ecreate(), since provisionkey bit check requires sgx_12_1->eax. 3) __handle_encls_ecreate() requires secs_gva since sgx_inject_fault() requires it. Is it OK to you? +static int __handle_encls_ecreate(struct kvm_vcpu *vcpu, + struct sgx_pageinfo *pageinfo, + unsigned long secs_hva, + gva_t secs_gva) +{ + struct sgx_secs *contents = (struct sgx_secs *)pageinfo->contents; + struct kvm_cpuid_entry2 *sgx_12_0, *sgx_12_1; + u64 attributes, xfrm, size; + u32 miscselect; + u8 max_size_log2; + int trapnr; + + sgx_12_0 = kvm_find_cpuid_entry(vcpu, 0x12, 0); + sgx_12_1 = kvm_find_cpuid_entry(vcpu, 0x12, 1); + if (!sgx_12_0 || !sgx_12_1) { + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; + vcpu->run->internal.ndata = 0; + return 0; + } + + miscselect = contents->miscselect; + attributes = contents->attributes; + xfrm = contents->xfrm; + size = contents->size; + + /* Enforce restriction of access to the PROVISIONKEY. */ + if (!vcpu->kvm->arch.sgx_provisioning_allowed && + (attributes & SGX_ATTR_PROVISIONKEY)) { + if (sgx_12_1->eax & SGX_ATTR_PROVISIONKEY) + pr_warn_once("KVM: SGX PROVISIONKEY advertised but not allowed\n"); + kvm_inject_gp(vcpu, 0); + return 1; + } + + /* Enforce CPUID restrictions on MISCSELECT, ATTRIBUTES and XFRM. */ + if ((u32)miscselect & ~sgx_12_0->ebx || + (u32)attributes & ~sgx_12_1->eax || + (u32)(attributes >> 32) & ~sgx_12_1->ebx || + (u32)xfrm & ~sgx_12_1->ecx || + (u32)(xfrm >> 32) & ~sgx_12_1->edx) { + kvm_inject_gp(vcpu, 0); + return 1; + } + + /* Enforce CPUID restriction on max enclave size. */ + max_size_log2 = (attributes & SGX_ATTR_MODE64BIT) ? sgx_12_0->edx >> 8 : + sgx_12_0->edx; + if (size >= BIT_ULL(max_size_log2)) + kvm_inject_gp(vcpu, 0); + + if (sgx_virt_ecreate(pageinfo, (void __user *)secs_hva, &trapnr)) + return sgx_inject_fault(vcpu, secs_gva, trapnr); + + return kvm_skip_emulated_instruction(vcpu); +} + +static int handle_encls_ecreate(struct kvm_vcpu *vcpu) +{ + gva_t pageinfo_gva, secs_gva; + gva_t metadata_gva, contents_gva; + gpa_t metadata_gpa, contents_gpa, secs_gpa; + unsigned long metadata_hva, contents_hva, secs_hva; + struct sgx_pageinfo pageinfo; + struct sgx_secs *contents; + struct x86_exception ex; + int r; + + if (sgx_get_encls_gva(vcpu, kvm_rbx_read(vcpu), 32, 32, &pageinfo_gva) || + sgx_get_encls_gva(vcpu, kvm_rcx_read(vcpu), 4096, 4096, &secs_gva)) + return 1; + + /* + * Copy the PAGEINFO to local memory, its pointers need to be + * translated, i.e. we need to do a deep copy/translate. + */ + r = kvm_read_guest_virt(vcpu, pageinfo_gva, &pageinfo, + sizeof(pageinfo), &ex); + if (r == X86EMUL_PROPAGATE_FAULT) { + kvm_inject_emulated_page_fault(vcpu, &ex); + return 1; + } else if (r != X86EMUL_CONTINUE) { + sgx_handle_emulation_failure(vcpu, pageinfo_gva, + sizeof(pageinfo)); + return 0; + } + + if (sgx_get_encls_gva(vcpu, pageinfo.metadata, 64, 64, &metadata_gva) || + sgx_get_encls_gva(vcpu, pageinfo.contents, 4096, 4096, + &contents_gva)) + return 1; + + /* + * Translate the SECINFO, SOURCE and SECS pointers from GVA to GPA. + * Resume the guest on failure to inject a #PF. + */ + if (sgx_gva_to_gpa(vcpu, metadata_gva, false, &metadata_gpa) || + sgx_gva_to_gpa(vcpu, contents_gva, false, &contents_gpa) || + sgx_gva_to_gpa(vcpu, secs_gva, true, &secs_gpa)) + return 1; + + /* + * ...and then to HVA. The order of accesses isn't architectural, i.e. + * KVM doesn't have to fully process one address at a time. Exit to + * userspace if a GPA is invalid. + */ + if (sgx_gpa_to_hva(vcpu, metadata_gpa, &metadata_hva) || + sgx_gpa_to_hva(vcpu, contents_gpa, &contents_hva) || + sgx_gpa_to_hva(vcpu, secs_gpa, &secs_hva)) + return 0; + + /* + * Copy contents into kernel memory to prevent TOCTOU attack. E.g. the + * guest could do ECREATE w/ SECS.SGX_ATTR_PROVISIONKEY=0, and + * simultaneously set SGX_ATTR_PROVISIONKEY to bypass the check to + * enforce restriction of access to the PROVISIONKEY. + */ + contents = (struct sgx_secs *)__get_free_page(GFP_KERNEL); + if (!contents) + return -ENOMEM; + + /* Exit to userspace if copying from a host userspace address fails. */ + if (sgx_read_hva(vcpu, contents_hva, (void *)contents, PAGE_SIZE)) { + free_page((unsigned long)contents); + return 0; + } + + pageinfo.metadata = metadata_hva; + pageinfo.contents = (u64)contents; + + r = __handle_encls_ecreate(vcpu, &pageinfo, secs_hva, secs_gva); + + free_page((unsigned long)contents); + + return r; +}