On Fri, May 10, 2024 at 07:09:07PM +0200, Paolo Bonzini wrote: > On 5/10/24 03:58, Michael Roth wrote: > > There are a few edge-cases that the current processing for GHCB PSC > > requests doesn't handle properly: > > > > - KVM properly ignores SMASH/UNSMASH ops when they are embedded in a > > PSC request buffer which contains other PSC operations, but > > inadvertantly forwards them to userspace as private->shared PSC > > requests if they appear at the end of the buffer. Make sure these are > > ignored instead, just like cases where they are not at the end of the > > request buffer. > > > > - Current code handles non-zero 'cur_page' fields when they are at the > > beginning of a new GPA range, but it is not handling properly when > > iterating through subsequent entries which are otherwise part of a > > contiguous range. Fix up the handling so that these entries are not > > combined into a larger contiguous range that include unintended GPA > > ranges and are instead processed later as the start of a new > > contiguous range. > > > > - The page size variable used to track 2M entries in KVM for inflight PSCs > > might be artifically set to a different value, which can lead to > > unexpected values in the entry's final 'cur_page' update. Use the > > entry's 'pagesize' field instead to determine what the value of > > 'cur_page' should be upon completion of processing. > > > > While here, also add a small helper for clearing in-flight PSCs > > variables and fix up comments for better readability. > > > > Fixes: 266205d810d2 ("KVM: SEV: Add support to handle Page State Change VMGEXIT") > > Signed-off-by: Michael Roth <michael.roth@xxxxxxx> > > There are some more improvements that can be made to the readability of > the code... this one is already better than the patch is fixing up, but I > don't like the code that is in the loop even though it is unconditionally > followed by "break". > > Here's my attempt at replacing this patch, which is really more of a > rewrite of the whole function... Untested beyond compilation. Thanks for the suggested rework. I tested with/without 2MB pages and everything worked as-written. This is the full/squashed patch I plan to include in the pull request: https://github.com/mdroth/linux/commit/91f6d31c4dfc88dd1ac378e2db6117b0c982e63c -Mike > > diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c > index 35f0bd91f92e..6e612789c35f 100644 > --- a/arch/x86/kvm/svm/sev.c > +++ b/arch/x86/kvm/svm/sev.c > @@ -3555,23 +3555,25 @@ struct psc_buffer { > static int snp_begin_psc(struct vcpu_svm *svm, struct psc_buffer *psc); > -static int snp_complete_psc(struct kvm_vcpu *vcpu) > +static void snp_complete_psc(struct vcpu_svm *svm, u64 psc_ret) > +{ > + svm->sev_es.psc_inflight = 0; > + svm->sev_es.psc_idx = 0; > + svm->sev_es.psc_2m = 0; > + ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, psc_ret); > +} > + > +static void __snp_complete_one_psc(struct vcpu_svm *svm) > { > - struct vcpu_svm *svm = to_svm(vcpu); > struct psc_buffer *psc = svm->sev_es.ghcb_sa; > struct psc_entry *entries = psc->entries; > struct psc_hdr *hdr = &psc->hdr; > - __u64 psc_ret; > __u16 idx; > - if (vcpu->run->hypercall.ret) { > - psc_ret = VMGEXIT_PSC_ERROR_GENERIC; > - goto out_resume; > - } > - > /* > * Everything in-flight has been processed successfully. Update the > - * corresponding entries in the guest's PSC buffer. > + * corresponding entries in the guest's PSC buffer and zero out the > + * count of in-flight PSC entries. > */ > for (idx = svm->sev_es.psc_idx; svm->sev_es.psc_inflight; > svm->sev_es.psc_inflight--, idx++) { > @@ -3581,17 +3583,22 @@ static int snp_complete_psc(struct kvm_vcpu *vcpu) > } > hdr->cur_entry = idx; > +} > + > +static int snp_complete_one_psc(struct kvm_vcpu *vcpu) > +{ > + struct vcpu_svm *svm = to_svm(vcpu); > + struct psc_buffer *psc = svm->sev_es.ghcb_sa; > + > + if (vcpu->run->hypercall.ret) { > + snp_complete_psc(svm, VMGEXIT_PSC_ERROR_GENERIC); > + return 1; /* resume guest */ > + } > + > + __snp_complete_one_psc(svm); > /* Handle the next range (if any). */ > return snp_begin_psc(svm, psc); > - > -out_resume: > - svm->sev_es.psc_idx = 0; > - svm->sev_es.psc_inflight = 0; > - svm->sev_es.psc_2m = false; > - ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, psc_ret); > - > - return 1; /* resume guest */ > } > static int snp_begin_psc(struct vcpu_svm *svm, struct psc_buffer *psc) > @@ -3601,18 +3608,20 @@ static int snp_begin_psc(struct vcpu_svm *svm, struct psc_buffer *psc) > struct psc_hdr *hdr = &psc->hdr; > struct psc_entry entry_start; > u16 idx, idx_start, idx_end; > - __u64 psc_ret, gpa; > + u64 gfn; > int npages; > - > - /* There should be no other PSCs in-flight at this point. */ > - if (WARN_ON_ONCE(svm->sev_es.psc_inflight)) { > - psc_ret = VMGEXIT_PSC_ERROR_GENERIC; > - goto out_resume; > - } > + bool huge; > if (!(vcpu->kvm->arch.hypercall_exit_enabled & (1 << KVM_HC_MAP_GPA_RANGE))) { > - psc_ret = VMGEXIT_PSC_ERROR_GENERIC; > - goto out_resume; > + snp_complete_psc(svm, VMGEXIT_PSC_ERROR_GENERIC); > + return 1; > + } > + > +next_range: > + /* There should be no other PSCs in-flight at this point. */ > + if (WARN_ON_ONCE(svm->sev_es.psc_inflight)) { > + snp_complete_psc(svm, VMGEXIT_PSC_ERROR_GENERIC); > + return 1; > } > /* > @@ -3624,97 +3633,99 @@ static int snp_begin_psc(struct vcpu_svm *svm, struct psc_buffer *psc) > idx_end = hdr->end_entry; > if (idx_end >= VMGEXIT_PSC_MAX_COUNT) { > - psc_ret = VMGEXIT_PSC_ERROR_INVALID_HDR; > - goto out_resume; > - } > - > - /* Nothing more to process. */ > - if (idx_start > idx_end) { > - psc_ret = 0; > - goto out_resume; > + snp_complete_psc(svm, VMGEXIT_PSC_ERROR_INVALID_HDR); > + return 1; > } > /* Find the start of the next range which needs processing. */ > for (idx = idx_start; idx <= idx_end; idx++, hdr->cur_entry++) { > - __u16 cur_page; > - gfn_t gfn; > - bool huge; > - > entry_start = entries[idx]; > - > - /* Only private/shared conversions are currently supported. */ > - if (entry_start.operation != VMGEXIT_PSC_OP_PRIVATE && > - entry_start.operation != VMGEXIT_PSC_OP_SHARED) > - continue; > - > gfn = entry_start.gfn; > - cur_page = entry_start.cur_page; > huge = entry_start.pagesize; > + npages = huge ? 512 : 1; > - if ((huge && (cur_page > 512 || !IS_ALIGNED(gfn, 512))) || > - (!huge && cur_page > 1)) { > - psc_ret = VMGEXIT_PSC_ERROR_INVALID_ENTRY; > - goto out_resume; > + if (entry_start.cur_page > npages || !IS_ALIGNED(gfn, npages)) { > + snp_complete_psc(svm, VMGEXIT_PSC_ERROR_INVALID_ENTRY); > + return 1; > } > + if (entry_start.cur_page) { > + /* > + * If this is a partially-completed 2M range, force 4K > + * handling for the remaining pages since they're effectively > + * split at this point. Subsequent code should ensure this > + * doesn't get combined with adjacent PSC entries where 2M > + * handling is still possible. > + */ > + npages -= entry_start.cur_page; > + gfn += entry_start.cur_page; > + huge = false; > + } > + if (npages) > + break; > + > /* All sub-pages already processed. */ > - if ((huge && cur_page == 512) || (!huge && cur_page == 1)) > - continue; > - > - /* > - * If this is a partially-completed 2M range, force 4K handling > - * for the remaining pages since they're effectively split at > - * this point. Subsequent code should ensure this doesn't get > - * combined with adjacent PSC entries where 2M handling is still > - * possible. > - */ > - svm->sev_es.psc_2m = cur_page ? false : huge; > - svm->sev_es.psc_idx = idx; > - svm->sev_es.psc_inflight = 1; > - > - gpa = gfn_to_gpa(gfn + cur_page); > - npages = huge ? 512 - cur_page : 1; > - break; > } > + if (idx > idx_end) { > + /* Nothing more to process. */ > + snp_complete_psc(svm, 0); > + return 1; > + } > + > + svm->sev_es.psc_2m = huge; > + svm->sev_es.psc_idx = idx; > + svm->sev_es.psc_inflight = 1; > + > /* > * Find all subsequent PSC entries that contain adjacent GPA > * ranges/operations and can be combined into a single > * KVM_HC_MAP_GPA_RANGE exit. > */ > - for (idx = svm->sev_es.psc_idx + 1; idx <= idx_end; idx++) { > + while (++idx <= idx_end) { > struct psc_entry entry = entries[idx]; > if (entry.operation != entry_start.operation || > - entry.gfn != entry_start.gfn + npages || > - !!entry.pagesize != svm->sev_es.psc_2m) > + entry.gfn != gfn + npages || > + entry.cur_page || > + !!entry.pagesize != huge) > break; > svm->sev_es.psc_inflight++; > - npages += entry_start.pagesize ? 512 : 1; > + npages += huge ? 512 : 1; > } > - vcpu->run->exit_reason = KVM_EXIT_HYPERCALL; > - vcpu->run->hypercall.nr = KVM_HC_MAP_GPA_RANGE; > - vcpu->run->hypercall.args[0] = gpa; > - vcpu->run->hypercall.args[1] = npages; > - vcpu->run->hypercall.args[2] = entry_start.operation == VMGEXIT_PSC_OP_PRIVATE > - ? KVM_MAP_GPA_RANGE_ENCRYPTED > - : KVM_MAP_GPA_RANGE_DECRYPTED; > - vcpu->run->hypercall.args[2] |= entry_start.pagesize > - ? KVM_MAP_GPA_RANGE_PAGE_SZ_2M > - : KVM_MAP_GPA_RANGE_PAGE_SZ_4K; > - vcpu->arch.complete_userspace_io = snp_complete_psc; > + switch (entry_start.operation) { > + case VMGEXIT_PSC_OP_PRIVATE: > + case VMGEXIT_PSC_OP_SHARED: > + vcpu->run->exit_reason = KVM_EXIT_HYPERCALL; > + vcpu->run->hypercall.nr = KVM_HC_MAP_GPA_RANGE; > + vcpu->run->hypercall.args[0] = gfn_to_gpa(gfn); > + vcpu->run->hypercall.args[1] = npages; > + vcpu->run->hypercall.args[2] = entry_start.operation == VMGEXIT_PSC_OP_PRIVATE > + ? KVM_MAP_GPA_RANGE_ENCRYPTED > + : KVM_MAP_GPA_RANGE_DECRYPTED; > + vcpu->run->hypercall.args[2] |= huge > + ? KVM_MAP_GPA_RANGE_PAGE_SZ_2M > + : KVM_MAP_GPA_RANGE_PAGE_SZ_4K; > + vcpu->arch.complete_userspace_io = snp_complete_one_psc; > - return 0; /* forward request to userspace */ > + return 0; /* forward request to userspace */ > -out_resume: > - svm->sev_es.psc_idx = 0; > - svm->sev_es.psc_inflight = 0; > - svm->sev_es.psc_2m = false; > - ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, psc_ret); > + default: > + /* > + * Only shared/private PSC operations are currently supported, so if the > + * entire range consists of unsupported operations (e.g. SMASH/UNSMASH), > + * then consider the entire range completed and avoid exiting to > + * userspace. In theory snp_complete_psc() can be called directly > + * at this point to complete the current range and start the next one, > + * but that could lead to unexpected levels of recursion. > + */ > + __snp_complete_one_psc(svm); > + goto next_range; > + } > - return 1; /* resume guest */ > + unreachable(); > } > static int __sev_snp_update_protected_guest_state(struct kvm_vcpu *vcpu) > >