On Sun, May 28, 2023 at 9:21 PM <isaku.yamahata@xxxxxxxxx> wrote: > > From: Isaku Yamahata <isaku.yamahata@xxxxxxxxx> > > +static int tdx_sept_drop_private_spte(struct kvm *kvm, gfn_t gfn, > + enum pg_level level, kvm_pfn_t pfn) > +{ > + int tdx_level = pg_level_to_tdx_sept_level(level); > + struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm); > + struct tdx_module_output out; > + gpa_t gpa = gfn_to_gpa(gfn); > + hpa_t hpa = pfn_to_hpa(pfn); > + hpa_t hpa_with_hkid; > + u64 err; > + > + /* TODO: handle large pages. */ > + if (KVM_BUG_ON(level != PG_LEVEL_4K, kvm)) > + return -EINVAL; > + > + if (unlikely(!is_hkid_assigned(kvm_tdx))) { > + /* > + * The HKID assigned to this TD was already freed and cache > + * was already flushed. We don't have to flush again. > + */ > + err = tdx_reclaim_page(hpa, false, 0); > + if (KVM_BUG_ON(err, kvm)) > + return -EIO; > + tdx_unpin(kvm, pfn); > + return 0; > + } > + > + do { > + /* > + * When zapping private page, write lock is held. So no race > + * condition with other vcpu sept operation. Race only with > + * TDH.VP.ENTER. > + */ > + err = tdh_mem_page_remove(kvm_tdx->tdr_pa, gpa, tdx_level, &out); > + } while (unlikely(err == TDX_ERROR_SEPT_BUSY)); > + if (KVM_BUG_ON(err, kvm)) { > + pr_tdx_error(TDH_MEM_PAGE_REMOVE, err, &out); > + return -EIO; > + } > + > + hpa_with_hkid = set_hkid_to_hpa(hpa, (u16)kvm_tdx->hkid); > + do { > + /* > + * TDX_OPERAND_BUSY can happen on locking PAMT entry. Because > + * this page was removed above, other thread shouldn't be > + * repeatedly operating on this page. Just retry loop. > + */ > + err = tdh_phymem_page_wbinvd(hpa_with_hkid); > + } while (unlikely(err == (TDX_OPERAND_BUSY | TDX_OPERAND_ID_RCX))); > + if (KVM_BUG_ON(err, kvm)) { > + pr_tdx_error(TDH_PHYMEM_PAGE_WBINVD, err, NULL); > + return -EIO; > + } It seems like when the TD is destroyed, all the TD private pages are removed with tdx_reclaim_page which also clears the page with movdir64b instruction. But when the page is removed while the TD is alive (in this case), the page content is never cleared with movdir64b which causes any poisoned cache line to be consumed by the host resulting in #MC exceptions in the host context. We should clear the page before returning it back to the free pool by calling tdx_clear_page((unsigned long)__va(hpa)) here. > > + tdx_unpin(kvm, pfn); > + return 0; > +} > + >