On Thu, Apr 25, 2024 at 6:51 PM Isaku Yamahata <isaku.yamahata@xxxxxxxxx> wrote:
> > AFAIK, unwinding on failure is completely uninteresting, and arguably undesirable,
> > because undoing LAUNCH_UPDATE or PAGE.ADD will affect the measurement, i.e. there
> > is no scenario where deleting pages from guest_memfd would allow a restart/resume
> > of the build process to truly succeed.
> >
> Just for the record: with the following twist to kvm_gmem_populate(),
> KVM_TDX_INIT_MEM_REGION can use kvm_gmem_populate(). For those who are curious,
> I also append the callback implementation at the end.

Nice, thank you very much. Since TDX does not need HAVE_KVM_GMEM_PREPARE,
if I get rid of FGP_CREAT_ONLY it will work for you, right?

Paolo

> --
>
>  include/linux/kvm_host.h | 2 ++
>  virt/kvm/guest_memfd.c   | 3 ++-
>  2 files changed, 4 insertions(+), 1 deletion(-)
>
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index df957c9f9115..7c86b77f8895 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -2460,6 +2460,7 @@ bool kvm_arch_gmem_prepare_needed(struct kvm *kvm);
>   *	(passed to @post_populate, and incremented on each iteration
>   *	if not NULL)
>   * @npages: number of pages to copy from userspace-buffer
> + * @prepare: Allow page allocation to invoke gmem_prepare hook
>   * @post_populate: callback to issue for each gmem page that backs the GPA
>   *	range
>   * @opaque: opaque data to pass to @post_populate callback
> @@ -2473,6 +2474,7 @@ bool kvm_arch_gmem_prepare_needed(struct kvm *kvm);
>   * Returns the number of pages that were populated.
>   */
>  long kvm_gmem_populate(struct kvm *kvm, gfn_t gfn, void __user *src, long npages,
> +		       bool prepare,
>  		       int (*post_populate)(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn,
>  					    void __user *src, int order, void *opaque),
>  		       void *opaque);
> diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
> index 3195ceefe915..18809e6dea8a 100644
> --- a/virt/kvm/guest_memfd.c
> +++ b/virt/kvm/guest_memfd.c
> @@ -638,6 +638,7 @@ static int kvm_gmem_undo_get_pfn(struct file *file, struct kvm_memory_slot *slot
>  }
>
>  long kvm_gmem_populate(struct kvm *kvm, gfn_t gfn, void __user *src, long npages,
> +		       bool prepare,
>  		       int (*post_populate)(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn,
>  					    void __user *src, int order, void *opaque),
>  		       void *opaque)
> @@ -667,7 +668,7 @@ long kvm_gmem_populate(struct kvm *kvm, gfn_t gfn, void __user *src, long npages
>  		gfn_t this_gfn = gfn + i;
>  		kvm_pfn_t pfn;
>
> -		ret = __kvm_gmem_get_pfn(file, slot, this_gfn, &pfn, &max_order, false);
> +		ret = __kvm_gmem_get_pfn(file, slot, this_gfn, &pfn, &max_order, prepare);
>  		if (ret)
>  			break;
>
> --
> 2.43.2
>
>
> Here is the callback for KVM_TDX_INIT_MEM_REGION.
> Note: the caller of kvm_gmem_populate() acquires mutex_lock(&kvm->slots_lock)
> and idx = srcu_read_lock(&kvm->srcu).
>
>
> struct tdx_gmem_post_populate_arg {
> 	struct kvm_vcpu *vcpu;
> 	__u32 flags;
> };
>
> static int tdx_gmem_post_populate(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn,
> 				  void __user *src, int order, void *_arg)
> {
> 	struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
> 	struct tdx_gmem_post_populate_arg *arg = _arg;
> 	struct kvm_vcpu *vcpu = arg->vcpu;
> 	struct kvm_memory_slot *slot;
> 	gpa_t gpa = gfn_to_gpa(gfn);
> 	struct page *page;
> 	kvm_pfn_t mmu_pfn;
> 	int ret, i;
> 	u64 err;
>
> 	/* Pin the source page. */
> 	ret = get_user_pages_fast((unsigned long)src, 1, 0, &page);
> 	if (ret < 0)
> 		return ret;
> 	if (ret != 1)
> 		return -ENOMEM;
>
> 	slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
> 	if (!kvm_slot_can_be_private(slot) || !kvm_mem_is_private(kvm, gfn)) {
> 		ret = -EFAULT;
> 		goto out_put_page;
> 	}
>
> 	read_lock(&kvm->mmu_lock);
>
> 	ret = kvm_tdp_mmu_get_walk_private_pfn(vcpu, gpa, &mmu_pfn);
> 	if (ret < 0)
> 		goto out;
> 	if (ret > PG_LEVEL_4K) {
> 		ret = -EINVAL;
> 		goto out;
> 	}
> 	if (mmu_pfn != pfn) {
> 		ret = -EAGAIN;
> 		goto out;
> 	}
>
> 	ret = 0;
> 	do {
> 		err = tdh_mem_page_add(kvm_tdx, gpa, pfn_to_hpa(pfn),
> 				       pfn_to_hpa(page_to_pfn(page)), NULL);
> 	} while (err == TDX_ERROR_SEPT_BUSY);
> 	if (err) {
> 		ret = -EIO;
> 		goto out;
> 	}
>
> 	WARN_ON_ONCE(!atomic64_read(&kvm_tdx->nr_premapped));
> 	atomic64_dec(&kvm_tdx->nr_premapped);
> 	tdx_account_td_pages(vcpu->kvm, PG_LEVEL_4K);
>
> 	if (arg->flags & KVM_TDX_MEASURE_MEMORY_REGION) {
> 		for (i = 0; i < PAGE_SIZE; i += TDX_EXTENDMR_CHUNKSIZE) {
> 			err = tdh_mr_extend(kvm_tdx, gpa + i, NULL);
> 			if (err) {
> 				ret = -EIO;
> 				break;
> 			}
> 		}
> 	}
>
> out:
> 	read_unlock(&kvm->mmu_lock);
> out_put_page:
> 	put_page(page);
> 	return ret;
> }
>
> --
> Isaku Yamahata <isaku.yamahata@xxxxxxxxx>
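
[Editor's note] For readers following along, here is a minimal sketch of what
the caller side could look like, given Isaku's note that slots_lock and SRCU
are held around kvm_gmem_populate(). The tdx_init_mem_region() name and the
struct kvm_tdx_init_mem_region layout are assumptions for illustration, not
taken from the patch above:

/*
 * Hypothetical caller sketch, not part of the patch: the function name and
 * struct layout are illustrative assumptions.
 */
struct kvm_tdx_init_mem_region {
	__u64 source_addr;	/* userspace buffer to copy from */
	__u64 gpa;		/* guest physical address to populate */
	__u64 nr_pages;		/* number of 4K pages to add */
};

static int tdx_init_mem_region(struct kvm_vcpu *vcpu,
			       struct kvm_tdx_init_mem_region *region,
			       __u32 flags)
{
	struct kvm *kvm = vcpu->kvm;
	struct tdx_gmem_post_populate_arg arg = {
		.vcpu = vcpu,
		.flags = flags,
	};
	long ret;
	int idx;

	/* Per the note above, hold slots_lock and SRCU across population. */
	mutex_lock(&kvm->slots_lock);
	idx = srcu_read_lock(&kvm->srcu);

	/* prepare == false, since TDX does not use the gmem_prepare hook. */
	ret = kvm_gmem_populate(kvm, gpa_to_gfn(region->gpa),
				u64_to_user_ptr(region->source_addr),
				region->nr_pages, false,
				tdx_gmem_post_populate, &arg);

	srcu_read_unlock(&kvm->srcu, idx);
	mutex_unlock(&kvm->slots_lock);

	if (ret < 0)
		return ret;
	return 0;
}

(kvm_gmem_populate() returns the number of pages actually populated, so a real
caller would presumably report or resume partial progress rather than simply
collapsing any non-negative return to zero as above.)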