On Thu, Apr 25, 2024 at 6:51 PM Isaku Yamahata <isaku.yamahata@xxxxxxxxx> wrote:
> > AFAIK, unwinding on failure is completely uninteresting, and arguably undesirable,
> > because undoing LAUNCH_UPDATE or PAGE.ADD will affect the measurement, i.e. there
> > is no scenario where deleting pages from guest_memfd would allow a restart/resume
> > of the build process to truly succeed.
> >
> Just for the record: with the following twist to kvm_gmem_populate(),
> KVM_TDX_INIT_MEM_REGION can use kvm_gmem_populate(). For those who are curious,
> I also append the callback implementation at the end.

Nice, thank you very much. Since TDX does not need HAVE_KVM_GMEM_PREPARE,
if I get rid of FGP_CREAT_ONLY it will work for you, right?

Paolo

> --
>
>  include/linux/kvm_host.h | 2 ++
>  virt/kvm/guest_memfd.c   | 3 ++-
>  2 files changed, 4 insertions(+), 1 deletion(-)
>
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index df957c9f9115..7c86b77f8895 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -2460,6 +2460,7 @@ bool kvm_arch_gmem_prepare_needed(struct kvm *kvm);
>   *	(passed to @post_populate, and incremented on each iteration
>   *	if not NULL)
>   * @npages: number of pages to copy from userspace-buffer
> + * @prepare: Allow page allocation to invoke gmem_prepare hook
>   * @post_populate: callback to issue for each gmem page that backs the GPA
>   *	range
>   * @opaque: opaque data to pass to @post_populate callback
> @@ -2473,6 +2474,7 @@ bool kvm_arch_gmem_prepare_needed(struct kvm *kvm);
>   * Returns the number of pages that were populated.
>   */
>  long kvm_gmem_populate(struct kvm *kvm, gfn_t gfn, void __user *src, long npages,
> +		       bool prepare,
>  		       int (*post_populate)(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn,
>  					    void __user *src, int order, void *opaque),
>  		       void *opaque);
> diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
> index 3195ceefe915..18809e6dea8a 100644
> --- a/virt/kvm/guest_memfd.c
> +++ b/virt/kvm/guest_memfd.c
> @@ -638,6 +638,7 @@ static int kvm_gmem_undo_get_pfn(struct file *file, struct kvm_memory_slot *slot
>  }
>
>  long kvm_gmem_populate(struct kvm *kvm, gfn_t gfn, void __user *src, long npages,
> +		       bool prepare,
>  		       int (*post_populate)(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn,
>  					    void __user *src, int order, void *opaque),
>  		       void *opaque)
> @@ -667,7 +668,7 @@ long kvm_gmem_populate(struct kvm *kvm, gfn_t gfn, void __user *src, long npages
>  		gfn_t this_gfn = gfn + i;
>  		kvm_pfn_t pfn;
>
> -		ret = __kvm_gmem_get_pfn(file, slot, this_gfn, &pfn, &max_order, false);
> +		ret = __kvm_gmem_get_pfn(file, slot, this_gfn, &pfn, &max_order, prepare);
>  		if (ret)
>  			break;
>
> --
> 2.43.2
>
>
> Here is the callback for KVM_TDX_INIT_MEM_REGION.
> Note: the caller of kvm_gmem_populate() acquires mutex_lock(&kvm->slots_lock)
> and idx = srcu_read_lock(&kvm->srcu).
>
>
> struct tdx_gmem_post_populate_arg {
> 	struct kvm_vcpu *vcpu;
> 	__u32 flags;
> };
>
> static int tdx_gmem_post_populate(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn,
> 				  void __user *src, int order, void *_arg)
> {
> 	struct kvm_tdx *kvm_tdx = to_kvm_tdx(kvm);
> 	struct tdx_gmem_post_populate_arg *arg = _arg;
> 	struct kvm_vcpu *vcpu = arg->vcpu;
> 	struct kvm_memory_slot *slot;
> 	gpa_t gpa = gfn_to_gpa(gfn);
> 	struct page *page;
> 	kvm_pfn_t mmu_pfn;
> 	int ret, i;
> 	u64 err;
>
> 	/* Pin the source page. */
> 	ret = get_user_pages_fast((unsigned long)src, 1, 0, &page);
> 	if (ret < 0)
> 		return ret;
> 	if (ret != 1)
> 		return -ENOMEM;
>
> 	slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
> 	if (!kvm_slot_can_be_private(slot) || !kvm_mem_is_private(kvm, gfn)) {
> 		ret = -EFAULT;
> 		goto out_put_page;
> 	}
>
> 	read_lock(&kvm->mmu_lock);
>
> 	ret = kvm_tdp_mmu_get_walk_private_pfn(vcpu, gpa, &mmu_pfn);
> 	if (ret < 0)
> 		goto out;
> 	if (ret > PG_LEVEL_4K) {
> 		ret = -EINVAL;
> 		goto out;
> 	}
> 	if (mmu_pfn != pfn) {
> 		ret = -EAGAIN;
> 		goto out;
> 	}
>
> 	ret = 0;
> 	do {
> 		err = tdh_mem_page_add(kvm_tdx, gpa, pfn_to_hpa(pfn),
> 				       pfn_to_hpa(page_to_pfn(page)), NULL);
> 	} while (err == TDX_ERROR_SEPT_BUSY);
> 	if (err) {
> 		ret = -EIO;
> 		goto out;
> 	}
>
> 	WARN_ON_ONCE(!atomic64_read(&kvm_tdx->nr_premapped));
> 	atomic64_dec(&kvm_tdx->nr_premapped);
> 	tdx_account_td_pages(vcpu->kvm, PG_LEVEL_4K);
>
> 	if (arg->flags & KVM_TDX_MEASURE_MEMORY_REGION) {
> 		for (i = 0; i < PAGE_SIZE; i += TDX_EXTENDMR_CHUNKSIZE) {
> 			err = tdh_mr_extend(kvm_tdx, gpa + i, NULL);
> 			if (err) {
> 				ret = -EIO;
> 				break;
> 			}
> 		}
> 	}
>
> out:
> 	read_unlock(&kvm->mmu_lock);
> out_put_page:
> 	put_page(page);
> 	return ret;
> }
>
> --
> Isaku Yamahata <isaku.yamahata@xxxxxxxxx>
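
[Editor's note] For readers following along, here is a minimal sketch of what
the caller side could look like, given Isaku's note that slots_lock and SRCU
are held around kvm_gmem_populate(). The tdx_init_mem_region() name and the
struct kvm_tdx_init_mem_region layout are assumptions for illustration, not
taken from the patch above:

/*
 * Hypothetical caller sketch, not part of the patch: the function name and
 * struct layout are illustrative assumptions.
 */
struct kvm_tdx_init_mem_region {
	__u64 source_addr;	/* userspace buffer to copy from */
	__u64 gpa;		/* guest physical address to populate */
	__u64 nr_pages;		/* number of 4K pages to add */
};

static int tdx_init_mem_region(struct kvm_vcpu *vcpu,
			       struct kvm_tdx_init_mem_region *region,
			       __u32 flags)
{
	struct kvm *kvm = vcpu->kvm;
	struct tdx_gmem_post_populate_arg arg = {
		.vcpu = vcpu,
		.flags = flags,
	};
	long ret;
	int idx;

	/* Per the note above, hold slots_lock and SRCU across population. */
	mutex_lock(&kvm->slots_lock);
	idx = srcu_read_lock(&kvm->srcu);

	/* prepare == false, since TDX does not use the gmem_prepare hook. */
	ret = kvm_gmem_populate(kvm, gpa_to_gfn(region->gpa),
				u64_to_user_ptr(region->source_addr),
				region->nr_pages, false,
				tdx_gmem_post_populate, &arg);

	srcu_read_unlock(&kvm->srcu, idx);
	mutex_unlock(&kvm->slots_lock);

	if (ret < 0)
		return ret;
	return 0;
}

(kvm_gmem_populate() returns the number of pages actually populated, so a real
caller would presumably report or resume partial progress rather than simply
collapsing any non-negative return to zero as above.)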