Fuad and I can help add some details for the conversion. Hopefully we can
figure out some of the plan at Plumbers this week.

Thanks,
Elliot

> 
> Signed-off-by: Ackerley Tng <ackerleytng@xxxxxxxxxx>
> 
> ---
>  virt/kvm/guest_memfd.c | 131 ++++++++++++++++++++++++++++++++++++++++-
>  virt/kvm/kvm_main.c    |   2 +
>  virt/kvm/kvm_mm.h      |   7 +++
>  3 files changed, 139 insertions(+), 1 deletion(-)
> 
> diff --git a/virt/kvm/guest_memfd.c b/virt/kvm/guest_memfd.c
> index 110c4bbb004b..fb292e542381 100644
> --- a/virt/kvm/guest_memfd.c
> +++ b/virt/kvm/guest_memfd.c
> @@ -129,13 +129,29 @@ static int __kvm_gmem_prepare_folio(struct kvm *kvm, struct kvm_memory_slot *slo
>  }
>  
>  /**
> - * Use the uptodate flag to indicate that the folio is prepared for KVM's usage.
> + * Use the folio's up-to-date flag to indicate that this folio is prepared for
> + * use by the guest.
> + *
> + * This flag applies whether the folio is being prepared for PRIVATE or
> + * SHARED use.
>   */
>  static inline void kvm_gmem_mark_prepared(struct folio *folio)
>  {
>  	folio_mark_uptodate(folio);
>  }
>  
> +/**
> + * Use the folio's up-to-date flag to indicate that this folio is not yet
> + * prepared for use by the guest.
> + *
> + * This flag applies whether the folio is being prepared for PRIVATE or
> + * SHARED use.
> + */
> +static inline void kvm_gmem_clear_prepared(struct folio *folio)
> +{
> +	folio_clear_uptodate(folio);
> +}
> +
>  /*
>   * Process @folio, which contains @gfn, so that the guest can use it.
>   * The folio must be locked and the gfn must be contained in @slot.
> @@ -148,6 +164,12 @@ static int kvm_gmem_prepare_folio(struct kvm *kvm, struct kvm_memory_slot *slot,
>  	pgoff_t index;
>  	int r;
>  
> +	/*
> +	 * Defensively zero the folio to avoid leaking kernel memory in
> +	 * uninitialized pages. This is important since pages can now be mapped
> +	 * into userspace, where hardware (e.g. TDX) won't be clearing those
> +	 * pages.
> +	 */
>  	if (folio_test_hugetlb(folio)) {
>  		folio_zero_user(folio, folio->index << PAGE_SHIFT);
>  	} else {
> @@ -1017,6 +1039,7 @@ static vm_fault_t kvm_gmem_fault(struct vm_fault *vmf)
>  {
>  	struct inode *inode;
>  	struct folio *folio;
> +	bool is_prepared;
>  
>  	inode = file_inode(vmf->vma->vm_file);
>  	if (!kvm_gmem_is_faultable(inode, vmf->pgoff))
> @@ -1026,6 +1049,31 @@ static vm_fault_t kvm_gmem_fault(struct vm_fault *vmf)
>  	if (!folio)
>  		return VM_FAULT_SIGBUS;
>  
> +	is_prepared = folio_test_uptodate(folio);
> +	if (!is_prepared) {
> +		unsigned long nr_pages;
> +		unsigned long i;
> +
> +		if (folio_test_hugetlb(folio)) {
> +			folio_zero_user(folio, folio->index << PAGE_SHIFT);
> +		} else {
> +			/*
> +			 * Defensively zero the folio to avoid leaking kernel memory in
> +			 * uninitialized pages. This is important since pages can now be
> +			 * mapped into userspace, where hardware (e.g. TDX) won't be
> +			 * clearing those pages.
> +			 *
> +			 * Will probably need a version of kvm_gmem_prepare_folio() to
> +			 * prepare the page for SHARED use.
> +			 */
> +			nr_pages = folio_nr_pages(folio);
> +			for (i = 0; i < nr_pages; i++)
> +				clear_highpage(folio_page(folio, i));
> +		}
> +
> +		kvm_gmem_mark_prepared(folio);
> +	}
> +
>  	vmf->page = folio_file_page(folio, vmf->pgoff);
>  	return VM_FAULT_LOCKED;
>  }
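
One thought on the TODO about needing a SHARED version of
kvm_gmem_prepare_folio(): the zeroing above duplicates the
hugetlb/clear_highpage() logic that this patch also adds to
kvm_gmem_prepare_folio(). A minimal sketch of how the common part could
be factored out (kvm_gmem_zero_folio() is a name I'm inventing here,
it's not in this series):

static void kvm_gmem_zero_folio(struct folio *folio)
{
	if (folio_test_hugetlb(folio)) {
		folio_zero_user(folio, folio->index << PAGE_SHIFT);
	} else {
		unsigned long nr_pages = folio_nr_pages(folio);
		unsigned long i;

		/*
		 * Clear page by page; clear_highpage() handles pages
		 * that need a temporary kernel mapping on highmem
		 * configurations.
		 */
		for (i = 0; i < nr_pages; i++)
			clear_highpage(folio_page(folio, i));
	}
}

Both kvm_gmem_prepare_folio() and kvm_gmem_fault() could then call
kvm_gmem_zero_folio() before kvm_gmem_mark_prepared(), so the two paths
can't drift apart.
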
> @@ -1593,6 +1641,87 @@ long kvm_gmem_populate(struct kvm *kvm, gfn_t start_gfn, void __user *src, long
>  }
>  EXPORT_SYMBOL_GPL(kvm_gmem_populate);
>  
> +static void kvm_gmem_clear_prepared_range(struct inode *inode, pgoff_t start,
> +					  pgoff_t end)
> +{
> +	pgoff_t index;
> +
> +	filemap_invalidate_lock_shared(inode->i_mapping);
> +
> +	/* TODO: replace iteration with filemap_get_folios() for efficiency. */
> +	for (index = start; index < end;) {
> +		struct folio *folio;
> +
> +		/* Don't use kvm_gmem_get_folio() here, to avoid allocating. */
> +		folio = filemap_lock_folio(inode->i_mapping, index);
> +		if (IS_ERR(folio)) {
> +			++index;
> +			continue;
> +		}
> +
> +		kvm_gmem_clear_prepared(folio);
> +
> +		index = folio_next_index(folio);
> +		folio_unlock(folio);
> +		folio_put(folio);
> +	}
> +
> +	filemap_invalidate_unlock_shared(inode->i_mapping);
> +}
> +
> +/**
> + * Clear the prepared flag for all folios in gfn range [@start, @end) in memslot
> + * @slot.
> + */
> +static void kvm_gmem_clear_prepared_slot(struct kvm_memory_slot *slot, gfn_t start,
> +					 gfn_t end)
> +{
> +	pgoff_t start_offset;
> +	pgoff_t end_offset;
> +	struct file *file;
> +
> +	file = kvm_gmem_get_file(slot);
> +	if (!file)
> +		return;
> +
> +	start_offset = start - slot->base_gfn + slot->gmem.pgoff;
> +	end_offset = end - slot->base_gfn + slot->gmem.pgoff;
> +
> +	kvm_gmem_clear_prepared_range(file_inode(file), start_offset, end_offset);
> +
> +	fput(file);
> +}
> +
> +/**
> + * Clear the prepared flag for all folios in any slot in gfn range
> + * [@start, @end) in @kvm.
> + */
> +void kvm_gmem_clear_prepared_vm(struct kvm *kvm, gfn_t start, gfn_t end)
> +{
> +	int i;
> +
> +	lockdep_assert_held(&kvm->slots_lock);
> +
> +	for (i = 0; i < kvm_arch_nr_memslot_as_ids(kvm); i++) {
> +		struct kvm_memslot_iter iter;
> +		struct kvm_memslots *slots;
> +
> +		slots = __kvm_memslots(kvm, i);
> +		kvm_for_each_memslot_in_gfn_range(&iter, slots, start, end) {
> +			struct kvm_memory_slot *slot;
> +			gfn_t gfn_start;
> +			gfn_t gfn_end;
> +
> +			slot = iter.slot;
> +			gfn_start = max(start, slot->base_gfn);
> +			gfn_end = min(end, slot->base_gfn + slot->npages);
> +
> +			if (slot->flags & KVM_MEM_GUEST_MEMFD)
> +				kvm_gmem_clear_prepared_slot(slot, gfn_start, gfn_end);
> +		}
> +	}
> +}
> +
>  /**
>   * Returns true if pages in range [@start, @end) in inode @inode have no
>   * userspace mappings.
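
On the TODO about filemap_get_folios(): the batched version looks
mostly mechanical. Untested sketch, just to show the shape (note that
filemap_get_folios() takes an inclusive end index, hence the end - 1):

static void kvm_gmem_clear_prepared_range(struct inode *inode,
					  pgoff_t start, pgoff_t end)
{
	struct folio_batch fbatch;
	unsigned int i;

	filemap_invalidate_lock_shared(inode->i_mapping);

	folio_batch_init(&fbatch);
	/* filemap_get_folios() advances start past each batch. */
	while (filemap_get_folios(inode->i_mapping, &start, end - 1,
				  &fbatch)) {
		for (i = 0; i < folio_batch_count(&fbatch); i++) {
			struct folio *folio = fbatch.folios[i];

			folio_lock(folio);
			kvm_gmem_clear_prepared(folio);
			folio_unlock(folio);
		}
		/* Drops the references filemap_get_folios() took. */
		folio_batch_release(&fbatch);
		cond_resched();
	}

	filemap_invalidate_unlock_shared(inode->i_mapping);
}

That would also skip over holes in the file in one lookup, instead of
doing a separate filemap_lock_folio() walk for every missing index.
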
> diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
> index 1a7bbcc31b7e..255d27df7f5c 100644
> --- a/virt/kvm/kvm_main.c
> +++ b/virt/kvm/kvm_main.c
> @@ -2565,6 +2565,8 @@ static int kvm_vm_set_mem_attributes(struct kvm *kvm, gfn_t start, gfn_t end,
>  		KVM_BUG_ON(r, kvm);
>  	}
>  
> +	kvm_gmem_clear_prepared_vm(kvm, start, end);
> +
>  	kvm_handle_gfn_range(kvm, &post_set_range);
>  
>  out_unlock:
> diff --git a/virt/kvm/kvm_mm.h b/virt/kvm/kvm_mm.h
> index d8ff2b380d0e..25fd0d9f66cc 100644
> --- a/virt/kvm/kvm_mm.h
> +++ b/virt/kvm/kvm_mm.h
> @@ -43,6 +43,7 @@ int kvm_gmem_bind(struct kvm *kvm, struct kvm_memory_slot *slot,
>  void kvm_gmem_unbind(struct kvm_memory_slot *slot);
>  int kvm_gmem_should_set_attributes(struct kvm *kvm, gfn_t start, gfn_t end,
>  				   unsigned long attrs);
> +void kvm_gmem_clear_prepared_vm(struct kvm *kvm, gfn_t start, gfn_t end);
>  #else
>  static inline void kvm_gmem_init(struct module *module)
>  {
> @@ -68,6 +69,12 @@ static inline int kvm_gmem_should_set_attributes(struct kvm *kvm, gfn_t start,
>  	return 0;
>  }
>  
> +static inline void kvm_gmem_clear_prepared_vm(struct kvm *kvm,
> +					      gfn_t start, gfn_t end)
> +{
> +	WARN_ON_ONCE(1);
> +}
> +
>  #endif /* CONFIG_KVM_PRIVATE_MEM */
>  
>  #endif /* __KVM_MM_H__ */
> -- 
> 2.46.0.598.g6f2099f65c-goog
> 
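
For completeness, the userspace path that ends up in
kvm_gmem_clear_prepared_vm() is KVM_SET_MEMORY_ATTRIBUTES on the VM fd.
Roughly, with error handling elided and vm_fd/gpa/size coming from the
caller:

#include <linux/kvm.h>
#include <sys/ioctl.h>

/*
 * Flip [gpa, gpa + size) to private; pass attributes = 0 to convert
 * back to shared. Either direction clears the prepared flag, so the
 * next access re-zeroes the folios before anyone sees them.
 */
static int set_range_private(int vm_fd, __u64 gpa, __u64 size)
{
	struct kvm_memory_attributes attrs = {
		.address = gpa,
		.size = size,
		.attributes = KVM_MEMORY_ATTRIBUTE_PRIVATE,
	};

	return ioctl(vm_fd, KVM_SET_MEMORY_ATTRIBUTES, &attrs);
}
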