From: Dexuan Cui <decui@xxxxxxxxxxxxx> Sent: Saturday, April 8, 2023 1:48 PM
>
> When a TDX guest runs on Hyper-V, the hv_netvsc driver's netvsc_init_buf()
> allocates buffers using vzalloc(), and needs to share the buffers with the
> host OS by calling set_memory_decrypted(), which is not working for
> vmalloc() yet. Add the support by handling the pages one by one.
>
> Co-developed-by: Kirill A. Shutemov <kirill.shutemov@xxxxxxxxxxxxxxx>
> Signed-off-by: Dexuan Cui <decui@xxxxxxxxxxxxx>
> ---
>  arch/x86/coco/tdx/tdx.c | 76 ++++++++++++++++++++++++++++-------------
>  1 file changed, 52 insertions(+), 24 deletions(-)
>
> Changes in v2:
>   Changed tdx_enc_status_changed() in place.
>
>   Hi, Dave, I checked the huge vmalloc mapping code, but still don't know
>   how to get the underlying huge page info (if a huge page is in use) and
>   try to use PG_LEVEL_2M/1G in try_accept_page() for vmalloc: I checked
>   is_vm_area_hugepages() and __vfree() -> __vunmap(), and I think the
>   underlying page allocation info is internal to the mm code, and there
>   is no mm API for me to get the info in tdx_enc_status_changed().
>
> Changes in v3:
>   No change since v2.
>
> Changes in v4:
>   Added Kirill's Co-developed-by since Kirill helped to improve the
>   code by adding tdx_enc_status_changed_phys().
>
>   Thanks Kirill for the clarification on load_unaligned_zeropad()!
>
>   The vzalloc() usage in drivers/net/hyperv/netvsc.c: netvsc_init_buf()
>   remains the same. It may not be worth it to "allocate a vmalloc region,
>   allocate pages manually", because we have to consider the worst case
>   where the system is suffering from severe memory fragmentation and
>   we can only allocate multiple single pages. We may not want to
>   complicate the code in netvsc_init_buf(). We'll support NIC SR-IOV
>   for TDX VMs on Hyper-V, so the netvsc send/recv buffers won't be
>   used when the VF NIC is up.
>
> diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c
> index 5574c91541a2d..731be50b3d093 100644
> --- a/arch/x86/coco/tdx/tdx.c
> +++ b/arch/x86/coco/tdx/tdx.c
> @@ -7,6 +7,7 @@
>  #include <linux/cpufeature.h>
>  #include <linux/export.h>
>  #include <linux/io.h>
> +#include <linux/mm.h>
>  #include <asm/coco.h>
>  #include <asm/tdx.h>
>  #include <asm/vmx.h>
> @@ -789,6 +790,34 @@ static bool try_accept_one(phys_addr_t *start, unsigned long len,
>          return true;
>  }
>
> +static bool try_accept_page(phys_addr_t start, phys_addr_t end)
> +{
> +        /*
> +         * For shared->private conversion, accept the page using
> +         * TDX_ACCEPT_PAGE TDX module call.
> +         */
> +        while (start < end) {
> +                unsigned long len = end - start;
> +
> +                /*
> +                 * Try larger accepts first. It gives chance to VMM to keep
> +                 * 1G/2M SEPT entries where possible and speeds up process by
> +                 * cutting number of hypercalls (if successful).
> +                 */
> +
> +                if (try_accept_one(&start, len, PG_LEVEL_1G))
> +                        continue;
> +
> +                if (try_accept_one(&start, len, PG_LEVEL_2M))
> +                        continue;
> +
> +                if (!try_accept_one(&start, len, PG_LEVEL_4K))
> +                        return false;
> +        }
> +
> +        return true;
> +}
> +
>  /*
>   * Notify the VMM about page mapping conversion. More info about ABI
>   * can be found in TDX Guest-Host-Communication Interface (GHCI),
> @@ -838,6 +867,19 @@ static bool tdx_map_gpa(phys_addr_t start, phys_addr_t end, bool enc)
>          return !ret;
>  }
>
> +static bool tdx_enc_status_changed_phys(phys_addr_t start, phys_addr_t end,
> +                                        bool enc)
> +{
> +        if (!tdx_map_gpa(start, end, enc))
> +                return false;
> +
> +        /* private->shared conversion requires only MapGPA call */
> +        if (!enc)
> +                return true;
> +
> +        return try_accept_page(start, end);
> +}
> +
>  /*
>   * Inform the VMM of the guest's intent for this physical page: shared with
>   * the VMM or private to the guest. The VMM is expected to change its mapping
> @@ -845,37 +887,23 @@ static bool tdx_map_gpa(phys_addr_t start, phys_addr_t end, bool enc)
>   */
>  static bool tdx_enc_status_changed(unsigned long vaddr, int numpages, bool enc)
>  {
> -        phys_addr_t start = __pa(vaddr);
> -        phys_addr_t end = __pa(vaddr + numpages * PAGE_SIZE);
> +        unsigned long start = vaddr;
> +        unsigned long end = start + numpages * PAGE_SIZE;
>
> -        if (!tdx_map_gpa(start, end, enc))
> +        if (offset_in_page(start) != 0)
>                  return false;
>
> -        /* private->shared conversion requires only MapGPA call */
> -        if (!enc)
> -                return true;
> +        if (!is_vmalloc_addr((void *)start))
> +                return tdx_enc_status_changed_phys(__pa(start), __pa(end), enc);
>
> -        /*
> -         * For shared->private conversion, accept the page using
> -         * TDX_ACCEPT_PAGE TDX module call.
> -         */
>          while (start < end) {
> -                unsigned long len = end - start;
> +                phys_addr_t start_pa = slow_virt_to_phys((void *)start);

I've usually seen "page_to_phys(vmalloc_to_page(start))" for getting the
physical address corresponding to a vmalloc() address. For example, see
virt_to_hvpfn(), kvm_kaddr_to_phys(), nx842_get_pa(), rproc_va_to_pa(), etc.
Does anyone know if there is a reason to use one vs. the other?
slow_virt_to_phys() is x86-only, but that's not a problem here.

Otherwise,

Reviewed-by: Michael Kelley <mikelley@xxxxxxxxxxxxx>

> +                phys_addr_t end_pa = start_pa + PAGE_SIZE;
>
> -                /*
> -                 * Try larger accepts first. It gives chance to VMM to keep
> -                 * 1G/2M SEPT entries where possible and speeds up process by
> -                 * cutting number of hypercalls (if successful).
> -                 */
> -
> -                if (try_accept_one(&start, len, PG_LEVEL_1G))
> -                        continue;
> -
> -                if (try_accept_one(&start, len, PG_LEVEL_2M))
> -                        continue;
> -
> -                if (!try_accept_one(&start, len, PG_LEVEL_4K))
> +                if (!tdx_enc_status_changed_phys(start_pa, end_pa, enc))
>                          return false;
> +
> +                start += PAGE_SIZE;
>          }
>
>          return true;
> --
> 2.25.1
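
For comparison, the per-page loop written with the page_to_phys(vmalloc_to_page())
pattern mentioned above would look roughly like the sketch below. This is only an
illustration of the alternative, not the patch as posted; it reuses the
tdx_enc_status_changed_phys() helper added by this patch and assumes
vmalloc_to_page() does not return NULL for an address inside a live vmalloc
allocation (error handling is omitted).

        while (start < end) {
                /* Look up the struct page backing this vmalloc address */
                struct page *page = vmalloc_to_page((void *)start);
                /* Convert the backing page to its physical address */
                phys_addr_t start_pa = page_to_phys(page);
                phys_addr_t end_pa = start_pa + PAGE_SIZE;

                if (!tdx_enc_status_changed_phys(start_pa, end_pa, enc))
                        return false;

                start += PAGE_SIZE;
        }

Either way the loop should resolve the same physical address for each page; the
visible difference is that vmalloc_to_page()/page_to_phys() is available on every
architecture, whereas slow_virt_to_phys() is x86-only, as noted above.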