On Mon, Aug 29, 2022, Peter Gonda wrote: > diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c > index 53b9a509c1d5..de13be62d52d 100644 > --- a/tools/testing/selftests/kvm/lib/kvm_util.c > +++ b/tools/testing/selftests/kvm/lib/kvm_util.c > @@ -1388,6 +1388,58 @@ void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, > } > } > > +/* > + * Mask off any special bits from raw GPA > + * > + * Input Args: > + * vm - Virtual Machine > + * gpa_raw - Raw VM physical address > + * > + * Output Args: None > + * > + * Return: > + * GPA with special bits (e.g. shared/encrypted) masked off. > + */ > +vm_paddr_t addr_raw2gpa(struct kvm_vm *vm, vm_paddr_t gpa_raw) > +{ > + if (!vm->memcrypt.has_enc_bit) > + return gpa_raw; > + > + return gpa_raw & ~(1ULL << vm->memcrypt.enc_bit); 1. The notion of stealing GPA bits to tag the page should not be tied to memory encryption. 2. Assuming that the shared vs. private bit is active-high is wrong, e.g. TDX has active-low behavior. 3. "raw" is not super intuitive. 4. addr_gpa2raw() should not exist. It assumes the "raw" address always wants the encryption bit set. 5. enc_by_default is an SEV-centric flag that should not exist outside of x86. I think the easiest and most familiar solution for #1 will be to borrow the kernel's tag/untag terminology for handling virtual addresses that can be tagged for ASAN, e.g. ARM's top-byte-ignore and x86's linear address masking (LAM). So instead of addr_raw2gpa(), just do: static inline vm_paddr_t vm_untag_gpa(struct kvm_vm *vm, vm_vaddr_t gpa) { return gpa & ~vm->gpa_tag_mask; } That way zero-allocating the VM will Just Work. 
> diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c > index 2e6e61bbe81b..b2df259ce706 100644 > --- a/tools/testing/selftests/kvm/lib/x86_64/processor.c > +++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c > @@ -118,7 +118,7 @@ void virt_arch_pgd_alloc(struct kvm_vm *vm) > > /* If needed, create page map l4 table. */ > if (!vm->pgd_created) { > - vm->pgd = vm_alloc_page_table(vm); > + vm->pgd = addr_gpa2raw(vm, vm_alloc_page_table(vm)); Rather than add "struct vm_memcrypt", I think it makes sense to introduce "struct kvm_vm_arch" and then the x86 implementation can add pte_me_mask, similar to what KVM does for SPTEs. Then this code becomes vm->pgd = vm_alloc_page_table(vm) | vm->arch.pte_me_mask; That will Just Work for TDX, because its pte_me_mask will be '0', even though it effectively has an encryption bit (active-low). > vm->pgd_created = true; > } > } > @@ -140,13 +140,15 @@ static uint64_t *virt_create_upper_pte(struct kvm_vm *vm, > int target_level) > { > uint64_t *pte = virt_get_pte(vm, pt_pfn, vaddr, current_level); > + uint64_t paddr_raw = addr_gpa2raw(vm, paddr); > > if (!(*pte & PTE_PRESENT_MASK)) { > *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK; > if (current_level == target_level) > - *pte |= PTE_LARGE_MASK | (paddr & PHYSICAL_PAGE_MASK); > + *pte |= PTE_LARGE_MASK | (paddr_raw & PHYSICAL_PAGE_MASK); > else > - *pte |= vm_alloc_page_table(vm) & PHYSICAL_PAGE_MASK; > + *pte |= addr_gpa2raw(vm, vm_alloc_page_table(vm)) & PHYSICAL_PAGE_MASK; Prefer to write this as: *pte |= vm_alloc_page_table(vm) & PHYSICAL_PAGE_MASK; *pte |= vm->arch.pte_me_mask; so that selftests don't assume the encryption bit is stolen from the GPA. > + > } else { > /* > * Entry already present. 
Assert that the caller doesn't want > @@ -184,6 +186,8 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level) > "Physical address beyond maximum supported,\n" > " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x", > paddr, vm->max_gfn, vm->page_size); > + TEST_ASSERT(addr_raw2gpa(vm, paddr) == paddr, > + "Unexpected bits in paddr: %lx", paddr); > > /* > * Allocate upper level page tables, if not already present. Return > @@ -206,7 +210,8 @@ void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level) > pte = virt_get_pte(vm, PTE_GET_PFN(*pde), vaddr, PG_LEVEL_4K); > TEST_ASSERT(!(*pte & PTE_PRESENT_MASK), > "PTE already present for 4k page at vaddr: 0x%lx\n", vaddr); > - *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK | (paddr & PHYSICAL_PAGE_MASK); > + *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK | > + (addr_gpa2raw(vm, paddr) & PHYSICAL_PAGE_MASK); And with the above suggestions, this can become something like: if (vm_is_gpa_encrypted(vm, paddr)) *pte |= vm->arch.c_bit; else *pte |= vm->arch.s_bit; where SEV sets c_bit and TDX sets s_bit. > void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr) > @@ -515,7 +520,7 @@ vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva) > if (!(pte[index[0]] & PTE_PRESENT_MASK)) > goto unmapped_gva; > > - return (PTE_GET_PFN(pte[index[0]]) * vm->page_size) + (gva & ~PAGE_MASK); > + return addr_raw2gpa(vm, PTE_GET_PFN(pte[index[0]]) * vm->page_size) + (gva & ~PAGE_MASK); Aha! A use for my rework[*] of this mess! I don't think you need to take a dependency on that work, at a glance the conflicts should be minor, i.e. doesn't matter that much which lands first. [*] https://lore.kernel.org/all/20221006004512.666529-1-seanjc@xxxxxxxxxx