A TDX VM on Hyper-V may run in TD mode or Partitioned TD mode (L2). In the former case, the VM has not enabled the Hyper-V TSC page (which is defined in drivers/clocksource/hyperv_timer.c: "... tsc_pg __bss_decrypted ...") because, for such a VM, the hypervisor requires the page to be shared, but __bss_decrypted does not yet work for such a VM. The Hyper-V TSC page can work as a clocksource device similar to the KVM pv clock, and it is also used by the Hyper-V timer code to get the current time: see hv_init_tsc_clocksource(), which sets the global function pointer hv_read_reference_counter to read_hv_clock_tsc(). When the Hyper-V TSC page is not enabled, hv_read_reference_counter defaults to __hv_read_ref_counter() in drivers/hv/hv_common.c, which is suboptimal as it uses the slow MSR interface to get the time info. The attribute __bss_decrypted was added for a native SNP VM by commit 45f46b1ac95e ("clocksource: hyper-v: Mark hyperv tsc page unencrypted in sev-snp enlightened guest"). The attribute works for SNP because of commit b3f0907c71e0 ("x86/mm: Add .bss..decrypted section to hold shared variables"). The attribute does not work for TDX because __startup_64() -> sme_postprocess_startup() does not handle TDX; we can't just call set_memory_decrypted() in sme_postprocess_startup() because sme_postprocess_startup() runs too early and set_memory_decrypted() does not work there yet. This RFC patch calls set_memory_decrypted() at a later point, i.e., in start_kernel() -> setup_arch() -> init_hypervisor_platform() -> ms_hyperv_init_platform(), where set_memory_decrypted() works; accordingly, mem_encrypt_free_decrypted_mem() -> set_memory_encrypted() must now be called for TDX as well. When a TDX VM runs in Partitioned TD mode (L2), the Hyper-V TSC page should be a private page, so set_memory_decrypted() should not be called for the page in such a VM. Introduce a global variable "tdx_partitioned_td_l2" to handle this type of VM differently. 
BTW, the 4KB Hyper-V TSC page is enabled very early in hv_init_tsc_clocksource(), where set_memory_decrypted() does not work yet, so we can't simply call set_memory_decrypted() in hv_init_tsc_clocksource() for a TDX VM in TD mode, and we need to get the attribute __bss_decrypted to work for such a VM. The changes to arch/x86/kernel/cpu/mshyperv.c and arch/x86/mm/mem_encrypt_amd.c are not satisfactory to me. I wish there could be a better way to support __bss_decrypted for a native TDX VM so that a TDX VM on KVM could also benefit from __bss_decrypted, if someone wants to use it in the future. BTW, kvm_init_platform() has similar code for SNP. This is just an RFC patch. I appreciate your insights. Thanks! Signed-off-by: Dexuan Cui <decui@xxxxxxxxxxxxx> --- arch/x86/coco/core.c | 15 +++++++++++++++ arch/x86/coco/tdx/tdx.c | 2 ++ arch/x86/hyperv/ivm.c | 3 ++- arch/x86/include/asm/tdx.h | 1 + arch/x86/kernel/cpu/mshyperv.c | 8 ++++++-- arch/x86/mm/mem_encrypt_amd.c | 3 ++- 6 files changed, 28 insertions(+), 4 deletions(-) diff --git a/arch/x86/coco/core.c b/arch/x86/coco/core.c index b31ef2424d194..61cec405f1084 100644 --- a/arch/x86/coco/core.c +++ b/arch/x86/coco/core.c @@ -15,6 +15,7 @@ #include <asm/archrandom.h> #include <asm/coco.h> +#include <asm/tdx.h> #include <asm/processor.h> enum cc_vendor cc_vendor __ro_after_init = CC_VENDOR_NONE; @@ -25,8 +26,22 @@ static struct cc_attr_flags { __resv : 63; } cc_flags; +static bool noinstr intel_cc_platform_td_l2(enum cc_attr attr) +{ + switch (attr) { + case CC_ATTR_GUEST_MEM_ENCRYPT: + case CC_ATTR_MEM_ENCRYPT: + return true; + default: + return false; + } +} + static bool noinstr intel_cc_platform_has(enum cc_attr attr) { + if (tdx_partitioned_td_l2) + return intel_cc_platform_td_l2(attr); + switch (attr) { case CC_ATTR_GUEST_UNROLL_STRING_IO: case CC_ATTR_HOTPLUG_DISABLED: diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c index abf3cd591afd3..8e6ab42add7c0 100644 --- a/arch/x86/coco/tdx/tdx.c +++ 
b/arch/x86/coco/tdx/tdx.c @@ -39,6 +39,8 @@ #define TDREPORT_SUBTYPE_0 0 +bool tdx_partitioned_td_l2 __ro_after_init; + /* Called from __tdx_hypercall() for unrecoverable failure */ noinstr void __noreturn __tdx_hypercall_failed(void) { diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c index 768d73de0d098..52cd44e507846 100644 --- a/arch/x86/hyperv/ivm.c +++ b/arch/x86/hyperv/ivm.c @@ -626,7 +626,7 @@ static bool hv_is_private_mmio(u64 addr) return false; } -void __init hv_vtom_init(void) +void __init hv_vtom_init(void) /* TODO: rename the function for TDX */ { enum hv_isolation_type type = hv_get_isolation_type(); @@ -650,6 +650,7 @@ void __init hv_vtom_init(void) case HV_ISOLATION_TYPE_TDX: cc_vendor = CC_VENDOR_INTEL; + tdx_partitioned_td_l2 = true; break; default: diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h index eba178996d845..ddcc9ef82dc99 100644 --- a/arch/x86/include/asm/tdx.h +++ b/arch/x86/include/asm/tdx.h @@ -66,6 +66,7 @@ int tdx_mcall_get_report0(u8 *reportdata, u8 *tdreport); u64 tdx_hcall_get_quote(u8 *buf, size_t size); +extern bool tdx_partitioned_td_l2; #else static inline void tdx_early_init(void) { }; diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index e0fd57a8ba840..7c336bc020c9f 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -18,6 +18,7 @@ #include <linux/kexec.h> #include <linux/i8253.h> #include <linux/random.h> +#include <linux/set_memory.h> #include <asm/processor.h> #include <asm/hypervisor.h> #include <asm/hyperv-tlfs.h> @@ -449,8 +450,11 @@ static void __init ms_hyperv_init_platform(void) ms_hyperv.hints &= ~HV_X64_APIC_ACCESS_RECOMMENDED; if (!ms_hyperv.paravisor_present) { - /* To be supported: more work is required. 
*/ - ms_hyperv.features &= ~HV_MSR_REFERENCE_TSC_AVAILABLE; + unsigned long vaddr = (unsigned long)__start_bss_decrypted; + unsigned long vaddr_end = (unsigned long)__end_bss_decrypted; + + for (; vaddr < vaddr_end; vaddr += PMD_SIZE) + set_memory_decrypted(vaddr, PMD_SIZE >> PAGE_SHIFT); /* HV_MSR_CRASH_CTL is unsupported. */ ms_hyperv.misc_features &= ~HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE; diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c index 422602f6039b8..0ddb9e5d222c3 100644 --- a/arch/x86/mm/mem_encrypt_amd.c +++ b/arch/x86/mm/mem_encrypt_amd.c @@ -25,6 +25,7 @@ #include <asm/fixmap.h> #include <asm/setup.h> #include <asm/mem_encrypt.h> +#include <asm/tdx.h> #include <asm/bootparam.h> #include <asm/set_memory.h> #include <asm/cacheflush.h> @@ -529,7 +530,7 @@ void __init mem_encrypt_free_decrypted_mem(void) * CC_ATTR_MEM_ENCRYPT, aren't necessarily equivalent in a Hyper-V VM * using vTOM, where sme_me_mask is always zero. */ - if (sme_me_mask) { + if (sme_me_mask || (cc_vendor == CC_VENDOR_INTEL && !tdx_partitioned_td_l2)) { r = set_memory_encrypted(vaddr, npages); if (r) { pr_warn("failed to free unused decrypted pages\n"); -- 2.25.1