On Tue, Jun 27, 2023 at 06:57:28PM +0800, Tianyu Lan wrote: > > "There is no x86 SEV SNP feature(X86_FEATURE_SEV_SNP) flag I'm sure we can arrange such a feature if we need it, this isn't rocket science. Boris? > > support so far and hardware provides MSR_AMD64_SEV register > > to check SEV-SNP capability with MSR_AMD64_SEV_ENABLED bit > > ALTERNATIVE can't work without SEV-SNP x86 feature flag." > > There is no cpuid leaf bit to check AMD SEV-SNP feature. > > > > After some Hyper-V doesn't provides SEV and SEV-ES guest before and so > > may reuse X86_FEATURE_SEV and X86_FEATURE_SEV_ES flag as alternative > > feature check for Hyper-V SEV-SNP guest. Will refresh patch. > > > > Hi Peter: > I tried using alternative for "vmmcall" and CALL_NOSPEC in a single > Inline assembly. The output is different in the SEV-SNP mode. When SEV- > SNP is enabled, thunk_target is not required. While it's necessary in > the non SEV-SNP mode. Do you have any idea how to differentiate outputs in > the single Inline assembly which just like alternative works for > assembler template. This seems to work; it's a bit magical for having a nested ALTERNATIVE but the output seems correct (the outer alternative comes last in .altinstructions and thus has precedence). Sure the [thunk_target] input goes unused in one of the alternatives, but who cares. static inline u64 hv_do_hypercall(u64 control, void *input, void *output) { u64 input_address = input ? virt_to_phys(input) : 0; u64 output_address = output ? 
virt_to_phys(output) : 0; u64 hv_status; #ifdef CONFIG_X86_64 if (!hv_hypercall_pg) return U64_MAX; #if 0 if (hv_isolation_type_en_snp()) { __asm__ __volatile__("mov %4, %%r8\n" "vmmcall" : "=a" (hv_status), ASM_CALL_CONSTRAINT, "+c" (control), "+d" (input_address) : "r" (output_address) : "cc", "memory", "r8", "r9", "r10", "r11"); } else { __asm__ __volatile__("mov %4, %%r8\n" CALL_NOSPEC : "=a" (hv_status), ASM_CALL_CONSTRAINT, "+c" (control), "+d" (input_address) : "r" (output_address), THUNK_TARGET(hv_hypercall_pg) : "cc", "memory", "r8", "r9", "r10", "r11"); } #endif asm volatile("mov %[output], %%r8\n" ALTERNATIVE(CALL_NOSPEC, "vmmcall", X86_FEATURE_SEV_ES) : "=a" (hv_status), ASM_CALL_CONSTRAINT, "+c" (control), "+d" (input_address) : [output] "r" (output_address), THUNK_TARGET(hv_hypercall_pg) : "cc", "memory", "r8", "r9", "r10", "r11"); #else u32 input_address_hi = upper_32_bits(input_address); u32 input_address_lo = lower_32_bits(input_address); u32 output_address_hi = upper_32_bits(output_address); u32 output_address_lo = lower_32_bits(output_address); if (!hv_hypercall_pg) return U64_MAX; __asm__ __volatile__(CALL_NOSPEC : "=A" (hv_status), "+c" (input_address_lo), ASM_CALL_CONSTRAINT : "A" (control), "b" (input_address_hi), "D"(output_address_hi), "S"(output_address_lo), THUNK_TARGET(hv_hypercall_pg) : "cc", "memory"); #endif /* !x86_64 */ return hv_status; } (in actual fact x86_64-defconfig + kvm_guest.config + HYPERV) $ ./scripts/objdump-func defconfig-build/arch/x86/hyperv/mmu.o hv_do_hypercall 0000 0000000000000cd0 <hv_do_hypercall.constprop.0>: 0000 cd0: 48 89 f9 mov %rdi,%rcx 0003 cd3: 31 d2 xor %edx,%edx 0005 cd5: 48 85 f6 test %rsi,%rsi 0008 cd8: 74 1b je cf5 <hv_do_hypercall.constprop.0+0x25> 000a cda: b8 00 00 00 80 mov $0x80000000,%eax 000f cdf: 48 01 c6 add %rax,%rsi 0012 ce2: 72 38 jb d1c <hv_do_hypercall.constprop.0+0x4c> 0014 ce4: 48 c7 c2 00 00 00 80 mov $0xffffffff80000000,%rdx 001b ceb: 48 2b 15 00 00 00 00 sub 0x0(%rip),%rdx # cf2 
<hv_do_hypercall.constprop.0+0x22> cee: R_X86_64_PC32 page_offset_base-0x4 0022 cf2: 48 01 f2 add %rsi,%rdx 0025 cf5: 48 8b 35 00 00 00 00 mov 0x0(%rip),%rsi # cfc <hv_do_hypercall.constprop.0+0x2c> cf8: R_X86_64_PC32 hv_hypercall_pg-0x4 002c cfc: 48 85 f6 test %rsi,%rsi 002f cff: 74 0f je d10 <hv_do_hypercall.constprop.0+0x40> 0031 d01: 31 c0 xor %eax,%eax 0033 d03: 49 89 c0 mov %rax,%r8 0036 d06: ff d6 call *%rsi 0038 d08: 90 nop 0039 d09: 90 nop 003a d0a: 90 nop 003b d0b: e9 00 00 00 00 jmp d10 <hv_do_hypercall.constprop.0+0x40> d0c: R_X86_64_PLT32 __x86_return_thunk-0x4 0040 d10: 48 c7 c0 ff ff ff ff mov $0xffffffffffffffff,%rax 0047 d17: e9 00 00 00 00 jmp d1c <hv_do_hypercall.constprop.0+0x4c> d18: R_X86_64_PLT32 __x86_return_thunk-0x4 004c d1c: 48 8b 15 00 00 00 00 mov 0x0(%rip),%rdx # d23 <hv_do_hypercall.constprop.0+0x53> d1f: R_X86_64_PC32 phys_base-0x4 0053 d23: eb cd jmp cf2 <hv_do_hypercall.constprop.0+0x22> $ objdump -wdr -j .altinstr_replacement defconfig-build/arch/x86/hyperv/mmu.o 0000000000000000 <.altinstr_replacement>: 0: f3 48 0f b8 c7 popcnt %rdi,%rax 5: e8 00 00 00 00 call a <.altinstr_replacement+0xa> 6: R_X86_64_PLT32 __x86_indirect_thunk_rsi-0x4 a: 0f ae e8 lfence d: ff d6 call *%rsi f: 0f 01 d9 vmmcall $ ./readelf-section.sh defconfig-build/arch/x86/hyperv/mmu.o altinstructions Relocation section '.rela.altinstructions' at offset 0x5420 contains 8 entries: Offset Info Type Symbol's Value Symbol's Name + Addend 0000000000000000 0000000200000002 R_X86_64_PC32 0000000000000000 .text + 1e3 0000000000000004 0000000700000002 R_X86_64_PC32 0000000000000000 .altinstr_replacement + 0 000000000000000e 0000000200000002 R_X86_64_PC32 0000000000000000 .text + d06 0000000000000012 0000000700000002 R_X86_64_PC32 0000000000000000 .altinstr_replacement + 5 000000000000001c 0000000200000002 R_X86_64_PC32 0000000000000000 .text + d06 0000000000000020 0000000700000002 R_X86_64_PC32 0000000000000000 .altinstr_replacement + a 000000000000002a 0000000200000002 
R_X86_64_PC32 0000000000000000 .text + d06 000000000000002e 0000000700000002 R_X86_64_PC32 0000000000000000 .altinstr_replacement + f