On 6/4/23 07:27, Kai Huang wrote: > TDX introduces a new CPU mode: Secure Arbitration Mode (SEAM). This > mode runs only the TDX module itself or other code to load the TDX > module. > > The host kernel communicates with SEAM software via a new SEAMCALL > instruction. This is conceptually similar to a guest->host hypercall, > except it is made from the host to SEAM software instead. The TDX > module establishes a new SEAMCALL ABI which allows the host to > initialize the module and to manage VMs. > > Add infrastructure to make SEAMCALLs. The SEAMCALL ABI is very similar > to the TDCALL ABI and leverages much TDCALL infrastructure. > > SEAMCALL instruction causes #GP when TDX isn't BIOS enabled, and #UD > when CPU is not in VMX operation. Currently, only KVM code mocks with "mocks"? Did you mean "mucks"? > VMX enabling, and KVM is the only user of TDX. This implementation > chooses to make KVM itself responsible for enabling VMX before using > TDX and let the rest of the kernel stay blissfully unaware of VMX. > > The current TDX_MODULE_CALL macro handles neither #GP nor #UD. The > kernel would hit Oops if SEAMCALL were mistakenly made w/o enabling VMX > first. Architecturally, there is no CPU flag to check whether the CPU > is in VMX operation. Also, if a BIOS were buggy, it could still report > valid TDX private KeyIDs when TDX actually couldn't be enabled. I'm not sure this is a great justification. If the BIOS is lying to the OS, we _should_ oops. How else can this happen other than silly kernel bugs. It's OK to oops in the face of silly kernel bugs. ... > diff --git a/arch/x86/include/asm/tdx.h b/arch/x86/include/asm/tdx.h > index 4dfe2e794411..b489b5b9de5d 100644 > --- a/arch/x86/include/asm/tdx.h > +++ b/arch/x86/include/asm/tdx.h > @@ -8,6 +8,8 @@ > #include <asm/ptrace.h> > #include <asm/shared/tdx.h> > > +#include <asm/trapnr.h> > + > /* > * SW-defined error codes. > * > @@ -18,6 +20,9 @@ > #define TDX_SW_ERROR (TDX_ERROR | GENMASK_ULL(47, 40)) > #define TDX_SEAMCALL_VMFAILINVALID (TDX_SW_ERROR | _UL(0xFFFF0000)) > > +#define TDX_SEAMCALL_GP (TDX_SW_ERROR | X86_TRAP_GP) > +#define TDX_SEAMCALL_UD (TDX_SW_ERROR | X86_TRAP_UD) > + > #ifndef __ASSEMBLY__ > > /* TDX supported page sizes from the TDX module ABI. */ > diff --git a/arch/x86/virt/vmx/tdx/Makefile b/arch/x86/virt/vmx/tdx/Makefile > index 93ca8b73e1f1..38d534f2c113 100644 > --- a/arch/x86/virt/vmx/tdx/Makefile > +++ b/arch/x86/virt/vmx/tdx/Makefile > @@ -1,2 +1,2 @@ > # SPDX-License-Identifier: GPL-2.0-only > -obj-y += tdx.o > +obj-y += tdx.o seamcall.o > diff --git a/arch/x86/virt/vmx/tdx/seamcall.S b/arch/x86/virt/vmx/tdx/seamcall.S > new file mode 100644 > index 000000000000..f81be6b9c133 > --- /dev/null > +++ b/arch/x86/virt/vmx/tdx/seamcall.S > @@ -0,0 +1,52 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ > +#include <linux/linkage.h> > +#include <asm/frame.h> > + > +#include "tdxcall.S" > + > +/* > + * __seamcall() - Host-side interface functions to SEAM software module > + * (the P-SEAMLDR or the TDX module). > + * > + * Transform function call register arguments into the SEAMCALL register > + * ABI. Return TDX_SEAMCALL_VMFAILINVALID if the SEAMCALL itself fails, > + * or the completion status of the SEAMCALL leaf function. Additional > + * output operands are saved in @out (if it is provided by the caller). > + * > + *------------------------------------------------------------------------- > + * SEAMCALL ABI: > + *------------------------------------------------------------------------- > + * Input Registers: > + * > + * RAX - SEAMCALL Leaf number. > + * RCX,RDX,R8-R9 - SEAMCALL Leaf specific input registers. > + * > + * Output Registers: > + * > + * RAX - SEAMCALL completion status code. > + * RCX,RDX,R8-R11 - SEAMCALL Leaf specific output registers. > + * > + *------------------------------------------------------------------------- > + * > + * __seamcall() function ABI: > + * > + * @fn (RDI) - SEAMCALL Leaf number, moved to RAX > + * @rcx (RSI) - Input parameter 1, moved to RCX > + * @rdx (RDX) - Input parameter 2, moved to RDX > + * @r8 (RCX) - Input parameter 3, moved to R8 > + * @r9 (R8) - Input parameter 4, moved to R9 > + * > + * @out (R9) - struct tdx_module_output pointer > + * stored temporarily in R12 (not > + * used by the P-SEAMLDR or the TDX > + * module). It can be NULL. > + * > + * Return (via RAX) the completion status of the SEAMCALL, or > + * TDX_SEAMCALL_VMFAILINVALID. > + */ > +SYM_FUNC_START(__seamcall) > + FRAME_BEGIN > + TDX_MODULE_CALL host=1 > + FRAME_END > + RET > +SYM_FUNC_END(__seamcall) > diff --git a/arch/x86/virt/vmx/tdx/tdx.c b/arch/x86/virt/vmx/tdx/tdx.c > index 2d91e7120c90..e82713dd5d54 100644 > --- a/arch/x86/virt/vmx/tdx/tdx.c > +++ b/arch/x86/virt/vmx/tdx/tdx.c > @@ -12,14 +12,70 @@ > #include <linux/init.h> > #include <linux/errno.h> > #include <linux/printk.h> > +#include <linux/smp.h> > #include <asm/msr-index.h> > #include <asm/msr.h> > #include <asm/tdx.h> > +#include "tdx.h" > > static u32 tdx_global_keyid __ro_after_init; > static u32 tdx_guest_keyid_start __ro_after_init; > static u32 tdx_nr_guest_keyids __ro_after_init; > > +/* > + * Wrapper of __seamcall() to convert SEAMCALL leaf function error code > + * to kernel error code. @seamcall_ret and @out contain the SEAMCALL > + * leaf function return code and the additional output respectively if > + * not NULL. > + */ > +static int __always_unused seamcall(u64 fn, u64 rcx, u64 rdx, u64 r8, u64 r9, > + u64 *seamcall_ret, > + struct tdx_module_output *out) > +{ > + int cpu, ret = 0; > + u64 sret; > + > + /* Need a stable CPU id for printing error message */ > + cpu = get_cpu(); > + > + sret = __seamcall(fn, rcx, rdx, r8, r9, out); > + > + /* Save SEAMCALL return code if the caller wants it */ > + if (seamcall_ret) > + *seamcall_ret = sret; > + > + /* SEAMCALL was successful */ > + if (!sret) > + goto out; > + > + switch (sret) { > + case TDX_SEAMCALL_GP: > + pr_err_once("[firmware bug]: TDX is not enabled by BIOS.\n"); > + ret = -ENODEV; > + break; > + case TDX_SEAMCALL_VMFAILINVALID: > + pr_err_once("TDX module is not loaded.\n"); > + ret = -ENODEV; > + break; > + case TDX_SEAMCALL_UD: > + pr_err_once("SEAMCALL failed: CPU %d is not in VMX operation.\n", > + cpu); > + ret = -EINVAL; > + break; > + default: > + pr_err_once("SEAMCALL failed: CPU %d: leaf %llu, error 0x%llx.\n", > + cpu, fn, sret); > + if (out) > + pr_err_once("additional output: rcx 0x%llx, rdx 0x%llx, r8 0x%llx, r9 0x%llx, r10 0x%llx, r11 0x%llx.\n", > + out->rcx, out->rdx, out->r8, > + out->r9, out->r10, out->r11); > + ret = -EIO; > + } > +out: > + put_cpu(); > + return ret; > +} This fails to distinguish two very different things: * A SEAMCALL error and * A SEAMCALL *failure* "Errors" are normal. Hypercalls can return errors and so can SEAMCALLs. No biggie. But SEAMCALL failures are another matter. They mean that something really fundamental is *BROKEN*. Just saying "SEAMCALL was successful" is a bit ambiguous for me. > diff --git a/arch/x86/virt/vmx/tdx/tdx.h b/arch/x86/virt/vmx/tdx/tdx.h > new file mode 100644 > index 000000000000..48ad1a1ba737 > --- /dev/null > +++ b/arch/x86/virt/vmx/tdx/tdx.h > @@ -0,0 +1,10 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ > +#ifndef _X86_VIRT_TDX_H > +#define _X86_VIRT_TDX_H > + > +#include <linux/types.h> > + > +struct tdx_module_output; > +u64 __seamcall(u64 fn, u64 rcx, u64 rdx, u64 r8, u64 r9, > + struct tdx_module_output *out); > +#endif > diff --git a/arch/x86/virt/vmx/tdx/tdxcall.S b/arch/x86/virt/vmx/tdx/tdxcall.S > index 49a54356ae99..757b0c34be10 100644 > --- a/arch/x86/virt/vmx/tdx/tdxcall.S > +++ b/arch/x86/virt/vmx/tdx/tdxcall.S > @@ -1,6 +1,7 @@ > /* SPDX-License-Identifier: GPL-2.0 */ > #include <asm/asm-offsets.h> > #include <asm/tdx.h> > +#include <asm/asm.h> > > /* > * TDCALL and SEAMCALL are supported in Binutils >= 2.36. > @@ -45,6 +46,7 @@ > /* Leave input param 2 in RDX */ > > .if \host > +1: > seamcall > /* > * SEAMCALL instruction is essentially a VMExit from VMX root > @@ -57,10 +59,23 @@ > * This value will never be used as actual SEAMCALL error code as > * it is from the Reserved status code class. > */ > - jnc .Lno_vmfailinvalid > + jnc .Lseamcall_out > mov $TDX_SEAMCALL_VMFAILINVALID, %rax > -.Lno_vmfailinvalid: > + jmp .Lseamcall_out > +2: > + /* > + * SEAMCALL caused #GP or #UD. By reaching here %eax contains > + * the trap number. Convert the trap number to the TDX error > + * code by setting TDX_SW_ERROR to the high 32-bits of %rax. > + * > + * Note cannot OR TDX_SW_ERROR directly to %rax as OR instruction > + * only accepts 32-bit immediate at most. > + */ > + mov $TDX_SW_ERROR, %r12 > + orq %r12, %rax I think the justification for doing the #UD/#GP handling is a bit weak. In the end, it gets us a nicer error message. Is that error message *REALLY* needed? Or is an oops OK in the very rare circumstance that the BIOS is totally buggy?