Re: [PATCH v19 007/130] x86/virt/tdx: Export SEAMCALL functions

Sean Christopherson <seanjc@xxxxxxxxxx> · Fri, 15 Mar 2024 09:33:20 -0700

On Thu, Mar 14, 2024, Dave Hansen wrote:
> On 3/14/24 18:17, Edgecombe, Rick P wrote:
> > I guess there are three options:
> > 1. Export the low level seamcall function
> > 2. Export a bunch of higher level helper functions
> > 3. Duplicate __seamcall asm in KVM
> > 
> > Letting modules make unrestricted seamcalls is not ideal. Preventing
> > the compiler from inlining the small logic in the static inline helpers
> > is not ideal. Duplicating code is not ideal. Hmm.
> > 
> > I want to say 2 sounds the least worst of the three. But I'm not sure.
> > I'm not sure if x86 folks would like to police new seamcalls, or be
> > bothered by it, either.
> 
> #3 is the only objectively awful one. :)
> 
> In the end, we actually _want_ to have conversations about these things.
>  There are going to be considerations about what functionality should be
> in KVM or the core kernel.  We don't want KVM doing any calls that could
> affect global TDX module state, for instance.

Heh, Like this one?

        static inline u64 tdh_sys_lp_shutdown(void)
        {
        	struct tdx_module_args in = {
        	};

        	return tdx_seamcall(TDH_SYS_LP_SHUTDOWN, &in, NULL);
        }

Which isn't actually used...

> But I'd also defer to the KVM maintainers on this.  They're the ones
> that have to play the symbol exporting game a lot more than I ever do.
> If they cringe at the idea of adding 20 (or whatever) exports, then
> that's a lot more important than the possibility of some other silly
> module abusing the generic exported __seamcall.

I don't care much about exports.  What I do care about is sane code, and while
the current code _looks_ pretty, it's actually quite insane.

I get why y'all put SEAMCALL in assembly subroutines; the macro shenanigans I
originally wrote years ago were their own brand of crazy, and dealing with GPRs
that can't be asm() constraints often results in brittle code.

But the tdx_module_args structure approach generates truly atrocious code.  Yes,
SEAMCALL is inherently slow, but that doesn't mean that we shouldn't at least try
to generate efficient code.  And it's not just efficiency that is lost, the
generated code ends up being much harder to read than it ought to be.

E.g. the seemingly simple

        static inline u64 tdh_mem_page_remove(hpa_t tdr, gpa_t gpa, int level,
        				      struct tdx_module_args *out)
        {
        	struct tdx_module_args in = {
        		.rcx = gpa | level,
        		.rdx = tdr,
        	};

        	return tdx_seamcall_sept(TDH_MEM_PAGE_REMOVE, &in, out);
        }

generates the below monstrosity with gcc-13.  And that's just one SEAMCALL wrapper,
*every* single one generates the same mess.  clang-16 is kinda sorta a little
better, as it at least inlines the helpers that have single callers.

So my feedback is to not worry about the exports, and instead focus on figuring
out a way to make the generated code less bloated and easier to read/debug.

Dump of assembler code for function tdh_mem_page_remove:
   0x0000000000032b20 <+0>:	push   %r15
   0x0000000000032b22 <+2>:	xor    %eax,%eax
   0x0000000000032b24 <+4>:	movabs $0x8000ff0000000006,%r15
   0x0000000000032b2e <+14>:	push   %r14
   0x0000000000032b30 <+16>:	mov    %rcx,%r14
   0x0000000000032b33 <+19>:	mov    $0xb,%ecx
   0x0000000000032b38 <+24>:	push   %r13
   0x0000000000032b3a <+26>:	movslq %edx,%r13
   0x0000000000032b3d <+29>:	push   %r12
   0x0000000000032b3f <+31>:	or     %rsi,%r13
   0x0000000000032b42 <+34>:	mov    $0x11,%r12d
   0x0000000000032b48 <+40>:	push   %rbp
   0x0000000000032b49 <+41>:	movabs $0x8000020300000000,%rbp
   0x0000000000032b53 <+51>:	push   %rbx
   0x0000000000032b54 <+52>:	sub    $0x70,%rsp
   0x0000000000032b58 <+56>:	mov    %rdi,(%rsp)
   0x0000000000032b5c <+60>:	lea    0x18(%rsp),%rdi
   0x0000000000032b61 <+65>:	rep stos %rax,%es:(%rdi)
   0x0000000000032b64 <+68>:	mov    (%rsp),%rax
   0x0000000000032b68 <+72>:	mov    %r13,(%r14)
   0x0000000000032b6b <+75>:	mov    $0xa,%ebx
   0x0000000000032b70 <+80>:	mov    %rax,0x8(%r14)
   0x0000000000032b74 <+84>:	mov    0x18(%rsp),%rax
   0x0000000000032b79 <+89>:	mov    %rax,0x10(%r14)
   0x0000000000032b7d <+93>:	mov    0x20(%rsp),%rax
   0x0000000000032b82 <+98>:	mov    %rax,0x18(%r14)
   0x0000000000032b86 <+102>:	mov    0x28(%rsp),%rax
   0x0000000000032b8b <+107>:	mov    %rax,0x20(%r14)
   0x0000000000032b8f <+111>:	mov    0x30(%rsp),%rax
   0x0000000000032b94 <+116>:	mov    %rax,0x28(%r14)
   0x0000000000032b98 <+120>:	mov    0x38(%rsp),%rax
   0x0000000000032b9d <+125>:	mov    %rax,0x30(%r14)
   0x0000000000032ba1 <+129>:	mov    0x40(%rsp),%rax
   0x0000000000032ba6 <+134>:	mov    %rax,0x38(%r14)
   0x0000000000032baa <+138>:	mov    0x48(%rsp),%rax
   0x0000000000032baf <+143>:	mov    %rax,0x40(%r14)
   0x0000000000032bb3 <+147>:	mov    0x50(%rsp),%rax
   0x0000000000032bb8 <+152>:	mov    %rax,0x48(%r14)
   0x0000000000032bbc <+156>:	mov    0x58(%rsp),%rax
   0x0000000000032bc1 <+161>:	mov    %rax,0x50(%r14)
   0x0000000000032bc5 <+165>:	mov    0x60(%rsp),%rax
   0x0000000000032bca <+170>:	mov    %rax,0x58(%r14)
   0x0000000000032bce <+174>:	mov    0x68(%rsp),%rax
   0x0000000000032bd3 <+179>:	mov    %rax,0x60(%r14)
   0x0000000000032bd7 <+183>:	mov    %r14,%rsi
   0x0000000000032bda <+186>:	mov    $0x1d,%edi
   0x0000000000032bdf <+191>:	call   0x32be4 <tdh_mem_page_remove+196>
   0x0000000000032be4 <+196>:	cmp    %rbp,%rax
   0x0000000000032be7 <+199>:	jne    0x32bfd <tdh_mem_page_remove+221>
   0x0000000000032be9 <+201>:	sub    $0x1,%ebx
   0x0000000000032bec <+204>:	jne    0x32bd7 <tdh_mem_page_remove+183>
   0x0000000000032bee <+206>:	add    $0x70,%rsp
   0x0000000000032bf2 <+210>:	pop    %rbx
   0x0000000000032bf3 <+211>:	pop    %rbp
   0x0000000000032bf4 <+212>:	pop    %r12
   0x0000000000032bf6 <+214>:	pop    %r13
   0x0000000000032bf8 <+216>:	pop    %r14
   0x0000000000032bfa <+218>:	pop    %r15
   0x0000000000032bfc <+220>:	ret
   0x0000000000032bfd <+221>:	cmp    %r15,%rax
   0x0000000000032c00 <+224>:	je     0x32c2a <tdh_mem_page_remove+266>
   0x0000000000032c02 <+226>:	movabs $0x8000020000000092,%rdx
   0x0000000000032c0c <+236>:	cmp    %rdx,%rax
   0x0000000000032c0f <+239>:	jne    0x32bee <tdh_mem_page_remove+206>
   0x0000000000032c11 <+241>:	sub    $0x1,%r12d
   0x0000000000032c15 <+245>:	jne    0x32b64 <tdh_mem_page_remove+68>
   0x0000000000032c1b <+251>:	add    $0x70,%rsp
   0x0000000000032c1f <+255>:	pop    %rbx
   0x0000000000032c20 <+256>:	pop    %rbp
   0x0000000000032c21 <+257>:	pop    %r12
   0x0000000000032c23 <+259>:	pop    %r13
   0x0000000000032c25 <+261>:	pop    %r14
   0x0000000000032c27 <+263>:	pop    %r15
   0x0000000000032c29 <+265>:	ret
   0x0000000000032c2a <+266>:	call   0x32c2f <tdh_mem_page_remove+271>
   0x0000000000032c2f <+271>:	xor    %eax,%eax
   0x0000000000032c31 <+273>:	add    $0x70,%rsp
   0x0000000000032c35 <+277>:	pop    %rbx
   0x0000000000032c36 <+278>:	pop    %rbp
   0x0000000000032c37 <+279>:	pop    %r12
   0x0000000000032c39 <+281>:	pop    %r13
   0x0000000000032c3b <+283>:	pop    %r14
   0x0000000000032c3d <+285>:	pop    %r15
   0x0000000000032c3f <+287>:	ret
End of assembler dump.