On Wed, Jan 08, 2025 at 12:37:17PM -0800, Roman Kisel wrote:
> 
> On 1/8/2025 11:17 AM, Stanislav Kinsburskii wrote:
> > On Tue, Jan 07, 2025 at 03:11:15PM -0800, Roman Kisel wrote:
> 
> [...]
> 
> > > Avoiding using the output hypercall page leads to something like [1],
> > > and it looks quite complicated although that's the bare bones; lots
> > > of notes.
> > 
> > How is this related to the original discussion?
> 
> I was looking for ways to eliminate what I perceived as the source of
> friction in the discussion -- allocating the hypercall output page.
> 

No, output page allocation is the current solution and it is fine.
The source of friction is the allocation of this page at runtime under
a config option.
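
To make that concrete, here is a purely illustrative sketch of the pattern
I'm referring to (hv_output_pg and hv_alloc_output_page are made-up names,
this is not the actual patch):

#include <linux/percpu.h>
#include <asm/mshyperv.h>

/*
 * Illustrative only: the hypercall output page is allocated at runtime,
 * but only when the VTL config option is enabled, so whether the page
 * exists depends on the kernel configuration rather than on what the
 * hypervisor interface requires.
 */
static void __percpu *hv_output_pg;

static int __init hv_alloc_output_page(void)
{
        if (!IS_ENABLED(CONFIG_HYPERV_VTL_MODE))
                return 0;       /* no output page in this configuration */

        hv_output_pg = __alloc_percpu(HV_HYP_PAGE_SIZE, HV_HYP_PAGE_SIZE);
        return hv_output_pg ? 0 : -ENOMEM;
}

Allocating the page unconditionally avoids tying its existence to the
kernel configuration.
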
Thanks,
Stas

> > My concern was about the piece allocating the output page guarded by
> > the VTL config option.
> >
> > Thanks,
> > Stas
> 
> > > [1]
> > > 
> > > /*
> > >  * Fast extended hypercall with 20 bytes of input and 16 bytes of
> > >  * output for getting a VP register.
> > >  *
> > >  * NOTES:
> > >  *   1. The function is __init only atm, so the XMM context isn't
> > >  *      used by the user mode.
> > >  *   2. X86_64 only.
> > >  *   3. Fast extended hypercalls may use XMM0..XMM6, and XMM is
> > >  *      architectural on X86_64, yet the support should be enabled
> > >  *      in the CRs. Here, need RDX, R8 and XMM0 for input and RDX,
> > >  *      R8 for output.
> > >  *   4. No provisions for TDX and SEV-SNP for the sake of simplicity
> > >  *      (the hypervisor cannot see the guest registers in the
> > >  *      confidential VM), would need to fall back.
> > >  *   5. A robust implementation would need to check if fast extended
> > >  *      hypercalls are available by checking the synthetic CPUID
> > >  *      leaves. A separate leaf indicates fast output support.
> > >  *      It _almost_ certainly has to be there, unless somehow
> > >  *      disabled; hard to see why that would be needed.
> > >  */
> > > struct hv_u128 {
> > >         u64 low_part;
> > >         u64 high_part;
> > > } __packed;
> > > 
> > > static __init u64 hv_vp_get_register_xfast(u32 reg_name,
> > >                                            struct hv_u128 *value)
> > > {
> > >         u64 control = HV_HYPERCALL_REP_COMP_1 | HVCALL_GET_VP_REGISTERS |
> > >                 HV_HYPERCALL_FAST_BIT;
> > >         unsigned long flags;
> > >         u64 hv_status;
> > > 
> > >         union {
> > >                 struct hv_get_vp_registers_input input;
> > >                 struct {
> > >                         u64 lo;
> > >                         u64 hi;
> > >                 } __packed as_u128;
> > >         } hv_input;
> > >         register u64 rdx asm("rdx");
> > >         register u64 r8 asm("r8");
> > >         register u64 r12 asm("r12");
> > > 
> > >         local_irq_save(flags);
> > > 
> > >         hv_input.as_u128.lo = hv_input.as_u128.hi = 0;
> > >         hv_input.input.header.partitionid = HV_PARTITION_ID_SELF;
> > >         hv_input.input.header.vpindex = HV_VP_INDEX_SELF;
> > >         hv_input.input.header.inputvtl = 0;
> > > 
> > >         rdx = hv_input.as_u128.lo;
> > >         r8 = hv_input.as_u128.hi;
> > >         r12 = reg_name;
> > > 
> > >         __asm__ __volatile__(
> > >                 "subq $16, %%rsp\n"
> > >                 "movups %%xmm0, (%%rsp)\n"
> > >                 "movd %%r12, %%xmm0\n"
> > >                 CALL_NOSPEC
> > >                 "movups (%%rsp), %%xmm0\n"
> > >                 "addq $16, %%rsp\n"
> > >                 : "=a" (hv_status), ASM_CALL_CONSTRAINT,
> > >                   "+c" (control), "+r" (rdx), "+r" (r8)
> > >                 : THUNK_TARGET(hv_hypercall_pg), "r" (r12)
> > >                 : "cc", "memory", "r9", "r10", "r11");
> > > 
> > >         if (hv_result_success(hv_status)) {
> > >                 value->low_part = rdx;
> > >                 value->high_part = r8;
> > >         }
> > > 
> > >         local_irq_restore(flags);
> > >         return hv_status;
> > > }
> > > 
> > > #if IS_ENABLED(CONFIG_HYPERV_VTL_MODE)
> > > u8 __init get_vtl(void)
> > > {
> > >         struct hv_u128 reg_value;
> > >         u64 ret = hv_vp_get_register_xfast(HV_REGISTER_VSM_VP_STATUS, &reg_value);
> > > 
> > >         if (hv_result_success(ret)) {
> > >                 ret = reg_value.low_part & HV_VTL_MASK;
> > >         } else {
> > >                 pr_err("Failed to get VTL(error: %lld) exiting...\n", ret);
> > >                 BUG();
> > >         }
> > > 
> > >         return ret;
> > > }
> > > #endif
> > > 
> > > > 
> > > > Thanks,
> > > > Stas
> > > 
> > > -- 
> > > Thank you,
> > > Roman
> > 
> 
> -- 
> Thank you,
> Roman
> 
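
P.S. Regarding note 5 in [1]: a minimal sketch of what the availability
check could look like, assuming the HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE
and HV_X64_HYPERCALL_XMM_OUTPUT_AVAILABLE bits from hyperv-tlfs.h (treat
this as illustrative only and verify the bit names against the TLFS):

#include <asm/hyperv-tlfs.h>
#include <asm/mshyperv.h>

/*
 * Illustrative only: the XMM fast hypercall input/output bits are
 * advertised in EDX of the features leaf (CPUID 0x40000003), which the
 * kernel caches in ms_hyperv.misc_features.
 */
static __init bool hv_xmm_fast_output_usable(void)
{
        return (ms_hyperv.misc_features & HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE) &&
               (ms_hyperv.misc_features & HV_X64_HYPERCALL_XMM_OUTPUT_AVAILABLE);
}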