The Hyper-V hypercall interface supports fast hypercalls and XMM fast hypercalls, where arguments are exchanged through general-purpose/XMM registers instead of memory. This patch implements them and makes use of them. With this patch, pci-hyperv.c, hv_apic.c (__send_ipi_mask), and hyperv/mmu.c will use (XMM) fast hypercalls. --- arch/x86/hyperv/hv_apic.c | 3 +- arch/x86/hyperv/mmu.c | 4 +- arch/x86/include/asm/hyperv-tlfs.h | 1 + arch/x86/include/asm/mshyperv.h | 184 ++++++++++++++++++++++++++++++++++-- drivers/hv/hv.c | 3 +- drivers/pci/controller/pci-hyperv.c | 5 +- 6 files changed, 186 insertions(+), 14 deletions(-) diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c index 402338365651..c0cb8180b560 100644 --- a/arch/x86/hyperv/hv_apic.c +++ b/arch/x86/hyperv/hv_apic.c @@ -173,7 +173,8 @@ static bool __send_ipi_mask(const struct cpumask *mask, int vector) __set_bit(vcpu, (unsigned long *)&ipi_arg->cpu_mask); } - ret = hv_do_hypercall(HVCALL_SEND_IPI, ipi_arg, NULL); + ret = hv_do_hypercall(HVCALL_SEND_IPI, ipi_arg, sizeof(*ipi_arg), + NULL, 0); ipi_mask_done: local_irq_restore(flags); diff --git a/arch/x86/hyperv/mmu.c b/arch/x86/hyperv/mmu.c index de27615c51ea..21bf609d01db 100644 --- a/arch/x86/hyperv/mmu.c +++ b/arch/x86/hyperv/mmu.c @@ -112,11 +112,11 @@ static void hyperv_flush_tlb_others(const struct cpumask *cpus, if (info->end == TLB_FLUSH_ALL) { flush->flags |= HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY; status = hv_do_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE, - flush, NULL); + flush, sizeof(*flush), NULL, 0); } else if (info->end && ((info->end - info->start)/HV_TLB_FLUSH_UNIT) > max_gvas) { status = hv_do_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE, - flush, NULL); + flush, sizeof(*flush), NULL, 0); } else { gva_n = fill_gva_list(flush->gva_list, 0, info->start, info->end); diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h index b8c89265baf0..267112b24a0c 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -123,6 
+123,7 @@ * registers is available */ #define HV_X64_HYPERCALL_PARAMS_XMM_AVAILABLE (1 << 4) +#define HV_X64_HYPERCALL_OUTPUT_XMM_AVAILABLE (1 << 15) /* Support for a virtual guest idle state is available */ #define HV_X64_GUEST_IDLE_STATE_AVAILABLE (1 << 5) /* Guest crash data handler available */ diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 5a7375ed5f7c..12f7d5dbd993 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -8,6 +8,7 @@ #include <asm/io.h> #include <asm/hyperv-tlfs.h> #include <asm/nospec-branch.h> +#include <asm/fpu/api.h> #define VP_INVAL U32_MAX @@ -125,16 +126,13 @@ extern struct clocksource *hyperv_cs; extern void *hv_hypercall_pg; extern void __percpu **hyperv_pcpu_input_arg; -static inline u64 hv_do_hypercall(u64 control, void *input, void *output) +static inline u64 __hv_do_hypercall(u64 control, void *input, void *output) { u64 input_address = input ? virt_to_phys(input) : 0; u64 output_address = output ? 
virt_to_phys(output) : 0; u64 hv_status; #ifdef CONFIG_X86_64 - if (!hv_hypercall_pg) - return U64_MAX; - __asm__ __volatile__("mov %4, %%r8\n" CALL_NOSPEC : "=a" (hv_status), ASM_CALL_CONSTRAINT, @@ -148,9 +146,6 @@ static inline u64 hv_do_hypercall(u64 control, void *input, void *output) u32 output_address_hi = upper_32_bits(output_address); u32 output_address_lo = lower_32_bits(output_address); - if (!hv_hypercall_pg) - return U64_MAX; - __asm__ __volatile__(CALL_NOSPEC : "=A" (hv_status), "+c" (input_address_lo), ASM_CALL_CONSTRAINT @@ -194,6 +189,43 @@ static inline u64 hv_do_fast_hypercall8(u16 code, u64 input1) return hv_status; } +/* Fast hypercall with 16 bytes of input and no output */ +static inline u64 hv_do_fast_hypercall16(u16 code, u64 input1, u64 input2) +{ + u64 hv_status, control = (u64)code | HV_HYPERCALL_FAST_BIT; + +#ifdef CONFIG_X86_64 + { + __asm__ __volatile__("mov %4, %%r8\n" + CALL_NOSPEC + : "=a" (hv_status), ASM_CALL_CONSTRAINT, + "+c" (control), "+d" (input1) + : "r" (input2), + THUNK_TARGET(hv_hypercall_pg) + : "cc", "r8", "r9", "r10", "r11"); + } +#else + { + u32 input1_hi = upper_32_bits(input1); + u32 input1_lo = lower_32_bits(input1); + u32 input2_hi = upper_32_bits(input2); + u32 input2_lo = lower_32_bits(input2); + + __asm__ __volatile__ (CALL_NOSPEC + : "=A"(hv_status), + "+c"(input1_lo), + "+D"(input2_hi), + "+S"(input2_lo), + ASM_CALL_CONSTRAINT + : "A" (control), + "b" (input1_hi), + THUNK_TARGET(hv_hypercall_pg) + : "cc"); + } +#endif + return hv_status; +} + /* * Rep hypercalls. Callers of this functions are supposed to ensure that * rep_count and varhead_size comply with Hyper-V hypercall definition. 
@@ -209,7 +241,7 @@ static inline u64 hv_do_rep_hypercall(u16 code, u16 rep_count, u16 varhead_size, control |= (u64)rep_count << HV_HYPERCALL_REP_COMP_OFFSET; do { - status = hv_do_hypercall(control, input, output); + status = __hv_do_hypercall(control, input, output); if ((status & HV_HYPERCALL_RESULT_MASK) != HV_STATUS_SUCCESS) return status; @@ -226,6 +258,142 @@ static inline u64 hv_do_rep_hypercall(u16 code, u16 rep_count, u16 varhead_size, return status; } +#define HV_XMM_BYTE_MAX 112 + +/* ibytes = fixed header size + var header size + data size in bytes */ +static inline u64 hv_do_xmm_fast_hypercall( + u64 control, void *input, size_t ibytes, void *output, size_t obytes) +{ + u64 status; + u64 input0; + u64 input1; + u64 tmp[12] __aligned((16)); + + BUG_ON(ibytes <= 16); + BUG_ON(roundup(ibytes, 16) + obytes > HV_XMM_BYTE_MAX); + + control |= HV_HYPERCALL_FAST_BIT; + + /* it's assumed that there are at least two inputs */ + input0 = ((u64 *)input)[0]; + input1 = ((u64 *)input)[1]; + /* + * Copy the rest of the input into an aligned, zero-padded buffer + * so that movdqa can load it. TODO: skip the copy when input is + * already aligned and long enough (e.g. hyperv_pcpu_input_arg). 
+ */ + memcpy(tmp, (u8 *)input + 16, ibytes - 16); + memset((u8 *)tmp + (ibytes - 16), 0, sizeof(tmp) - (ibytes - 16)); + + kernel_fpu_begin(); + if (ibytes > 2 * 8) + __asm__ __volatile__("movdqa %0, %%xmm0" : : "m" (tmp[0])); + if (ibytes > 4 * 8) + __asm__ __volatile__("movdqa %0, %%xmm1" : : "m" (tmp[2])); + if (ibytes > 6 * 8) + __asm__ __volatile__("movdqa %0, %%xmm2" : : "m" (tmp[4])); + if (ibytes > 8 * 8) + __asm__ __volatile__("movdqa %0, %%xmm3" : : "m" (tmp[6])); + if (ibytes > 10 * 8) + __asm__ __volatile__("movdqa %0, %%xmm4" : : "m" (tmp[8])); + if (ibytes > 12 * 8) + __asm__ __volatile__("movdqa %0, %%xmm5" : : "m" (tmp[10])); + +#ifdef CONFIG_X86_64 + __asm__ __volatile__("mov %4, %%r8\n" + CALL_NOSPEC + : "=a" (status), ASM_CALL_CONSTRAINT, + "+c" (control), "+d" (input0) + : "r" (input1), + THUNK_TARGET(hv_hypercall_pg) + : "cc", "r8", "r9", "r10", "r11"); +#else + { + u32 input0_hi = upper_32_bits(input0); + u32 input0_lo = lower_32_bits(input0); + u32 input1_hi = upper_32_bits(input1); + u32 input1_lo = lower_32_bits(input1); + + __asm__ __volatile__ (CALL_NOSPEC + : "=A"(status), + "+c"(input0_lo), + "+D"(input1_hi), + "+S"(input1_lo), + ASM_CALL_CONSTRAINT + : "A" (control), + "b" (input0_hi), + THUNK_TARGET(hv_hypercall_pg) + : "cc"); + } +#endif + if (output) { + size_t i_end = roundup(ibytes, 16); + size_t o_end = i_end + obytes; + + if (i_end <= 2 * 8 && 2 * 8 < o_end) + __asm__ __volatile__( + "movdqa %%xmm0, %0" : "=m" (tmp[0]) : : "memory"); + if (i_end <= 4 * 8 && 4 * 8 < o_end) + __asm__ __volatile__( + "movdqa %%xmm1, %0" : "=m" (tmp[2]) : : "memory"); + if (i_end <= 6 * 8 && 6 * 8 < o_end) + __asm__ __volatile__( + "movdqa %%xmm2, %0" : "=m" (tmp[4]) : : "memory"); + if (i_end <= 8 * 8 && 8 * 8 < o_end) + __asm__ __volatile__( + "movdqa %%xmm3, %0" : "=m" (tmp[6]) : : "memory"); + if (i_end <= 10 * 8 && 10 * 8 < o_end) + __asm__ __volatile__( + "movdqa %%xmm4, %0" : "=m" (tmp[8]) : : "memory"); + if (i_end <= 12 * 8 && 12 * 8 < o_end) + __asm__ __volatile__( + "movdqa %%xmm5, 
%0" : "=m" (tmp[10]) : : "memory"); + memcpy(output, (u8 *)tmp + i_end - 16, obytes); + } + kernel_fpu_end(); + return status; +} + +static inline u64 hv_do_hypercall( + u64 control, void *input, size_t ibytes, void *output, size_t obytes) +{ + if (unlikely(!hv_hypercall_pg)) + return U64_MAX; + + /* fast hypercall */ + if (output == NULL && ibytes <= 16) { + u64 tmp[2]; + + if (ibytes <= 8) { + tmp[0] = 0; + memcpy(&tmp, input, ibytes); + return hv_do_fast_hypercall8((u16)control, tmp[0]); + } + + tmp[1] = 0; + memcpy(tmp, input, ibytes); + return hv_do_fast_hypercall16((u16)control, tmp[0], tmp[1]); + } + + /* xmm fast hypercall */ + if (static_cpu_has(X86_FEATURE_XMM) && irq_fpu_usable() && + ms_hyperv.features & HV_X64_HYPERCALL_PARAMS_XMM_AVAILABLE && + ibytes > 16 && roundup(ibytes, 16) + obytes <= HV_XMM_BYTE_MAX) { + if (output) { + if (ms_hyperv.features & + HV_X64_HYPERCALL_OUTPUT_XMM_AVAILABLE) + return hv_do_xmm_fast_hypercall( + control, input, ibytes, output, obytes); + } else { + WARN_ON(obytes > 0); + return hv_do_xmm_fast_hypercall( + control, input, ibytes, output, obytes); + } + } + + return __hv_do_hypercall(control, input, output); +} + /* * Hypervisor's notion of virtual processor ID is different from * Linux' notion of CPU ID. 
This information can only be retrieved diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index 658dc765753b..0c7b7a88fdcd 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -92,7 +92,8 @@ int hv_post_message(union hv_connection_id connection_id, aligned_msg->payload_size = payload_size; memcpy((void *)aligned_msg->payload, payload, payload_size); - status = hv_do_hypercall(HVCALL_POST_MESSAGE, aligned_msg, NULL); + status = hv_do_hypercall(HVCALL_POST_MESSAGE, aligned_msg, + sizeof(*aligned_msg), NULL, 0); /* Preemption must remain disabled until after the hypercall * so some other thread can't get scheduled onto this cpu and diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c index f6325f1a89e8..7c229009daba 100644 --- a/drivers/pci/controller/pci-hyperv.c +++ b/drivers/pci/controller/pci-hyperv.c @@ -979,8 +979,9 @@ static void hv_irq_unmask(struct irq_data *data) } } - res = hv_do_hypercall(HVCALL_RETARGET_INTERRUPT | (var_size << 17), - params, NULL); + res = hv_do_hypercall( + HVCALL_RETARGET_INTERRUPT | (var_size << 17), + params, sizeof(*params) + var_size * 8, NULL, 0); exit_unlock: spin_unlock_irqrestore(&hbus->retarget_msi_interrupt_lock, flags); -- 2.14.1