From: Nuno Das Neves <nunodasneves@xxxxxxxxxxxxxxxxxxx> Sent: Friday, November 20, 2020 4:31 PM > > Same idea as synic setup in drivers/hv/hv.c:hv_synic_enable_regs() > and hv_synic_disable_regs(). > Setting up synic registers in both vmbus driver and mshv would clobber > them, but the vmbus driver will not run in the root partition, so this > is safe. > > Co-developed-by: Lillian Grassin-Drake <ligrassi@xxxxxxxxxxxxx> > Signed-off-by: Lillian Grassin-Drake <ligrassi@xxxxxxxxxxxxx> > Signed-off-by: Nuno Das Neves <nunodasneves@xxxxxxxxxxxxxxxxxxx> > --- > arch/x86/include/asm/hyperv-tlfs.h | 29 --- > arch/x86/include/uapi/asm/hyperv-tlfs.h | 264 ++++++++++++++++++++++++ > include/asm-generic/hyperv-tlfs.h | 46 +---- > include/linux/mshv.h | 1 + > include/uapi/asm-generic/hyperv-tlfs.h | 43 ++++ > virt/mshv/mshv_main.c | 98 ++++++++- > 6 files changed, 404 insertions(+), 77 deletions(-) > > diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h > index 4cd44ae9bffb..c34a6bb4f457 100644 > --- a/arch/x86/include/asm/hyperv-tlfs.h > +++ b/arch/x86/include/asm/hyperv-tlfs.h > @@ -267,35 +267,6 @@ struct hv_tsc_emulation_status { > #define HV_X64_MSR_TSC_REFERENCE_ENABLE 0x00000001 > #define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT 12 > > - > -/* Define hypervisor message types. */ > -enum hv_message_type { > - HVMSG_NONE = 0x00000000, > - > - /* Memory access messages. */ > - HVMSG_UNMAPPED_GPA = 0x80000000, > - HVMSG_GPA_INTERCEPT = 0x80000001, > - > - /* Timer notification messages. */ > - HVMSG_TIMER_EXPIRED = 0x80000010, > - > - /* Error messages. */ > - HVMSG_INVALID_VP_REGISTER_VALUE = 0x80000020, > - HVMSG_UNRECOVERABLE_EXCEPTION = 0x80000021, > - HVMSG_UNSUPPORTED_FEATURE = 0x80000022, > - > - /* Trace buffer complete messages. */ > - HVMSG_EVENTLOG_BUFFERCOMPLETE = 0x80000040, > - > - /* Platform-specific processor intercept messages. 
*/ > - HVMSG_X64_IOPORT_INTERCEPT = 0x80010000, > - HVMSG_X64_MSR_INTERCEPT = 0x80010001, > - HVMSG_X64_CPUID_INTERCEPT = 0x80010002, > - HVMSG_X64_EXCEPTION_INTERCEPT = 0x80010003, > - HVMSG_X64_APIC_EOI = 0x80010004, > - HVMSG_X64_LEGACY_FP_ERROR = 0x80010005 > -}; > - > struct hv_nested_enlightenments_control { > struct { > __u32 directhypercall:1; > diff --git a/arch/x86/include/uapi/asm/hyperv-tlfs.h b/arch/x86/include/uapi/asm/hyperv- > tlfs.h > index 2ff655962738..c6a27053f791 100644 > --- a/arch/x86/include/uapi/asm/hyperv-tlfs.h > +++ b/arch/x86/include/uapi/asm/hyperv-tlfs.h > @@ -722,4 +722,268 @@ union hv_register_value { > pending_virtualization_fault_event; > }; > > +/* Define hypervisor message types. */ > +enum hv_message_type { > + HVMSG_NONE = 0x00000000, > + > + /* Memory access messages. */ > + HVMSG_UNMAPPED_GPA = 0x80000000, > + HVMSG_GPA_INTERCEPT = 0x80000001, > + > + /* Timer notification messages. */ > + HVMSG_TIMER_EXPIRED = 0x80000010, > + > + /* Error messages. */ > + HVMSG_INVALID_VP_REGISTER_VALUE = 0x80000020, > + HVMSG_UNRECOVERABLE_EXCEPTION = 0x80000021, > + HVMSG_UNSUPPORTED_FEATURE = 0x80000022, > + > + /* Trace buffer complete messages. */ > + HVMSG_EVENTLOG_BUFFERCOMPLETE = 0x80000040, > + > + /* Platform-specific processor intercept messages. */ > + HVMSG_X64_IO_PORT_INTERCEPT = 0x80010000, > + HVMSG_X64_MSR_INTERCEPT = 0x80010001, > + HVMSG_X64_CPUID_INTERCEPT = 0x80010002, > + HVMSG_X64_EXCEPTION_INTERCEPT = 0x80010003, > + HVMSG_X64_APIC_EOI = 0x80010004, > + HVMSG_X64_LEGACY_FP_ERROR = 0x80010005, > + HVMSG_X64_IOMMU_PRQ = 0x80010006, > + HVMSG_X64_HALT = 0x80010007, > + HVMSG_X64_INTERRUPTION_DELIVERABLE = 0x80010008, > + HVMSG_X64_SIPI_INTERCEPT = 0x80010009, > +}; I have a separate patch series that moves this enum to the asm-generic portion of hyperv-tlfs.h because there's not a good way to separate the arch neutral from arch dependent values. 
> + > + > +union hv_x64_vp_execution_state { > + __u16 as_uint16; > + struct { > + __u16 cpl:2; > + __u16 cr0_pe:1; > + __u16 cr0_am:1; > + __u16 efer_lma:1; > + __u16 debug_active:1; > + __u16 interruption_pending:1; > + __u16 vtl:4; > + __u16 enclave_mode:1; > + __u16 interrupt_shadow:1; > + __u16 virtualization_fault_active:1; > + __u16 reserved:2; > + }; > +}; > + > +/* Values for intercept_access_type field */ > +#define HV_INTERCEPT_ACCESS_READ 0 > +#define HV_INTERCEPT_ACCESS_WRITE 1 > +#define HV_INTERCEPT_ACCESS_EXECUTE 2 > + > +struct hv_x64_intercept_message_header { > + __u32 vp_index; > + __u8 instruction_length:4; > + __u8 cr8:4; // only set for exo partitions > + __u8 intercept_access_type; > + union hv_x64_vp_execution_state execution_state; > + struct hv_x64_segment_register cs_segment; > + __u64 rip; > + __u64 rflags; > +}; > + > +#define HV_HYPERCALL_INTERCEPT_MAX_XMM_REGISTERS 6 > + > +struct hv_x64_hypercall_intercept_message { > + struct hv_x64_intercept_message_header header; > + __u64 rax; > + __u64 rbx; > + __u64 rcx; > + __u64 rdx; > + __u64 r8; > + __u64 rsi; > + __u64 rdi; > + struct hv_u128 xmmregisters[HV_HYPERCALL_INTERCEPT_MAX_XMM_REGISTERS]; > + struct { > + __u32 isolated:1; > + __u32 reserved:31; > + }; > +}; > + > +union hv_x64_register_access_info { > + union hv_register_value source_value; > + enum hv_register_name destination_register; > + __u64 source_address; > + __u64 destination_address; > +}; > + > +struct hv_x64_register_intercept_message { > + struct hv_x64_intercept_message_header header; > + struct { > + __u8 is_memory_op:1; > + __u8 reserved:7; > + }; > + __u8 reserved8; > + __u16 reserved16; > + enum hv_register_name register_name; > + union hv_x64_register_access_info access_info; > +}; > + > +union hv_x64_memory_access_info { > + __u8 as_uint8; > + struct { > + __u8 gva_valid:1; > + __u8 gva_gpa_valid:1; > + __u8 hypercall_output_pending:1; > + __u8 tlb_locked_no_overlay:1; > + __u8 reserved:4; > + }; > +}; > + > 
+union hv_x64_io_port_access_info { > + __u8 as_uint8; > + struct { > + __u8 access_size:3; > + __u8 string_op:1; > + __u8 rep_prefix:1; > + __u8 reserved:3; > + }; > +}; > + > +union hv_x64_exception_info { > + __u8 as_uint8; > + struct { > + __u8 error_code_valid:1; > + __u8 software_exception:1; > + __u8 reserved:6; > + }; > +}; > + > +enum hv_cache_type { > + HV_CACHE_TYPE_UNCACHED = 0, > + HV_CACHE_TYPE_WRITE_COMBINING = 1, > + HV_CACHE_TYPE_WRITE_THROUGH = 4, > + HV_CACHE_TYPE_WRITE_PROTECTED = 5, > + HV_CACHE_TYPE_WRITE_BACK = 6 > +}; > + > +struct hv_x64_memory_intercept_message { > + struct hv_x64_intercept_message_header header; > + enum hv_cache_type cache_type; > + __u8 instruction_byte_count; > + union hv_x64_memory_access_info memory_access_info; > + __u8 tpr_priority; > + __u8 reserved1; > + __u64 guest_virtual_address; > + __u64 guest_physical_address; > + __u8 instruction_bytes[16]; > +}; > + > +struct hv_x64_cpuid_intercept_message { > + struct hv_x64_intercept_message_header header; > + __u64 rax; > + __u64 rcx; > + __u64 rdx; > + __u64 rbx; > + __u64 default_result_rax; > + __u64 default_result_rcx; > + __u64 default_result_rdx; > + __u64 default_result_rbx; > +}; > + > +struct hv_x64_msr_intercept_message { > + struct hv_x64_intercept_message_header header; > + __u32 msr_number; > + __u32 reserved; > + __u64 rdx; > + __u64 rax; > +}; > + > +struct hv_x64_io_port_intercept_message { > + struct hv_x64_intercept_message_header header; > + __u16 port_number; > + union hv_x64_io_port_access_info access_info; > + __u8 instruction_byte_count; > + __u32 reserved; > + __u64 rax; > + __u8 instruction_bytes[16]; > + struct hv_x64_segment_register ds_segment; > + struct hv_x64_segment_register es_segment; > + __u64 rcx; > + __u64 rsi; > + __u64 rdi; > +}; > + > +struct hv_x64_exception_intercept_message { > + struct hv_x64_intercept_message_header header; > + __u16 exception_vector; > + union hv_x64_exception_info exception_info; > + __u8 
instruction_byte_count; > + __u32 error_code; > + __u64 exception_parameter; > + __u64 reserved; > + __u8 instruction_bytes[16]; > + struct hv_x64_segment_register ds_segment; > + struct hv_x64_segment_register ss_segment; > + __u64 rax; > + __u64 rcx; > + __u64 rdx; > + __u64 rbx; Is the above the correct ordering (rax, rcx, rdx, rbx)? It's just what you would expect .... > + __u64 rsp; > + __u64 rbp; > + __u64 rsi; > + __u64 rdi; > + __u64 r8; > + __u64 r9; > + __u64 r10; > + __u64 r11; > + __u64 r12; > + __u64 r13; > + __u64 r14; > + __u64 r15; > +}; > + > +struct hv_x64_invalid_vp_register_message { > + __u32 vp_index; > + __u32 reserved; > +}; > + > +struct hv_x64_unrecoverable_exception_message { > + struct hv_x64_intercept_message_header header; > +}; > + > +enum hv_x64_unsupported_feature_code { > + hv_unsupported_feature_intercept = 1, > + hv_unsupported_feature_task_switch_tss = 2 > +}; > + > +struct hv_x64_unsupported_feature_message { > + __u32 vp_index; > + enum hv_x64_unsupported_feature_code feature_code; > + __u64 feature_parameter; > +}; > + > +struct hv_x64_halt_message { > + struct hv_x64_intercept_message_header header; > +}; > + > +enum hv_x64_pending_interruption_type { > + HV_X64_PENDING_INTERRUPT = 0, > + HV_X64_PENDING_NMI = 2, > + HV_X64_PENDING_EXCEPTION = 3 > +}; > + > +struct hv_x64_interruption_deliverable_message { > + struct hv_x64_intercept_message_header header; > + enum hv_x64_pending_interruption_type deliverable_type; > + __u32 rsvd; > +}; > + > +struct hv_x64_sipi_intercept_message { > + struct hv_x64_intercept_message_header header; > + __u32 target_vp_index; > + __u32 interrupt_vector; > +}; > + > +struct hv_x64_apic_eoi_message { > + __u32 vp_index; > + __u32 interrupt_vector; > +}; Same comments as before about enum types, not depending on the compiler to add padding, and marking as __packed. 
> + > #endif > diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h > index b9295400c20b..e0185c3872a9 100644 > --- a/include/asm-generic/hyperv-tlfs.h > +++ b/include/asm-generic/hyperv-tlfs.h > @@ -241,6 +241,8 @@ static inline const char *hv_status_to_string(enum hv_status status) > /* Valid SynIC vectors are 16-255. */ > #define HV_SYNIC_FIRST_VALID_VECTOR (16) > > +#define HV_SYNIC_INTERCEPTION_SINT_INDEX 0x00000000 > + > #define HV_SYNIC_CONTROL_ENABLE (1ULL << 0) > #define HV_SYNIC_SIMP_ENABLE (1ULL << 0) > #define HV_SYNIC_SIEFP_ENABLE (1ULL << 0) > @@ -250,49 +252,6 @@ static inline const char *hv_status_to_string(enum hv_status > status) > > #define HV_SYNIC_STIMER_COUNT (4) > > -/* Define synthetic interrupt controller message constants. */ > -#define HV_MESSAGE_SIZE (256) > -#define HV_MESSAGE_PAYLOAD_BYTE_COUNT (240) > -#define HV_MESSAGE_PAYLOAD_QWORD_COUNT (30) > - > -/* Define synthetic interrupt controller message flags. */ > -union hv_message_flags { > - __u8 asu8; > - struct { > - __u8 msg_pending:1; > - __u8 reserved:7; > - } __packed; > -}; > - > -/* Define port identifier type. */ > -union hv_port_id { > - __u32 asu32; > - struct { > - __u32 id:24; > - __u32 reserved:8; > - } __packed u; > -}; > - > -/* Define synthetic interrupt controller message header. */ > -struct hv_message_header { > - __u32 message_type; > - __u8 payload_size; > - union hv_message_flags message_flags; > - __u8 reserved[2]; > - union { > - __u64 sender; > - union hv_port_id port; > - }; > -} __packed; > - > -/* Define synthetic interrupt controller message format. */ > -struct hv_message { > - struct hv_message_header header; > - union { > - __u64 payload[HV_MESSAGE_PAYLOAD_QWORD_COUNT]; > - } u; > -} __packed; > - > /* Define the synthetic interrupt message page layout. 
*/ > struct hv_message_page { > struct hv_message sint_message[HV_SYNIC_SINT_COUNT]; > @@ -306,7 +265,6 @@ struct hv_timer_message_payload { > __u64 delivery_time; /* When the message was delivered */ > } __packed; > > - > /* Define synthetic interrupt controller flag constants. */ > #define HV_EVENT_FLAGS_COUNT (256 * 8) > #define HV_EVENT_FLAGS_LONG_COUNT (256 / sizeof(unsigned long)) > diff --git a/include/linux/mshv.h b/include/linux/mshv.h > index dfe469f573f9..7709aaa1e064 100644 > --- a/include/linux/mshv.h > +++ b/include/linux/mshv.h > @@ -42,6 +42,7 @@ struct mshv_partition { > }; > > struct mshv { > + struct hv_message_page __percpu **synic_message_page; > struct { > spinlock_t lock; > u64 count; > diff --git a/include/uapi/asm-generic/hyperv-tlfs.h b/include/uapi/asm-generic/hyperv- > tlfs.h > index e7b09b9f00de..e87389054b68 100644 > --- a/include/uapi/asm-generic/hyperv-tlfs.h > +++ b/include/uapi/asm-generic/hyperv-tlfs.h > @@ -6,6 +6,49 @@ > #define BIT(X) (1ULL << (X)) > #endif > > +/* Define synthetic interrupt controller message constants. */ > +#define HV_MESSAGE_SIZE (256) > +#define HV_MESSAGE_PAYLOAD_BYTE_COUNT (240) > +#define HV_MESSAGE_PAYLOAD_QWORD_COUNT (30) > + > +/* Define synthetic interrupt controller message flags. */ > +union hv_message_flags { > + __u8 asu8; > + struct { > + __u8 msg_pending:1; > + __u8 reserved:7; > + }; > +}; > + > +/* Define port identifier type. */ > +union hv_port_id { > + __u32 asu32; > + struct { > + __u32 id:24; > + __u32 reserved:8; > + } u; > +}; > + > +/* Define synthetic interrupt controller message header. */ > +struct hv_message_header { > + enum hv_message_type message_type; > + __u8 payload_size; > + union hv_message_flags message_flags; > + __u8 reserved[2]; > + union { > + __u64 sender; > + union hv_port_id port; > + }; > +}; > + > +/* Define synthetic interrupt controller message format. 
*/ > +struct hv_message { > + struct hv_message_header header; > + union { > + __u64 payload[HV_MESSAGE_PAYLOAD_QWORD_COUNT]; > + } u; > +}; > + > /* Userspace-visible partition creation flags */ > #define HV_PARTITION_CREATION_FLAG_SMT_ENABLED_GUEST BIT(0) > #define HV_PARTITION_CREATION_FLAG_GPA_LARGE_PAGES_DISABLED BIT(3) > diff --git a/virt/mshv/mshv_main.c b/virt/mshv/mshv_main.c > index 2a10137a1e84..c9445d2edb37 100644 > --- a/virt/mshv/mshv_main.c > +++ b/virt/mshv/mshv_main.c > @@ -15,6 +15,8 @@ > #include <linux/file.h> > #include <linux/anon_inodes.h> > #include <linux/mm.h> > +#include <linux/io.h> > +#include <linux/cpuhotplug.h> > #include <linux/mshv.h> > #include <asm/mshyperv.h> > > @@ -1152,23 +1154,111 @@ mshv_dev_release(struct inode *inode, struct file *filp) > return 0; > } > > +static int > +mshv_synic_init(unsigned int cpu) > +{ > + union hv_synic_simp simp; > + union hv_synic_sint sint; > + union hv_synic_scontrol sctrl; > + struct hv_message_page **msg_page = > + this_cpu_ptr(mshv.synic_message_page); > + > + /* Setup the Synic's message page */ > + hv_get_simp(simp.as_uint64); > + simp.simp_enabled = true; > + *msg_page = memremap(simp.base_simp_gpa << PAGE_SHIFT, > + PAGE_SIZE, MEMREMAP_WB); Use HV_HYP_PAGE_SHIFT and HV_HYP_PAGE_SIZE. 
> + if (!msg_page) { > + pr_err("%s: memremap failed\n", __func__); > + return -EFAULT; > + } > + hv_set_simp(simp.as_uint64); > + > + /* Enable intercepts */ > + sint.as_uint64 = 0; > + sint.vector = HYPERVISOR_CALLBACK_VECTOR; > + sint.masked = false; > + sint.auto_eoi = hv_recommend_using_aeoi(); > + hv_set_synint_state(HV_SYNIC_INTERCEPTION_SINT_INDEX, sint.as_uint64); > + > + /* Enable global synic bit */ > + hv_get_synic_state(sctrl.as_uint64); > + sctrl.enable = 1; > + hv_set_synic_state(sctrl.as_uint64); > + > + return 0; > +} > + > +static int > +mshv_synic_cleanup(unsigned int cpu) > +{ > + union hv_synic_sint sint; > + union hv_synic_simp simp; > + union hv_synic_scontrol sctrl; > + struct hv_message_page **msg_page = > + this_cpu_ptr(mshv.synic_message_page); > + > + /* Disable the interrupt */ > + hv_get_synint_state(HV_SYNIC_INTERCEPTION_SINT_INDEX, sint.as_uint64); > + sint.masked = true; > + hv_set_synint_state(HV_SYNIC_INTERCEPTION_SINT_INDEX, sint.as_uint64); > + > + /* Disable Synic's message page */ > + hv_get_simp(simp.as_uint64); > + simp.simp_enabled = false; > + hv_set_simp(simp.as_uint64); > + memunmap(*msg_page); > + > + /* Disable global synic bit */ > + hv_get_synic_state(sctrl.as_uint64); > + sctrl.enable = 0; > + hv_set_synic_state(sctrl.as_uint64); > + > + return 0; > +} > + > +static int mshv_cpuhp_online; > + > static int > __init mshv_init(void) > { > - int r; > + int ret; Ideally, change the name of the variable in the earlier patch so this one isn't cluttered with the change. 
> > - r = misc_register(&mshv_dev); > - if (r) > + ret = misc_register(&mshv_dev); > + if (ret) { > pr_err("%s: misc device register failed\n", __func__); > + return ret; > + } > + spin_lock_init(&mshv.partitions.lock); > > + mshv.synic_message_page = alloc_percpu(struct hv_message_page *); > + if (!mshv.synic_message_page) { > + pr_err("%s: failed to allocate percpu synic page\n", __func__); > + misc_deregister(&mshv_dev); > + return -ENOMEM; > + } > + > + ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "mshv_synic", > + mshv_synic_init, > + mshv_synic_cleanup); > + if (ret < 0) { > + pr_err("%s: failed to setup cpu hotplug state: %i\n", > + __func__, ret); > + return ret; > + } > + > + mshv_cpuhp_online = ret; > spin_lock_init(&mshv.partitions.lock); It looks like the spin lock is being initialized twice. > > - return r; > + return 0; > } > > static void > __exit mshv_exit(void) > { > + cpuhp_remove_state(mshv_cpuhp_online); > + free_percpu(mshv.synic_message_page); > + > misc_deregister(&mshv_dev); > } > > -- > 2.25.1