On 2/8/2021 11:49 AM, Michael Kelley wrote: > From: Nuno Das Neves <nunodasneves@xxxxxxxxxxxxxxxxxxx> Sent: Friday, November 20, 2020 4:31 PM >> >> Introduce mmap interface for a virtual processor, exposing a page for >> setting and getting common registers while the VP is suspended. >> >> This provides a more performant and convenient way to get and set these >> registers in the context of a vmm's run-loop. >> >> Co-developed-by: Lillian Grassin-Drake <ligrassi@xxxxxxxxxxxxx> >> Signed-off-by: Lillian Grassin-Drake <ligrassi@xxxxxxxxxxxxx> >> Signed-off-by: Nuno Das Neves <nunodasneves@xxxxxxxxxxxxxxxxxxx> >> --- >> Documentation/virt/mshv/api.rst | 11 ++++ >> arch/x86/include/uapi/asm/hyperv-tlfs.h | 74 ++++++++++++++++++++++ >> include/asm-generic/hyperv-tlfs.h | 10 +++ >> include/linux/mshv.h | 1 + >> include/uapi/asm-generic/hyperv-tlfs.h | 5 ++ >> include/uapi/linux/mshv.h | 12 ++++ >> virt/mshv/mshv_main.c | 82 +++++++++++++++++++++++++ >> 7 files changed, 195 insertions(+) >> >> diff --git a/Documentation/virt/mshv/api.rst b/Documentation/virt/mshv/api.rst >> index 7fd75f248eff..89c276a8778f 100644 >> --- a/Documentation/virt/mshv/api.rst >> +++ b/Documentation/virt/mshv/api.rst >> @@ -149,3 +149,14 @@ HV_PARTITION_CREATION_FLAG_LAPIC_ENABLED >> Get/set various vp state. Currently these can be used to get and set >> emulated LAPIC state, and xsave data. >> >> +3.10 mmap(vp) >> +------------- >> +:Type: vp mmap >> +:Parameters: offset should be MSHV_VP_MMAP_REGISTERS_OFFSET >> +:Returns: 0 on success >> + >> +Maps a page into userspace that can be used to get and set common registers >> +while the vp is suspended. >> +The page is laid out in struct hv_vp_register_page in asm/hyperv-tlfs.h. >> + > > I'm assuming there's no support for the corresponding munmap(). > What happens if munmap is called? Does it just fail and the page remains > mapped? > munmap() will successfully unmap the page from userspace. 
The physical state page remains mapped in the hypervisor and is tracked by the mshv driver in vp->register_page. That page is reused by subsequent mmap() calls. >> + >> diff --git a/arch/x86/include/uapi/asm/hyperv-tlfs.h b/arch/x86/include/uapi/asm/hyperv- >> tlfs.h >> index 78758aedf23e..a241178567ff 100644 >> --- a/arch/x86/include/uapi/asm/hyperv-tlfs.h >> +++ b/arch/x86/include/uapi/asm/hyperv-tlfs.h >> @@ -1110,4 +1110,78 @@ struct hv_vp_state_data_xsave { >> union hv_x64_xsave_xfem_register states; >> }; >> >> +/* Bits for dirty mask of hv_vp_register_page */ >> +#define HV_X64_REGISTER_CLASS_GENERAL 0 >> +#define HV_X64_REGISTER_CLASS_IP 1 >> +#define HV_X64_REGISTER_CLASS_XMM 2 >> +#define HV_X64_REGISTER_CLASS_SEGMENT 3 >> +#define HV_X64_REGISTER_CLASS_FLAGS 4 >> + >> +#define HV_VP_REGISTER_PAGE_VERSION_1 1u >> + >> +struct hv_vp_register_page { >> + __u16 version; >> + bool isvalid; > > Like enum, avoid type "bool" in data structures shared with > Hyper-V. > Indeed - this should be u8. I will change it. 
>> + __u8 rsvdz; >> + __u32 dirty; >> + union { >> + struct { >> + __u64 rax; >> + __u64 rcx; >> + __u64 rdx; >> + __u64 rbx; >> + __u64 rsp; >> + __u64 rbp; >> + __u64 rsi; >> + __u64 rdi; >> + __u64 r8; >> + __u64 r9; >> + __u64 r10; >> + __u64 r11; >> + __u64 r12; >> + __u64 r13; >> + __u64 r14; >> + __u64 r15; >> + }; >> + >> + __u64 gp_registers[16]; >> + }; >> + __u64 rip; >> + __u64 rflags; >> + union { >> + struct { >> + struct hv_u128 xmm0; >> + struct hv_u128 xmm1; >> + struct hv_u128 xmm2; >> + struct hv_u128 xmm3; >> + struct hv_u128 xmm4; >> + struct hv_u128 xmm5; >> + }; >> + >> + struct hv_u128 xmm_registers[6]; >> + }; >> + union { >> + struct { >> + struct hv_x64_segment_register es; >> + struct hv_x64_segment_register cs; >> + struct hv_x64_segment_register ss; >> + struct hv_x64_segment_register ds; >> + struct hv_x64_segment_register fs; >> + struct hv_x64_segment_register gs; >> + }; >> + >> + struct hv_x64_segment_register segment_registers[6]; >> + }; >> + /* read only */ >> + __u64 cr0; >> + __u64 cr3; >> + __u64 cr4; >> + __u64 cr8; >> + __u64 efer; >> + __u64 dr7; >> + union hv_x64_pending_interruption_register pending_interruption; >> + union hv_x64_interrupt_state_register interrupt_state; >> + __u64 instruction_emulation_hints; >> +}; >> + >> #endif >> diff --git a/include/asm-generic/hyperv-tlfs.h b/include/asm-generic/hyperv-tlfs.h >> index 4bc59a0344ce..9eed4b869110 100644 >> --- a/include/asm-generic/hyperv-tlfs.h >> +++ b/include/asm-generic/hyperv-tlfs.h >> @@ -837,4 +837,14 @@ struct hv_set_vp_state_in { >> union hv_input_set_vp_state_data data[]; >> }; >> >> +struct hv_map_vp_state_page_in { >> + u64 partition_id; >> + u32 vp_index; >> + enum hv_vp_state_page_type type; >> +}; >> + >> +struct hv_map_vp_state_page_out { >> + u64 map_location; /* page number */ >> +}; >> + >> #endif >> diff --git a/include/linux/mshv.h b/include/linux/mshv.h >> index 3933d80294f1..33f4d0cfee11 100644 >> --- a/include/linux/mshv.h >> +++ 
b/include/linux/mshv.h >> @@ -20,6 +20,7 @@ struct mshv_vp { >> u32 index; >> struct mshv_partition *partition; >> struct mutex mutex; >> + struct page *register_page; >> struct { >> struct semaphore sem; >> struct task_struct *task; >> diff --git a/include/uapi/asm-generic/hyperv-tlfs.h b/include/uapi/asm-generic/hyperv- >> tlfs.h >> index b3c84c69b73f..a747f39b132a 100644 >> --- a/include/uapi/asm-generic/hyperv-tlfs.h >> +++ b/include/uapi/asm-generic/hyperv-tlfs.h >> @@ -92,4 +92,9 @@ enum hv_get_set_vp_state_type { >> HV_GET_SET_VP_STATE_SYNTHETIC_TIMERS = 4, >> }; >> >> +enum hv_vp_state_page_type { >> + HV_VP_STATE_PAGE_REGISTERS = 0, >> + HV_VP_STATE_PAGE_COUNT >> +}; >> + >> #endif >> diff --git a/include/uapi/linux/mshv.h b/include/uapi/linux/mshv.h >> index ae0bb64bbec3..8537ff29aee5 100644 >> --- a/include/uapi/linux/mshv.h >> +++ b/include/uapi/linux/mshv.h >> @@ -13,6 +13,8 @@ >> >> #define MSHV_VERSION 0x0 >> >> +#define MSHV_VP_MMAP_REGISTERS_OFFSET (HV_VP_STATE_PAGE_REGISTERS * 0x1000) >> + >> struct mshv_create_partition { >> __u64 flags; >> struct hv_partition_creation_properties partition_creation_properties; >> @@ -84,4 +86,14 @@ struct mshv_vp_state { >> #define MSHV_GET_VP_STATE _IOWR(MSHV_IOCTL, 0x0A, struct mshv_vp_state) >> #define MSHV_SET_VP_STATE _IOWR(MSHV_IOCTL, 0x0B, struct mshv_vp_state) >> >> +/* register page mapping example: >> + * struct hv_vp_register_page *regs = mmap(NULL, >> + * 4096, >> + * PROT_READ | PROT_WRITE, >> + * MAP_SHARED, >> + * vp_fd, >> + * MSHV_VP_MMAP_REGISTERS_OFFSET); >> + * munmap(regs, 4096); >> + */ >> + >> #endif >> diff --git a/virt/mshv/mshv_main.c b/virt/mshv/mshv_main.c >> index 70172d9488de..a597254fa4f4 100644 >> --- a/virt/mshv/mshv_main.c >> +++ b/virt/mshv/mshv_main.c >> @@ -43,11 +43,18 @@ static long mshv_partition_ioctl(struct file *filp, unsigned int ioctl, >> unsigned >> static int mshv_dev_open(struct inode *inode, struct file *filp); >> static int mshv_dev_release(struct inode *inode, 
struct file *filp); >> static long mshv_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg); >> +static int mshv_vp_mmap(struct file *file, struct vm_area_struct *vma); >> +static vm_fault_t mshv_vp_fault(struct vm_fault *vmf); >> + >> +static const struct vm_operations_struct mshv_vp_vm_ops = { >> + .fault = mshv_vp_fault, >> +}; >> >> static const struct file_operations mshv_vp_fops = { >> .release = mshv_vp_release, >> .unlocked_ioctl = mshv_vp_ioctl, >> .llseek = noop_llseek, >> + .mmap = mshv_vp_mmap, >> }; >> >> static const struct file_operations mshv_partition_fops = { >> @@ -499,6 +506,47 @@ hv_call_set_vp_registers(u32 vp_index, >> return -hv_status_to_errno(status); >> } >> >> +static int >> +hv_call_map_vp_state_page(u32 vp_index, u64 partition_id, >> + struct page **state_page) >> +{ >> + struct hv_map_vp_state_page_in *input; >> + struct hv_map_vp_state_page_out *output; >> + int status; >> + int ret; >> + unsigned long flags; >> + >> + do { >> + local_irq_save(flags); >> + input = (struct hv_map_vp_state_page_in *)(*this_cpu_ptr( >> + hyperv_pcpu_input_arg)); >> + output = (struct hv_map_vp_state_page_out *)(*this_cpu_ptr( >> + hyperv_pcpu_output_arg)); >> + >> + input->partition_id = partition_id; >> + input->vp_index = vp_index; >> + input->type = HV_VP_STATE_PAGE_REGISTERS; >> + status = hv_do_hypercall(HVCALL_MAP_VP_STATE_PAGE, >> + input, output); >> + >> + if (status != HV_STATUS_INSUFFICIENT_MEMORY) { >> + if (status == HV_STATUS_SUCCESS) >> + *state_page = pfn_to_page(output->map_location); >> + else >> + pr_err("%s: %s\n", __func__, >> + hv_status_to_string(status)); >> + local_irq_restore(flags); >> + ret = -hv_status_to_errno(status); >> + break; >> + } >> + local_irq_restore(flags); >> + >> + ret = hv_call_deposit_pages(NUMA_NO_NODE, partition_id, 1); >> + } while (!ret); >> + >> + return ret; >> +} >> + >> static void >> mshv_isr(void) >> { >> @@ -1155,6 +1203,40 @@ mshv_vp_ioctl(struct file *filp, unsigned int ioctl, 
unsigned long >> arg) >> return r; >> } >> >> +static vm_fault_t mshv_vp_fault(struct vm_fault *vmf) >> +{ >> + struct mshv_vp *vp = vmf->vma->vm_file->private_data; >> + >> + vmf->page = vp->register_page; >> + >> + return 0; >> +} >> + >> +static int mshv_vp_mmap(struct file *file, struct vm_area_struct *vma) >> +{ >> + int ret; >> + struct mshv_vp *vp = file->private_data; >> + >> + if (vma->vm_pgoff != MSHV_VP_MMAP_REGISTERS_OFFSET) >> + return -EINVAL; >> + >> + if (mutex_lock_killable(&vp->mutex)) >> + return -EINTR; >> + >> + if (!vp->register_page) { >> + ret = hv_call_map_vp_state_page(vp->index, >> + vp->partition->id, >> + &vp->register_page); >> + if (ret) >> + return ret; >> + } >> + >> + mutex_unlock(&vp->mutex); >> + >> + vma->vm_ops = &mshv_vp_vm_ops; >> + return 0; >> +} >> + >> static int >> mshv_vp_release(struct inode *inode, struct file *filp) >> { >> -- >> 2.25.1