From: Jinank Jain <jinankjain@xxxxxxxxxxxxxxxxxxx> Sent: Tuesday, December 13, 2022 10:33 PM > > Child partitions are free to allocate SynIC message and event page but in > case of root partition it must use the pages allocated by Microsoft > Hypervisor (MSHV). Base address for these pages can be found using > synthetic MSRs exposed by MSHV. There is a slight difference in those MSRs > for nested vs non-nested root partition. > > Signed-off-by: Jinank Jain <jinankjain@xxxxxxxxxxxxxxxxxxx> You have addressed all my previous comments and those areas look good. But I see a new issue that I had not noticed before. See comment below. > --- > arch/x86/include/asm/hyperv-tlfs.h | 11 +++++ > arch/x86/include/asm/mshyperv.h | 30 +++----------- > arch/x86/kernel/cpu/mshyperv.c | 65 ++++++++++++++++++++++++++++++ > drivers/hv/hv.c | 19 ++++++--- > 4 files changed, 96 insertions(+), 29 deletions(-) > > diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h > index 58c03d18c235..b5019becb618 100644 > --- a/arch/x86/include/asm/hyperv-tlfs.h > +++ b/arch/x86/include/asm/hyperv-tlfs.h > @@ -225,6 +225,17 @@ enum hv_isolation_type { > #define HV_REGISTER_SINT14 0x4000009E > #define HV_REGISTER_SINT15 0x4000009F > > +/* > + * Define synthetic interrupt controller model specific registers for > + * nested hypervisor. > + */ > +#define HV_REGISTER_NESTED_SCONTROL 0x40001080 > +#define HV_REGISTER_NESTED_SVERSION 0x40001081 > +#define HV_REGISTER_NESTED_SIEFP 0x40001082 > +#define HV_REGISTER_NESTED_SIMP 0x40001083 > +#define HV_REGISTER_NESTED_EOM 0x40001084 > +#define HV_REGISTER_NESTED_SINT0 0x40001090 > + > /* > * Synthetic Timer MSRs. Four timers per vcpu. > */ > diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h > index 61f0c206bff0..c38e4c66a3ac 100644 > --- a/arch/x86/include/asm/mshyperv.h > +++ b/arch/x86/include/asm/mshyperv.h > @@ -198,30 +198,10 @@ static inline bool hv_is_synic_reg(unsigned int reg) > return false; > } > > -static inline u64 hv_get_register(unsigned int reg) > -{ > - u64 value; > - > - if (hv_is_synic_reg(reg) && hv_isolation_type_snp()) > - hv_ghcb_msr_read(reg, &value); > - else > - rdmsrl(reg, value); > - return value; > -} > - > -static inline void hv_set_register(unsigned int reg, u64 value) > -{ > - if (hv_is_synic_reg(reg) && hv_isolation_type_snp()) { > - hv_ghcb_msr_write(reg, value); > - > - /* Write proxy bit via wrmsl instruction */ > - if (reg >= HV_REGISTER_SINT0 && > - reg <= HV_REGISTER_SINT15) > - wrmsrl(reg, value | 1 << 20); > - } else { > - wrmsrl(reg, value); > - } > -} > +u64 hv_get_register(unsigned int reg); > +void hv_set_register(unsigned int reg, u64 value); > +u64 hv_get_non_nested_register(unsigned int reg); > +void hv_set_non_nested_register(unsigned int reg, u64 value); > > #else /* CONFIG_HYPERV */ > static inline void hyperv_init(void) {} > @@ -241,6 +221,8 @@ static inline int hyperv_flush_guest_mapping_range(u64 as, > } > static inline void hv_set_register(unsigned int reg, u64 value) { } > static inline u64 hv_get_register(unsigned int reg) { return 0; } > +static inline void hv_set_non_nested_register(unsigned int reg, u64 value) { } > +static inline u64 hv_get_non_nested_register(unsigned int reg) { return 0; } > static inline int hv_set_mem_host_visibility(unsigned long addr, int numpages, > bool visible) > { > diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c > index f9b78d4829e3..938fc82edf05 100644 > --- a/arch/x86/kernel/cpu/mshyperv.c > +++ b/arch/x86/kernel/cpu/mshyperv.c > @@ -41,7 +41,72 @@ bool hv_root_partition; > bool hv_nested; > struct ms_hyperv_info ms_hyperv; > > +static inline unsigned int hv_get_nested_reg(unsigned int reg) > +{ > + switch (reg) { > + case HV_REGISTER_SIMP: > + return HV_REGISTER_NESTED_SIMP; > + case HV_REGISTER_SIEFP: > + return HV_REGISTER_NESTED_SIEFP; > + case HV_REGISTER_SVERSION: > + return HV_REGISTER_NESTED_SVERSION; > + case HV_REGISTER_SCONTROL: > + return HV_REGISTER_NESTED_SCONTROL; > + case HV_REGISTER_SINT0: > + return HV_REGISTER_NESTED_SINT0; > + case HV_REGISTER_EOM: > + return HV_REGISTER_NESTED_EOM; > + default: > + return reg; > + } > +} > + > #if IS_ENABLED(CONFIG_HYPERV) > +u64 hv_get_non_nested_register(unsigned int reg) > +{ > + u64 value; > + > + if (hv_is_synic_reg(reg) && hv_isolation_type_snp()) > + hv_ghcb_msr_read(reg, &value); > + else > + rdmsrl(reg, value); > + return value; > +} > +EXPORT_SYMBOL_GPL(hv_get_non_nested_register); > + > +void hv_set_non_nested_register(unsigned int reg, u64 value) > +{ > + if (hv_is_synic_reg(reg) && hv_isolation_type_snp()) { > + hv_ghcb_msr_write(reg, value); > + > + /* Write proxy bit via wrmsl instruction */ > + if (reg >= HV_REGISTER_SINT0 && > + reg <= HV_REGISTER_SINT15) > + wrmsrl(reg, value | 1 << 20); > + } else { > + wrmsrl(reg, value); > + } > +} > +EXPORT_SYMBOL_GPL(hv_set_non_nested_register); > + > +u64 hv_get_register(unsigned int reg) > +{ > + if (hv_nested) > + reg = hv_get_nested_reg(reg); > + > + return hv_get_non_nested_register(reg); > +} > +EXPORT_SYMBOL_GPL(hv_get_register); > + > +void hv_set_register(unsigned int reg, u64 value) > +{ > + if (hv_nested) > + reg = hv_get_nested_reg(reg); > + > + hv_set_non_nested_register(reg, value); > +} > +EXPORT_SYMBOL_GPL(hv_set_register); > + > static void (*vmbus_handler)(void); > static void (*hv_stimer0_handler)(void); > static void (*hv_kexec_handler)(void); > diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c > index 4d6480d57546..986814a903ee 100644 > --- a/drivers/hv/hv.c > +++ b/drivers/hv/hv.c > @@ -147,7 +147,7 @@ int hv_synic_alloc(void) > * Synic message and event pages are allocated by paravisor. > * Skip these pages allocation here. > */ > - if (!hv_isolation_type_snp()) { > + if (!hv_isolation_type_snp() && !hv_root_partition) { > hv_cpu->synic_message_page = > (void *)get_zeroed_page(GFP_ATOMIC); > if (hv_cpu->synic_message_page == NULL) { > @@ -188,8 +188,16 @@ void hv_synic_free(void) > struct hv_per_cpu_context *hv_cpu > = per_cpu_ptr(hv_context.cpu_context, cpu); > > - free_page((unsigned long)hv_cpu->synic_event_page); > - free_page((unsigned long)hv_cpu->synic_message_page); > + if (hv_root_partition) { > + if (hv_cpu->synic_event_page != NULL) > + memunmap(hv_cpu->synic_event_page); > + > + if (hv_cpu->synic_message_page != NULL) > + memunmap(hv_cpu->synic_message_page); These memunmap() calls seem to be done in the wrong place. There are two pairs of functions that should be symmetrical unless there's a really good reason otherwise: 1. hv_synic_alloc() and hv_synic_free() 2. hv_synic_enable_regs() and hv_synic_disable_regs() The functions in #1 handle the allocation and freeing of these three pages: synic_event_page, synic_message_page, and post_msg_page. If the synic_event_page and synic_message_page don't need to be allocated because they are provided by the hypervisor or paravisor, then the allocation is skipped in hv_synic_alloc(), and the free_page operations in hv_synic_free() are no-ops if the corresponding pointer is NULL. The functions in #2 should handle the mapping and unmapping in the case of pages provided by the hypervisor or paravisor. It appears that the hv_isolation_type_snp() case does this (mostly) correctly. But your code does the unmap in hv_synic_free(), which isn't symmetrical. Unless there's something unique about the situation when running in the root partition, the unmap should be done in hv_synic_disable_regs() like it is for the SNP case. My "mostly" correctly comment above is because the current code in hv_synic_disable_regs() should be setting hv_cpu->synic_message_page and hv_cpu->synic_event_page to NULL after the unmap is done. Those pointers must be reverted to NULL so that if hv_synic_free() is then run, it won't try to free pages that were provided by the hypervisor or paravisor. Michael > + } else { > + free_page((unsigned long)hv_cpu->synic_event_page); > + free_page((unsigned long)hv_cpu->synic_message_page); > + } > free_page((unsigned long)hv_cpu->post_msg_page); > } > > @@ -214,9 +222,10 @@ void hv_synic_enable_regs(unsigned int cpu) > > /* Setup the Synic's message page */ > simp.as_uint64 = hv_get_register(HV_REGISTER_SIMP); > + > simp.simp_enabled = 1; > > - if (hv_isolation_type_snp()) { > + if (hv_isolation_type_snp() || hv_root_partition) { > hv_cpu->synic_message_page > = memremap(simp.base_simp_gpa << HV_HYP_PAGE_SHIFT, > HV_HYP_PAGE_SIZE, MEMREMAP_WB); > @@ -233,7 +242,7 @@ void hv_synic_enable_regs(unsigned int cpu) > siefp.as_uint64 = hv_get_register(HV_REGISTER_SIEFP); > siefp.siefp_enabled = 1; > > - if (hv_isolation_type_snp()) { > + if (hv_isolation_type_snp() || hv_root_partition) { > hv_cpu->synic_event_page = > memremap(siefp.base_siefp_gpa << HV_HYP_PAGE_SHIFT, > HV_HYP_PAGE_SIZE, MEMREMAP_WB); > -- > 2.25.1