Jinank Jain <jinankjain@xxxxxxxxxxxxxxxxxxx> writes: > Child partitions are free to allocate SynIC message and event page but in > case of root partition it must use the pages allocated by Microsoft > Hypervisor (MSHV). Base address for these pages can be found using > synthetic MSRs exposed by MSHV. There is a slight difference in those MSRs > for nested vs non-nested root partition. > > Signed-off-by: Jinank Jain <jinankjain@xxxxxxxxxxxxxxxxxxx> > --- > arch/x86/include/asm/hyperv-tlfs.h | 11 ++++++ > drivers/hv/hv.c | 55 ++++++++++++++++++------------ > 2 files changed, 45 insertions(+), 21 deletions(-) > > diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h > index d9a611565859..0319091e2019 100644 > --- a/arch/x86/include/asm/hyperv-tlfs.h > +++ b/arch/x86/include/asm/hyperv-tlfs.h > @@ -225,6 +225,17 @@ enum hv_isolation_type { > #define HV_REGISTER_SINT14 0x4000009E > #define HV_REGISTER_SINT15 0x4000009F > > +/* > + * Define synthetic interrupt controller model specific registers for > + * nested hypervisor. > + */ > +#define HV_REGISTER_NESTED_SCONTROL 0x40001080 > +#define HV_REGISTER_NESTED_SVERSION 0x40001081 > +#define HV_REGISTER_NESTED_SIEFP 0x40001082 > +#define HV_REGISTER_NESTED_SIMP 0x40001083 > +#define HV_REGISTER_NESTED_EOM 0x40001084 > +#define HV_REGISTER_NESTED_SINT0 0x40001090 > + > /* > * Synthetic Timer MSRs. Four timers per vcpu. > */ > diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c > index 4d6480d57546..92ee910561c4 100644 > --- a/drivers/hv/hv.c > +++ b/drivers/hv/hv.c > @@ -25,6 +25,11 @@ > /* The one and only */ > struct hv_context hv_context; > > +#define REG_SIMP (hv_nested ? HV_REGISTER_NESTED_SIMP : HV_REGISTER_SIMP) > +#define REG_SIEFP (hv_nested ? HV_REGISTER_NESTED_SIEFP : HV_REGISTER_SIEFP) > +#define REG_SCTRL (hv_nested ? HV_REGISTER_NESTED_SCONTROL : HV_REGISTER_SCONTROL) > +#define REG_SINT0 (hv_nested ? 
HV_REGISTER_NESTED_SINT0 : HV_REGISTER_SINT0) > + > /* > * hv_init - Main initialization routine. > * > @@ -147,7 +152,7 @@ int hv_synic_alloc(void) > * Synic message and event pages are allocated by paravisor. > * Skip these pages allocation here. > */ > - if (!hv_isolation_type_snp()) { > + if (!hv_isolation_type_snp() && !hv_root_partition) { > hv_cpu->synic_message_page = > (void *)get_zeroed_page(GFP_ATOMIC); > if (hv_cpu->synic_message_page == NULL) { > @@ -188,8 +193,16 @@ void hv_synic_free(void) > struct hv_per_cpu_context *hv_cpu > = per_cpu_ptr(hv_context.cpu_context, cpu); > > - free_page((unsigned long)hv_cpu->synic_event_page); > - free_page((unsigned long)hv_cpu->synic_message_page); > + if (hv_root_partition) { > + if (hv_cpu->synic_event_page != NULL) > + memunmap(hv_cpu->synic_event_page); > + > + if (hv_cpu->synic_message_page != NULL) > + memunmap(hv_cpu->synic_message_page); > + } else { > + free_page((unsigned long)hv_cpu->synic_event_page); > + free_page((unsigned long)hv_cpu->synic_message_page); > + } > free_page((unsigned long)hv_cpu->post_msg_page); > } > > @@ -213,10 +226,10 @@ void hv_synic_enable_regs(unsigned int cpu) > union hv_synic_scontrol sctrl; > > /* Setup the Synic's message page */ > - simp.as_uint64 = hv_get_register(HV_REGISTER_SIMP); > + simp.as_uint64 = hv_get_register(REG_SIMP); To avoid all this code churn (here and in the next patch dealing with EOM), would it make sense to move the logic picking nested/non-nested register into hv_{get,set}_register() instead? E.g. something like (untested, incomplete): static inline u32 hv_get_nested_reg(u32 reg) { switch (reg) { case HV_REGISTER_SIMP: return HV_REGISTER_NESTED_SIMP; case HV_REGISTER_SVERSION: return HV_REGISTER_NESTED_SVERSION; ... 
default: return reg; } } static inline u64 hv_get_register(unsigned int reg) { u64 value; if (hv_nested) reg = hv_get_nested_reg(reg); if (hv_is_synic_reg(reg) && hv_isolation_type_snp()) hv_ghcb_msr_read(reg, &value); else rdmsrl(reg, value); return value; } static inline void hv_set_register(unsigned int reg, u64 value) { if (hv_nested) reg = hv_get_nested_reg(reg); if (hv_is_synic_reg(reg) && hv_isolation_type_snp()) { hv_ghcb_msr_write(reg, value); /* Write proxy bit via wrmsrl instruction */ if (reg >= HV_REGISTER_SINT0 && reg <= HV_REGISTER_SINT15) wrmsrl(reg, value | 1 << 20); } else { wrmsrl(reg, value); } } > simp.simp_enabled = 1; > > - if (hv_isolation_type_snp()) { > + if (hv_isolation_type_snp() || hv_root_partition) { > hv_cpu->synic_message_page > = memremap(simp.base_simp_gpa << HV_HYP_PAGE_SHIFT, > HV_HYP_PAGE_SIZE, MEMREMAP_WB); > @@ -227,13 +240,13 @@ void hv_synic_enable_regs(unsigned int cpu) > >> HV_HYP_PAGE_SHIFT; > } > > - hv_set_register(HV_REGISTER_SIMP, simp.as_uint64); > + hv_set_register(REG_SIMP, simp.as_uint64); > > /* Setup the Synic's event page */ > - siefp.as_uint64 = hv_get_register(HV_REGISTER_SIEFP); > + siefp.as_uint64 = hv_get_register(REG_SIEFP); > siefp.siefp_enabled = 1; > > - if (hv_isolation_type_snp()) { > + if (hv_isolation_type_snp() || hv_root_partition) { > hv_cpu->synic_event_page = > memremap(siefp.base_siefp_gpa << HV_HYP_PAGE_SHIFT, > HV_HYP_PAGE_SIZE, MEMREMAP_WB); > @@ -245,12 +258,12 @@ void hv_synic_enable_regs(unsigned int cpu) > >> HV_HYP_PAGE_SHIFT; > } > > - hv_set_register(HV_REGISTER_SIEFP, siefp.as_uint64); > + hv_set_register(REG_SIEFP, siefp.as_uint64); > > /* Setup the shared SINT. 
*/ > if (vmbus_irq != -1) > enable_percpu_irq(vmbus_irq, 0); > - shared_sint.as_uint64 = hv_get_register(HV_REGISTER_SINT0 + > + shared_sint.as_uint64 = hv_get_register(REG_SINT0 + > VMBUS_MESSAGE_SINT); > > shared_sint.vector = vmbus_interrupt; > @@ -266,14 +279,14 @@ void hv_synic_enable_regs(unsigned int cpu) > #else > shared_sint.auto_eoi = 0; > #endif > - hv_set_register(HV_REGISTER_SINT0 + VMBUS_MESSAGE_SINT, > + hv_set_register(REG_SINT0 + VMBUS_MESSAGE_SINT, > shared_sint.as_uint64); > > /* Enable the global synic bit */ > - sctrl.as_uint64 = hv_get_register(HV_REGISTER_SCONTROL); > + sctrl.as_uint64 = hv_get_register(REG_SCTRL); > sctrl.enable = 1; > > - hv_set_register(HV_REGISTER_SCONTROL, sctrl.as_uint64); > + hv_set_register(REG_SCTRL, sctrl.as_uint64); > } > > int hv_synic_init(unsigned int cpu) > @@ -297,17 +310,17 @@ void hv_synic_disable_regs(unsigned int cpu) > union hv_synic_siefp siefp; > union hv_synic_scontrol sctrl; > > - shared_sint.as_uint64 = hv_get_register(HV_REGISTER_SINT0 + > + shared_sint.as_uint64 = hv_get_register(REG_SINT0 + > VMBUS_MESSAGE_SINT); > > shared_sint.masked = 1; > > /* Need to correctly cleanup in the case of SMP!!! */ > /* Disable the interrupt */ > - hv_set_register(HV_REGISTER_SINT0 + VMBUS_MESSAGE_SINT, > + hv_set_register(REG_SINT0 + VMBUS_MESSAGE_SINT, > shared_sint.as_uint64); > > - simp.as_uint64 = hv_get_register(HV_REGISTER_SIMP); > + simp.as_uint64 = hv_get_register(REG_SIMP); > /* > * In Isolation VM, sim and sief pages are allocated by > * paravisor. 
These pages also will be used by kdump > @@ -320,9 +333,9 @@ void hv_synic_disable_regs(unsigned int cpu) > else > simp.base_simp_gpa = 0; > > - hv_set_register(HV_REGISTER_SIMP, simp.as_uint64); > + hv_set_register(REG_SIMP, simp.as_uint64); > > - siefp.as_uint64 = hv_get_register(HV_REGISTER_SIEFP); > + siefp.as_uint64 = hv_get_register(REG_SIEFP); > siefp.siefp_enabled = 0; > > if (hv_isolation_type_snp()) > @@ -330,12 +343,12 @@ void hv_synic_disable_regs(unsigned int cpu) > else > siefp.base_siefp_gpa = 0; > > - hv_set_register(HV_REGISTER_SIEFP, siefp.as_uint64); > + hv_set_register(REG_SIEFP, siefp.as_uint64); > > /* Disable the global synic bit */ > - sctrl.as_uint64 = hv_get_register(HV_REGISTER_SCONTROL); > + sctrl.as_uint64 = hv_get_register(REG_SCTRL); > sctrl.enable = 0; > - hv_set_register(HV_REGISTER_SCONTROL, sctrl.as_uint64); > + hv_set_register(REG_SCTRL, sctrl.as_uint64); > > if (vmbus_irq != -1) > disable_percpu_irq(vmbus_irq); -- Vitaly