From: Jinank Jain <jinankjain@xxxxxxxxxxxxxxxxxxx> Sent: Thursday, December 1, 2022 11:05 PM > > On 12/2/2022 9:30 AM, Michael Kelley (LINUX) wrote: > > From: Jinank Jain <jinankjain@xxxxxxxxxxxxxxxxxxx> Sent: Thursday, December 1, > 2022 3:04 AM > >> Child partitions are free to allocate SynIC message and event page but in > >> case of root partition it must use the pages allocated by Microsoft > >> Hypervisor (MSHV). Base address for these pages can be found using > >> synthetic MSRs exposed by MSHV. There is a slight difference in those MSRs > >> for nested vs non-nested root partition. > >> > >> Signed-off-by: Jinank Jain <jinankjain@xxxxxxxxxxxxxxxxxxx> > >> --- > >> arch/x86/include/asm/hyperv-tlfs.h | 11 ++++ > >> arch/x86/include/asm/mshyperv.h | 30 ++------- > >> arch/x86/kernel/cpu/mshyperv.c | 69 +++++++++++++++++++++ > >> drivers/hv/hv.c | 99 ++++++++++++++++++++++-------- > >> include/asm-generic/mshyperv.h | 5 +- > >> 5 files changed, 165 insertions(+), 49 deletions(-) > >> > >> diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv- > tlfs.h > >> index 58c03d18c235..b5019becb618 100644 > >> --- a/arch/x86/include/asm/hyperv-tlfs.h > >> +++ b/arch/x86/include/asm/hyperv-tlfs.h > >> @@ -225,6 +225,17 @@ enum hv_isolation_type { > >> #define HV_REGISTER_SINT14 0x4000009E > >> #define HV_REGISTER_SINT15 0x4000009F > >> > >> +/* > >> + * Define synthetic interrupt controller model specific registers for > >> + * nested hypervisor. > >> + */ > >> +#define HV_REGISTER_NESTED_SCONTROL 0x40001080 > >> +#define HV_REGISTER_NESTED_SVERSION 0x40001081 > >> +#define HV_REGISTER_NESTED_SIEFP 0x40001082 > >> +#define HV_REGISTER_NESTED_SIMP 0x40001083 > >> +#define HV_REGISTER_NESTED_EOM 0x40001084 > >> +#define HV_REGISTER_NESTED_SINT0 0x40001090 > >> + > >> /* > >> * Synthetic Timer MSRs. Four timers per vcpu. > >> */ > >> diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h > >> index 61f0c206bff0..3197d49c888c 100644 > >> --- a/arch/x86/include/asm/mshyperv.h > >> +++ b/arch/x86/include/asm/mshyperv.h > >> @@ -198,30 +198,10 @@ static inline bool hv_is_synic_reg(unsigned int reg) > >> return false; > >> } > >> > >> -static inline u64 hv_get_register(unsigned int reg) > >> -{ > >> - u64 value; > >> - > >> - if (hv_is_synic_reg(reg) && hv_isolation_type_snp()) > >> - hv_ghcb_msr_read(reg, &value); > >> - else > >> - rdmsrl(reg, value); > >> - return value; > >> -} > >> - > >> -static inline void hv_set_register(unsigned int reg, u64 value) > >> -{ > >> - if (hv_is_synic_reg(reg) && hv_isolation_type_snp()) { > >> - hv_ghcb_msr_write(reg, value); > >> - > >> - /* Write proxy bit via wrmsl instruction */ > >> - if (reg >= HV_REGISTER_SINT0 && > >> - reg <= HV_REGISTER_SINT15) > >> - wrmsrl(reg, value | 1 << 20); > >> - } else { > >> - wrmsrl(reg, value); > >> - } > >> -} > >> +u64 hv_get_register(unsigned int reg); > >> +void hv_set_register(unsigned int reg, u64 value); > >> +u64 hv_get_nested_register(unsigned int reg); > >> +void hv_set_nested_register(unsigned int reg, u64 value); > >> > >> #else /* CONFIG_HYPERV */ > >> static inline void hyperv_init(void) {} > >> @@ -241,6 +221,8 @@ static inline int hyperv_flush_guest_mapping_range(u64 as, > >> } > >> static inline void hv_set_register(unsigned int reg, u64 value) { } > >> static inline u64 hv_get_register(unsigned int reg) { return 0; } > >> +static inline void hv_set_nested_register(unsigned int reg, u64 value) { } > >> +static inline u64 hv_get_nested_register(unsigned int reg) { return 0; } > >> static inline int hv_set_mem_host_visibility(unsigned long addr, int numpages, > >> bool visible) > >> { > >> diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c > >> index f9b78d4829e3..f2f6e10301a8 100644 > >> --- a/arch/x86/kernel/cpu/mshyperv.c > >> +++ b/arch/x86/kernel/cpu/mshyperv.c > >> @@ -41,7 +41,76 @@ bool hv_root_partition; > >> bool hv_nested; > >> struct ms_hyperv_info ms_hyperv; > >> > >> +static inline unsigned int hv_get_nested_reg(unsigned int reg) > >> +{ > >> + switch (reg) { > >> + case HV_REGISTER_SIMP: > >> + return HV_REGISTER_NESTED_SIMP; > >> + case HV_REGISTER_NESTED_SIEFP: > >> + return HV_REGISTER_SIEFP; > >> + case HV_REGISTER_SCONTROL: > >> + return HV_REGISTER_NESTED_SCONTROL; > >> + case HV_REGISTER_SINT0: > >> + return HV_REGISTER_NESTED_SINT0; > >> + case HV_REGISTER_EOM: > >> + return HV_REGISTER_NESTED_EOM; > >> + default: > >> + return reg; > >> + } > > Just a question: You added #defines for 6 nested registers. But > > the switch statement above maps only 5 registers. Is it intentional > > that there's not a mapping for HV_REGISTER_SVERSION? > > Good catch! Will fix it in the next revision. > > > > >> +} > >> + > >> #if IS_ENABLED(CONFIG_HYPERV) > >> +static u64 _hv_get_register(unsigned int reg, bool nested) > >> +{ > >> + u64 value; > >> + > >> + if (nested) > >> + reg = hv_get_nested_reg(reg); > >> + > >> + if (hv_is_synic_reg(reg) && hv_isolation_type_snp()) > >> + hv_ghcb_msr_read(reg, &value); > >> + else > >> + rdmsrl(reg, value); > >> + return value; > >> +} > >> + > >> +static void _hv_set_register(unsigned int reg, u64 value, bool nested) > >> +{ > >> + if (nested) > >> + reg = hv_get_nested_reg(reg); > >> + > >> + if (hv_is_synic_reg(reg) && hv_isolation_type_snp()) { > >> + hv_ghcb_msr_write(reg, value); > >> + > >> + /* Write proxy bit via wrmsl instruction */ > >> + if (reg >= HV_REGISTER_SINT0 && > >> + reg <= HV_REGISTER_SINT15) > >> + wrmsrl(reg, value | 1 << 20); > >> + } else { > >> + wrmsrl(reg, value); > >> + } > >> +} > >> + > >> +u64 hv_get_register(unsigned int reg) > >> +{ > >> + return _hv_get_register(reg, false); > >> +} > >> + > >> +void hv_set_register(unsigned int reg, u64 value) > >> +{ > >> + _hv_set_register(reg, value, false); > >> +} > >> + > >> +u64 hv_get_nested_register(unsigned int reg) > >> +{ > >> + return _hv_get_register(reg, true); > >> +} > >> + > >> +void hv_set_nested_register(unsigned int reg, u64 value) > >> +{ > >> + _hv_set_register(reg, value, true); > >> +} > >> + > >> static void (*vmbus_handler)(void); > >> static void (*hv_stimer0_handler)(void); > >> static void (*hv_kexec_handler)(void); > >> diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c > >> index 4d6480d57546..0ed052f2423e 100644 > >> --- a/drivers/hv/hv.c > >> +++ b/drivers/hv/hv.c > >> @@ -147,7 +147,7 @@ int hv_synic_alloc(void) > >> * Synic message and event pages are allocated by paravisor. > >> * Skip these pages allocation here. > >> */ > >> - if (!hv_isolation_type_snp()) { > >> + if (!hv_isolation_type_snp() && !hv_root_partition) { > >> hv_cpu->synic_message_page = > >> (void *)get_zeroed_page(GFP_ATOMIC); > >> if (hv_cpu->synic_message_page == NULL) { > >> @@ -188,8 +188,16 @@ void hv_synic_free(void) > >> struct hv_per_cpu_context *hv_cpu > >> = per_cpu_ptr(hv_context.cpu_context, cpu); > >> > >> - free_page((unsigned long)hv_cpu->synic_event_page); > >> - free_page((unsigned long)hv_cpu->synic_message_page); > >> + if (hv_root_partition) { > >> + if (hv_cpu->synic_event_page != NULL) > >> + memunmap(hv_cpu->synic_event_page); > >> + > >> + if (hv_cpu->synic_message_page != NULL) > >> + memunmap(hv_cpu->synic_message_page); > >> + } else { > >> + free_page((unsigned long)hv_cpu->synic_event_page); > >> + free_page((unsigned long)hv_cpu->synic_message_page); > >> + } > >> free_page((unsigned long)hv_cpu->post_msg_page); > >> } > >> > >> @@ -213,10 +221,12 @@ void hv_synic_enable_regs(unsigned int cpu) > >> union hv_synic_scontrol sctrl; > >> > >> /* Setup the Synic's message page */ > >> - simp.as_uint64 = hv_get_register(HV_REGISTER_SIMP); > >> + simp.as_uint64 = hv_nested ? hv_get_nested_register(HV_REGISTER_SIMP) : > >> + hv_get_register(HV_REGISTER_SIMP); > > Unfortunately, this code and the similar places below will run into > > problems on ARM64. Drivers/hv/hv.c is common code on all architectures > > so it needs to compile and run on ARM64 as well as x86/x64. But there's > > no hv_get_nested_register() defined or implemented on the ARM64 side, > > so the code will fail to compile. > > > > I think there's a better way to do this. Based on Nuno's comments, it > > seems like there are two hv_get_register() functions needed: > > > > 1) Get the value of the register or its nested cousin, based on the value > > of hv_nested. That's what you are explicitly coding here. > > 2) Get the value of the register. Don't access the nested cousin, regardless > > of the value of hv_nested. > > > > Based on how you coded things earlier, I'm assuming #1 is what you want to > > use in most cases, and specifically here in drivers/hv/hv.c. That's good, > > because #1 can hide the testing of hv_nested in the x86-specific > > implementation of hv_get_register(), while the ARM64 version of > > hv_get_register() continues to do whatever it does now with no changes. > > > > I'm also assuming that #2 may be used in particular cases in the code > > that is specifically related to nesting. Give the #2 version a different > > name --- maybe hv_get_nonnested_register(), or something like that -- > > and use it only in code under arch/x86 that is related to nesting. That > > way, ARM64 won't be affected. > > > > Of course, the same approach applies to hv_set_register(). > > > > hv_get_register() and hv_get_nonnested_register() will obviously > > share some code. But rather than calling a common function starting > > with underscore like you've done above, let me suggest that > > hv_get_register() test hv_nested and potentially do the translation, > > then call hv_get_nonnested_register(). That way you'll end up > > with just two functions instead of three as above with > > hv_get_register(), hv_get_nested_register(), and _hv_get_register(). > > I tried the way you suggested and it worked for ARM64 this time. OK, good. > But still I would have three functions. Because the base function > _hv_get_register() would still be required in order to avoid code > duplication in hv_get_non_nested_register(). To make this is a bit more concrete, here's what I was thinking (not even compile tested): u64 hv_get_non_nested_register(unsigned int reg) { u64 value; if (hv_is_synic_reg(reg) && hv_isolation_type_snp()) hv_ghcb_msr_read(reg, &value); else rdmsrl(reg, value); return value; } u64 hv_get_register(unsigned int reg) { if (hv_nested) reg = hv_get_nested_reg(reg); return hv_get_non_nested_register(reg); } But maybe I'm missing something .... Michael