RE: [PATCH v7 2/5] Drivers: hv: Setup synic registers in case of nested root partition

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Jinank Jain <jinankjain@xxxxxxxxxxxxxxxxxxx> Sent: Thursday, December 1, 2022 11:05 PM
> 
> On 12/2/2022 9:30 AM, Michael Kelley (LINUX) wrote:
> > From: Jinank Jain <jinankjain@xxxxxxxxxxxxxxxxxxx> Sent: Thursday, December 1,
> 2022 3:04 AM
> >> Child partitions are free to allocate SynIC message and event page but in
> >> case of root partition it must use the pages allocated by Microsoft
> >> Hypervisor (MSHV). Base address for these pages can be found using
> >> synthetic MSRs exposed by MSHV. There is a slight difference in those MSRs
> >> for nested vs non-nested root partition.
> >>
> >> Signed-off-by: Jinank Jain <jinankjain@xxxxxxxxxxxxxxxxxxx>
> >> ---
> >>   arch/x86/include/asm/hyperv-tlfs.h | 11 ++++
> >>   arch/x86/include/asm/mshyperv.h    | 30 ++-------
> >>   arch/x86/kernel/cpu/mshyperv.c     | 69 +++++++++++++++++++++
> >>   drivers/hv/hv.c                    | 99 ++++++++++++++++++++++--------
> >>   include/asm-generic/mshyperv.h     |  5 +-
> >>   5 files changed, 165 insertions(+), 49 deletions(-)
> >>
> >> diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-
> tlfs.h
> >> index 58c03d18c235..b5019becb618 100644
> >> --- a/arch/x86/include/asm/hyperv-tlfs.h
> >> +++ b/arch/x86/include/asm/hyperv-tlfs.h
> >> @@ -225,6 +225,17 @@ enum hv_isolation_type {
> >>   #define HV_REGISTER_SINT14			0x4000009E
> >>   #define HV_REGISTER_SINT15			0x4000009F
> >>
> >> +/*
> >> + * Define synthetic interrupt controller model specific registers for
> >> + * nested hypervisor.
> >> + */
> >> +#define HV_REGISTER_NESTED_SCONTROL            0x40001080
> >> +#define HV_REGISTER_NESTED_SVERSION            0x40001081
> >> +#define HV_REGISTER_NESTED_SIEFP               0x40001082
> >> +#define HV_REGISTER_NESTED_SIMP                0x40001083
> >> +#define HV_REGISTER_NESTED_EOM                 0x40001084
> >> +#define HV_REGISTER_NESTED_SINT0               0x40001090
> >> +
> >>   /*
> >>    * Synthetic Timer MSRs. Four timers per vcpu.
> >>    */
> >> diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h
> >> index 61f0c206bff0..3197d49c888c 100644
> >> --- a/arch/x86/include/asm/mshyperv.h
> >> +++ b/arch/x86/include/asm/mshyperv.h
> >> @@ -198,30 +198,10 @@ static inline bool hv_is_synic_reg(unsigned int reg)
> >>   	return false;
> >>   }
> >>
> >> -static inline u64 hv_get_register(unsigned int reg)
> >> -{
> >> -	u64 value;
> >> -
> >> -	if (hv_is_synic_reg(reg) && hv_isolation_type_snp())
> >> -		hv_ghcb_msr_read(reg, &value);
> >> -	else
> >> -		rdmsrl(reg, value);
> >> -	return value;
> >> -}
> >> -
> >> -static inline void hv_set_register(unsigned int reg, u64 value)
> >> -{
> >> -	if (hv_is_synic_reg(reg) && hv_isolation_type_snp()) {
> >> -		hv_ghcb_msr_write(reg, value);
> >> -
> >> -		/* Write proxy bit via wrmsl instruction */
> >> -		if (reg >= HV_REGISTER_SINT0 &&
> >> -		    reg <= HV_REGISTER_SINT15)
> >> -			wrmsrl(reg, value | 1 << 20);
> >> -	} else {
> >> -		wrmsrl(reg, value);
> >> -	}
> >> -}
> >> +u64 hv_get_register(unsigned int reg);
> >> +void hv_set_register(unsigned int reg, u64 value);
> >> +u64 hv_get_nested_register(unsigned int reg);
> >> +void hv_set_nested_register(unsigned int reg, u64 value);
> >>
> >>   #else /* CONFIG_HYPERV */
> >>   static inline void hyperv_init(void) {}
> >> @@ -241,6 +221,8 @@ static inline int hyperv_flush_guest_mapping_range(u64 as,
> >>   }
> >>   static inline void hv_set_register(unsigned int reg, u64 value) { }
> >>   static inline u64 hv_get_register(unsigned int reg) { return 0; }
> >> +static inline void hv_set_nested_register(unsigned int reg, u64 value) { }
> >> +static inline u64 hv_get_nested_register(unsigned int reg) { return 0; }
> >>   static inline int hv_set_mem_host_visibility(unsigned long addr, int numpages,
> >>   					     bool visible)
> >>   {
> >> diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c
> >> index f9b78d4829e3..f2f6e10301a8 100644
> >> --- a/arch/x86/kernel/cpu/mshyperv.c
> >> +++ b/arch/x86/kernel/cpu/mshyperv.c
> >> @@ -41,7 +41,76 @@ bool hv_root_partition;
> >>   bool hv_nested;
> >>   struct ms_hyperv_info ms_hyperv;
> >>
> >> +static inline unsigned int hv_get_nested_reg(unsigned int reg)
> >> +{
> >> +	switch (reg) {
> >> +	case HV_REGISTER_SIMP:
> >> +		return HV_REGISTER_NESTED_SIMP;
> >> +	case HV_REGISTER_NESTED_SIEFP:
> >> +		return HV_REGISTER_SIEFP;
> >> +	case HV_REGISTER_SCONTROL:
> >> +		return HV_REGISTER_NESTED_SCONTROL;
> >> +	case HV_REGISTER_SINT0:
> >> +		return HV_REGISTER_NESTED_SINT0;
> >> +	case HV_REGISTER_EOM:
> >> +		return HV_REGISTER_NESTED_EOM;
> >> +	default:
> >> +		return reg;
> >> +	}
> > Just a question:  You added #defines for 6 nested registers.  But
> > the switch statement above maps only 5 registers.  Is it intentional
> > that there's not a mapping for HV_REGISTER_SVERSION?
> 
> Good catch! Will fix it in the next revision.
> 
> >
> >> +}
> >> +
> >>   #if IS_ENABLED(CONFIG_HYPERV)
> >> +static u64 _hv_get_register(unsigned int reg, bool nested)
> >> +{
> >> +	u64 value;
> >> +
> >> +	if (nested)
> >> +		reg = hv_get_nested_reg(reg);
> >> +
> >> +	if (hv_is_synic_reg(reg) && hv_isolation_type_snp())
> >> +		hv_ghcb_msr_read(reg, &value);
> >> +	else
> >> +		rdmsrl(reg, value);
> >> +	return value;
> >> +}
> >> +
> >> +static void _hv_set_register(unsigned int reg, u64 value, bool nested)
> >> +{
> >> +	if (nested)
> >> +		reg = hv_get_nested_reg(reg);
> >> +
> >> +	if (hv_is_synic_reg(reg) && hv_isolation_type_snp()) {
> >> +		hv_ghcb_msr_write(reg, value);
> >> +
> >> +		/* Write proxy bit via wrmsl instruction */
> >> +		if (reg >= HV_REGISTER_SINT0 &&
> >> +		    reg <= HV_REGISTER_SINT15)
> >> +			wrmsrl(reg, value | 1 << 20);
> >> +	} else {
> >> +		wrmsrl(reg, value);
> >> +	}
> >> +}
> >> +
> >> +u64 hv_get_register(unsigned int reg)
> >> +{
> >> +	return _hv_get_register(reg, false);
> >> +}
> >> +
> >> +void hv_set_register(unsigned int reg, u64 value)
> >> +{
> >> +	_hv_set_register(reg, value, false);
> >> +}
> >> +
> >> +u64 hv_get_nested_register(unsigned int reg)
> >> +{
> >> +	return _hv_get_register(reg, true);
> >> +}
> >> +
> >> +void hv_set_nested_register(unsigned int reg, u64 value)
> >> +{
> >> +	_hv_set_register(reg, value, true);
> >> +}
> >> +
> >>   static void (*vmbus_handler)(void);
> >>   static void (*hv_stimer0_handler)(void);
> >>   static void (*hv_kexec_handler)(void);
> >> diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c
> >> index 4d6480d57546..0ed052f2423e 100644
> >> --- a/drivers/hv/hv.c
> >> +++ b/drivers/hv/hv.c
> >> @@ -147,7 +147,7 @@ int hv_synic_alloc(void)
> >>   		 * Synic message and event pages are allocated by paravisor.
> >>   		 * Skip these pages allocation here.
> >>   		 */
> >> -		if (!hv_isolation_type_snp()) {
> >> +		if (!hv_isolation_type_snp() && !hv_root_partition) {
> >>   			hv_cpu->synic_message_page =
> >>   				(void *)get_zeroed_page(GFP_ATOMIC);
> >>   			if (hv_cpu->synic_message_page == NULL) {
> >> @@ -188,8 +188,16 @@ void hv_synic_free(void)
> >>   		struct hv_per_cpu_context *hv_cpu
> >>   			= per_cpu_ptr(hv_context.cpu_context, cpu);
> >>
> >> -		free_page((unsigned long)hv_cpu->synic_event_page);
> >> -		free_page((unsigned long)hv_cpu->synic_message_page);
> >> +		if (hv_root_partition) {
> >> +			if (hv_cpu->synic_event_page != NULL)
> >> +				memunmap(hv_cpu->synic_event_page);
> >> +
> >> +			if (hv_cpu->synic_message_page != NULL)
> >> +				memunmap(hv_cpu->synic_message_page);
> >> +		} else {
> >> +			free_page((unsigned long)hv_cpu->synic_event_page);
> >> +			free_page((unsigned long)hv_cpu->synic_message_page);
> >> +		}
> >>   		free_page((unsigned long)hv_cpu->post_msg_page);
> >>   	}
> >>
> >> @@ -213,10 +221,12 @@ void hv_synic_enable_regs(unsigned int cpu)
> >>   	union hv_synic_scontrol sctrl;
> >>
> >>   	/* Setup the Synic's message page */
> >> -	simp.as_uint64 = hv_get_register(HV_REGISTER_SIMP);
> >> +	simp.as_uint64 = hv_nested ? hv_get_nested_register(HV_REGISTER_SIMP) :
> >> +				     hv_get_register(HV_REGISTER_SIMP);
> > Unfortunately, this code and the similar places below will run into
> > problems on ARM64.  Drivers/hv/hv.c is common code on all architectures
> > so it needs to compile and run on ARM64 as well as x86/x64.  But there's
> > no hv_get_nested_register() defined or implemented on the ARM64 side,
> > so the code will fail to compile.
> >
> > I think there's a better way to do this.   Based on Nuno's comments, it
> > seems like there are two hv_get_register() functions needed:
> >
> > 1)  Get the value of the register or its nested cousin, based on the value
> > of hv_nested.  That's what you are explicitly coding here.
> > 2) Get the value of the register.  Don't access the nested cousin, regardless
> > of the value of hv_nested.
> >
> > Based on how you coded things earlier, I'm assuming #1 is what you want to
> > use in most cases, and specifically here in drivers/hv/hv.c.  That's good,
> > because #1 can hide the testing of hv_nested in the x86-specific
> > implementation of hv_get_register(), while the ARM64 version of
> > hv_get_register() continues to do whatever it does now with no changes.
> >
> > I'm also assuming that #2 may be used in particular cases in the code
> > that is specifically related to nesting.  Give the #2 version a different
> > name --- maybe hv_get_nonnested_register(), or something like that --
> > and use it only in code under arch/x86 that is related to nesting.  That
> > way, ARM64 won't be affected.
> >
> > Of course, the same approach applies to hv_set_register().
> >
> > hv_get_register() and hv_get_nonnested_register() will obviously
> > share some code.  But rather than calling a common function starting
> > with underscore like you've done above, let me suggest that
> > hv_get_register() test hv_nested and potentially do the translation,
> > then call hv_get_nonnested_register().  That way you'll end up
> > with just two functions instead of three as above with
> > hv_get_register(), hv_get_nested_register(), and _hv_get_register().
> 
> I tried the way you suggested and it worked for ARM64 this time.

OK, good.

> But still I would have three functions. Because the base function
> _hv_get_register() would still be required in order to avoid code
> duplication in hv_get_non_nested_register().

To make this is a bit more concrete, here's what I was thinking
(not even compile tested):

u64 hv_get_non_nested_register(unsigned int reg)
{
	u64 value;
	if (hv_is_synic_reg(reg) && hv_isolation_type_snp())
		hv_ghcb_msr_read(reg, &value);
	else
		rdmsrl(reg, value);
	return value;
}

u64 hv_get_register(unsigned int reg)
{
	if (hv_nested)
		reg = hv_get_nested_reg(reg);
	return hv_get_non_nested_register(reg);
}

But maybe I'm missing something ....

Michael




[Index of Archives]     [Linux Kernel]     [Kernel Newbies]     [x86 Platform Driver]     [Netdev]     [Linux Wireless]     [Netfilter]     [Bugtraq]     [Linux Filesystems]     [Yosemite Discussion]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Samba]     [Device Mapper]

  Powered by Linux