Re: [RFC PATCH v5 042/104] KVM: x86/mmu: Track shadow MMIO value/mask on a per-VM basis

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, Apr 06, 2022 at 11:06:41PM +1200,
Kai Huang <kai.huang@xxxxxxxxx> wrote:

> > diff --git a/arch/x86/kvm/mmu/spte.c b/arch/x86/kvm/mmu/spte.c
> > index 5071e8332db2..ea83927b9231 100644
> > --- a/arch/x86/kvm/mmu/spte.c
> > +++ b/arch/x86/kvm/mmu/spte.c
> > @@ -29,8 +29,7 @@ u64 __read_mostly shadow_x_mask; /* mutual exclusive with nx_mask */
> >  u64 __read_mostly shadow_user_mask;
> >  u64 __read_mostly shadow_accessed_mask;
> >  u64 __read_mostly shadow_dirty_mask;
> > -u64 __read_mostly shadow_mmio_value;
> > -u64 __read_mostly shadow_mmio_mask;
> > +u64 __read_mostly shadow_default_mmio_mask;
> >  u64 __read_mostly shadow_mmio_access_mask;
> >  u64 __read_mostly shadow_present_mask;
> >  u64 __read_mostly shadow_me_mask;
> > @@ -59,10 +58,11 @@ u64 make_mmio_spte(struct kvm_vcpu *vcpu, u64 gfn, unsigned int access)
> >  	u64 spte = generation_mmio_spte_mask(gen);
> >  	u64 gpa = gfn << PAGE_SHIFT;
> >  
> > -	WARN_ON_ONCE(!shadow_mmio_value);
> > +	WARN_ON_ONCE(!vcpu->kvm->arch.shadow_mmio_value &&
> > +		     !kvm_gfn_stolen_mask(vcpu->kvm));
> >  
> >  	access &= shadow_mmio_access_mask;
> > -	spte |= shadow_mmio_value | access;
> > +	spte |= vcpu->kvm->arch.shadow_mmio_value | access;
> >  	spte |= gpa | shadow_nonpresent_or_rsvd_mask;
> >  	spte |= (gpa & shadow_nonpresent_or_rsvd_mask)
> >  		<< SHADOW_NONPRESENT_OR_RSVD_MASK_LEN;
> > @@ -279,7 +279,8 @@ u64 mark_spte_for_access_track(u64 spte)
> >  	return spte;
> >  }
> >  
> > -void kvm_mmu_set_mmio_spte_mask(u64 mmio_value, u64 mmio_mask, u64 access_mask)
> > +void kvm_mmu_set_mmio_spte_mask(struct kvm *kvm, u64 mmio_value, u64 mmio_mask,
> > +				u64 access_mask)
> >  {
> >  	BUG_ON((u64)(unsigned)access_mask != access_mask);
> >  	WARN_ON(mmio_value & shadow_nonpresent_or_rsvd_lower_gfn_mask);
> > @@ -308,39 +309,32 @@ void kvm_mmu_set_mmio_spte_mask(u64 mmio_value, u64 mmio_mask, u64 access_mask)
> >  	    WARN_ON(mmio_value && (REMOVED_SPTE & mmio_mask) == mmio_value))
> >  		mmio_value = 0;
> >  
> > -	shadow_mmio_value = mmio_value;
> > -	shadow_mmio_mask  = mmio_mask;
> > +	kvm->arch.shadow_mmio_value = mmio_value;
> > +	kvm->arch.shadow_mmio_mask = mmio_mask;
> >  	shadow_mmio_access_mask = access_mask;
> >  }
> >  EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask);
> >  
> > -void kvm_mmu_set_ept_masks(bool has_ad_bits, bool has_exec_only)
> > +void kvm_mmu_set_ept_masks(bool has_ad_bits, bool has_exec_only, u64 init_value)
> >  {
> >  	shadow_user_mask	= VMX_EPT_READABLE_MASK;
> >  	shadow_accessed_mask	= has_ad_bits ? VMX_EPT_ACCESS_BIT : 0ull;
> >  	shadow_dirty_mask	= has_ad_bits ? VMX_EPT_DIRTY_BIT : 0ull;
> >  	shadow_nx_mask		= 0ull;
> >  	shadow_x_mask		= VMX_EPT_EXECUTABLE_MASK;
> > -	shadow_present_mask	= has_exec_only ? 0ull : VMX_EPT_READABLE_MASK;
> > +	shadow_present_mask	=
> > +		(has_exec_only ? 0ull : VMX_EPT_READABLE_MASK) | init_value;
> 
> This change doesn't seem make any sense.  Why should "Suppress #VE" bit be set
> for a present PTE?

Because W or NX violation also needs #VE.  Although the name uses present, it's
actually readable.


> >  	shadow_acc_track_mask	= VMX_EPT_RWX_MASK;
> >  	shadow_me_mask		= 0ull;
> >  
> >  	shadow_host_writable_mask = EPT_SPTE_HOST_WRITABLE;
> >  	shadow_mmu_writable_mask  = EPT_SPTE_MMU_WRITABLE;
> > -
> > -	/*
> > -	 * EPT Misconfigurations are generated if the value of bits 2:0
> > -	 * of an EPT paging-structure entry is 110b (write/execute).
> > -	 */
> > -	kvm_mmu_set_mmio_spte_mask(VMX_EPT_MISCONFIG_WX_VALUE,
> > -				   VMX_EPT_RWX_MASK, 0);
> >  }
> >  EXPORT_SYMBOL_GPL(kvm_mmu_set_ept_masks);
> >  
> >  void kvm_mmu_reset_all_pte_masks(void)
> >  {
> >  	u8 low_phys_bits;
> > -	u64 mask;
> >  
> >  	shadow_phys_bits = kvm_get_shadow_phys_bits();
> >  
> > @@ -389,9 +383,13 @@ void kvm_mmu_reset_all_pte_masks(void)
> >  	 * PTEs and so the reserved PA approach must be disabled.
> >  	 */
> >  	if (shadow_phys_bits < 52)
> > -		mask = BIT_ULL(51) | PT_PRESENT_MASK;
> > +		shadow_default_mmio_mask = BIT_ULL(51) | PT_PRESENT_MASK;
> 
> Hmm...  Not related to this patch, but it seems there's a bug here.  On a MKTME
> enabled system (but not TDX) with 52 physical bits, the shadow_phys_bits will be
> set to < 52 (depending on how many MKTME KeyIDs are configured by BIOS).  In
> this case, bit 51 is set, but actually bit 51 isn't a reserved bit in this case.
> Instead, it is a MKTME KeyID bit.  Therefore, above setting won't cause #PF, but
> will use a non-zero MKTME keyID to access the physical address.
> 
> Paolo/Sean, any comments here?
> 
> >  	else
> > -		mask = 0;
> > +		shadow_default_mmio_mask = 0;
> > +}
> >  
> > -	kvm_mmu_set_mmio_spte_mask(mask, mask, ACC_WRITE_MASK | ACC_USER_MASK);
> > +void kvm_mmu_set_default_mmio_spte_mask(u64 mask)
> > +{
> > +	shadow_default_mmio_mask = mask;
> >  }
> > +EXPORT_SYMBOL_GPL(kvm_mmu_set_default_mmio_spte_mask);
> > diff --git a/arch/x86/kvm/mmu/spte.h b/arch/x86/kvm/mmu/spte.h
> > index 8e13a35ab8c9..bde843bce878 100644
> > --- a/arch/x86/kvm/mmu/spte.h
> > +++ b/arch/x86/kvm/mmu/spte.h
> > @@ -165,8 +165,7 @@ extern u64 __read_mostly shadow_x_mask; /* mutual exclusive with nx_mask */
> >  extern u64 __read_mostly shadow_user_mask;
> >  extern u64 __read_mostly shadow_accessed_mask;
> >  extern u64 __read_mostly shadow_dirty_mask;
> > -extern u64 __read_mostly shadow_mmio_value;
> > -extern u64 __read_mostly shadow_mmio_mask;
> > +extern u64 __read_mostly shadow_default_mmio_mask;
> >  extern u64 __read_mostly shadow_mmio_access_mask;
> >  extern u64 __read_mostly shadow_present_mask;
> >  extern u64 __read_mostly shadow_me_mask;
> > @@ -229,10 +228,10 @@ extern u64 __read_mostly shadow_nonpresent_or_rsvd_lower_gfn_mask;
> >   */
> >  extern u8 __read_mostly shadow_phys_bits;
> >  
> > -static inline bool is_mmio_spte(u64 spte)
> > +static inline bool is_mmio_spte(struct kvm *kvm, u64 spte)
> >  {
> > -	return (spte & shadow_mmio_mask) == shadow_mmio_value &&
> > -	       likely(shadow_mmio_value);
> > +	return (spte & kvm->arch.shadow_mmio_mask) == kvm->arch.shadow_mmio_value &&
> > +		likely(kvm->arch.shadow_mmio_value || kvm_gfn_stolen_mask(kvm));
> 
> I don't like using kvm_gfn_stolen_mask() to check whether SPTE is MMIO. 
> kvm_gfn_stolen_mask() really doesn't imply anything regarding to setting up the
> value of MMIO SPTE.  At least, I guess we can use some is_protected_vm() sort of
> things since it implies guest memory is protected therefore legacy way handling
> of MMIO doesn't work (i.e. you cannot parse MMIO instruction).

As discussed in other thread, let's rename those functions.


> > diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
> > index 07fd892768be..00f88aa25047 100644
> > --- a/arch/x86/kvm/vmx/vmx.c
> > +++ b/arch/x86/kvm/vmx/vmx.c
> > @@ -7065,6 +7065,14 @@ int vmx_vm_init(struct kvm *kvm)
> >  	if (!ple_gap)
> >  		kvm->arch.pause_in_guest = true;
> >  
> > +	/*
> > +	 * EPT Misconfigurations can be generated if the value of bits 2:0
> > +	 * of an EPT paging-structure entry is 110b (write/execute).
> > +	 */
> > +	if (enable_ept)
> > +		kvm_mmu_set_mmio_spte_mask(kvm, VMX_EPT_MISCONFIG_WX_VALUE,
> > +					   VMX_EPT_MISCONFIG_WX_VALUE, 0);
> 
> Should be:
> 
> 	kvm_mmu_set_mmio_spte_mask(kvm, VMX_EPT_MISCONFIG_WX_VALUE,
> 				   	VMX_EPT_RWX_MASK, 0);

Thanks for catching it.  It's fixed in github repo.
-- 
Isaku Yamahata <isaku.yamahata@xxxxxxxxx>



[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux