Re: [PATCH 1/1] kvm: x86: Set highest physical address bits in non-present/reserved SPTEs

Hi Junaid,

(Cc'ing LKML.)

> commit 28a1f3ac1d0c8558ee4453d9634dad891a6e922e upstream.
> 
> Always set the 5 upper-most supported physical address bits to 1 for SPTEs
> that are marked as non-present or reserved, to make them unusable for
> L1TF attacks from the guest. Currently, this just applies to MMIO SPTEs.
> (We do not need to mark PTEs that are completely 0 as physical page 0
> is already reserved.)

I'm not subscribed to the kvm list, nor did I find the original e-mail with
its headers, so I constructed this reply from the bits I did find. If the
threading comes out wrong, that's probably why.

This patch in the host kernel breaks KVM guests on a Core 2 Duo (T9500):
guests no longer boot on a plain v4.18.9 host (which contains this patch),
and reverting the patch fixes it.

The contents of /proc/cpuinfo are below in case it matters:

processor	: 0
vendor_id	: GenuineIntel
cpu family	: 6
model		: 23
model name	: Intel(R) Core(TM)2 Duo CPU     T9500  @ 2.60GHz
stepping	: 6
microcode	: 0x60f
cpu MHz		: 798.083
cache size	: 6144 KB
physical id	: 0
siblings	: 2
core id		: 0
cpu cores	: 2
apicid		: 0
initial apicid	: 0
fpu		: yes
fpu_exception	: yes
cpuid level	: 10
wp		: yes
flags		: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx lm constant_tsc arch_perfmon pebs bts rep_good nopl cpuid aperfmperf pni dtes64 monitor ds_cpl vmx est tm2 ssse3 cx16 xtpr pdcm sse4_1 lahf_lm pti tpr_shadow vnmi flexpriority dtherm ida
bugs		: cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass l1tf
bogomips	: 5187.28
clflush size	: 64
cache_alignment	: 64
address sizes	: 36 bits physical, 48 bits virtual
power management:

processor	: 1
vendor_id	: GenuineIntel
cpu family	: 6
model		: 23
model name	: Intel(R) Core(TM)2 Duo CPU     T9500  @ 2.60GHz
stepping	: 6
microcode	: 0x60f
cpu MHz		: 798.062
cache size	: 6144 KB
physical id	: 0
siblings	: 2
core id		: 1
cpu cores	: 2
apicid		: 1
initial apicid	: 1
fpu		: yes
fpu_exception	: yes
cpuid level	: 10
wp		: yes
flags		: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe syscall nx lm constant_tsc arch_perfmon pebs bts rep_good nopl cpuid aperfmperf pni dtes64 monitor ds_cpl vmx est tm2 ssse3 cx16 xtpr pdcm sse4_1 lahf_lm pti tpr_shadow vnmi flexpriority dtherm ida
bugs		: cpu_meltdown spectre_v1 spectre_v2 spec_store_bypass l1tf
bogomips	: 5187.28
clflush size	: 64
cache_alignment	: 64
address sizes	: 36 bits physical, 48 bits virtual
power management:
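
For what it's worth, the "address sizes" lines above report 36 bits physical,
so if I read the quoted kvm_mmu_reset_all_pte_masks() further down correctly,
this host takes the new branch and ends up with shadow_nonpresent_or_rsvd_mask
covering bits 31..35. Here is a stand-alone sketch of that computation, with
rsvd_bits() re-declared the way I understand the mmu.h helper (so treat the
details as my assumption, not fact):

#include <stdint.h>
#include <stdio.h>

/* Assumed to match rsvd_bits() in arch/x86/kvm/mmu.h: a mask with
 * bits s..e (inclusive) set. */
static uint64_t rsvd_bits(int s, int e)
{
	return ((1ULL << (e - s + 1)) - 1) << s;
}

int main(void)
{
	const int phys_bits = 36;	/* "address sizes" above */
	const int mask_len = 5;		/* shadow_nonpresent_or_rsvd_mask_len */
	uint64_t mask = 0;

	/* Mirrors the condition in the quoted kvm_mmu_reset_all_pte_masks(). */
	if (phys_bits < 52 - mask_len)
		mask = rsvd_bits(phys_bits - mask_len, phys_bits - 1);

	/* Prints 0xf80000000, i.e. bits 31..35 set. */
	printf("shadow_nonpresent_or_rsvd_mask = %#llx\n",
	       (unsigned long long)mask);
	return 0;
}

On a CPU with the full 52 physical address bits the mask would stay zero,
which presumably is why this only shows up on smaller-maxphyaddr parts like
this one.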

> 
> This allows mitigation of L1TF without disabling hyper-threading by using
> shadow paging mode instead of EPT.
> 
> Signed-off-by: Junaid Shahid <junaids@xxxxxxxxxx>
> Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx>
> Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx>
> ---
>  arch/x86/kvm/mmu.c | 43 ++++++++++++++++++++++++++++++++++++++-----
>  arch/x86/kvm/x86.c |  8 ++++++--
>  2 files changed, 44 insertions(+), 7 deletions(-)
> 
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index a44e568363a4..42f1ba92622a 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -221,6 +221,17 @@ static const u64 shadow_acc_track_saved_bits_mask = PT64_EPT_READABLE_MASK |
>  						    PT64_EPT_EXECUTABLE_MASK;
>  static const u64 shadow_acc_track_saved_bits_shift = PT64_SECOND_AVAIL_BITS_SHIFT;
>  
> +/*
> + * This mask must be set on all non-zero Non-Present or Reserved SPTEs in order
> + * to guard against L1TF attacks.
> + */
> +static u64 __read_mostly shadow_nonpresent_or_rsvd_mask;
> +
> +/*
> + * The number of high-order 1 bits to use in the mask above.
> + */
> +static const u64 shadow_nonpresent_or_rsvd_mask_len = 5;
> +
>  static void mmu_spte_set(u64 *sptep, u64 spte);
>  
>  void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value)
> @@ -308,9 +319,13 @@ static void mark_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, u64 gfn,
>  {
>  	unsigned int gen = kvm_current_mmio_generation(vcpu);
>  	u64 mask = generation_mmio_spte_mask(gen);
> +	u64 gpa = gfn << PAGE_SHIFT;
>  
>  	access &= ACC_WRITE_MASK | ACC_USER_MASK;
> -	mask |= shadow_mmio_value | access | gfn << PAGE_SHIFT;
> +	mask |= shadow_mmio_value | access;
> +	mask |= gpa | shadow_nonpresent_or_rsvd_mask;
> +	mask |= (gpa & shadow_nonpresent_or_rsvd_mask)
> +		<< shadow_nonpresent_or_rsvd_mask_len;
>  
>  	trace_mark_mmio_spte(sptep, gfn, access, gen);
>  	mmu_spte_set(sptep, mask);
> @@ -323,8 +338,14 @@ static bool is_mmio_spte(u64 spte)
>  
>  static gfn_t get_mmio_spte_gfn(u64 spte)
>  {
> -	u64 mask = generation_mmio_spte_mask(MMIO_GEN_MASK) | shadow_mmio_mask;
> -	return (spte & ~mask) >> PAGE_SHIFT;
> +	u64 mask = generation_mmio_spte_mask(MMIO_GEN_MASK) | shadow_mmio_mask |
> +		   shadow_nonpresent_or_rsvd_mask;
> +	u64 gpa = spte & ~mask;
> +
> +	gpa |= (spte >> shadow_nonpresent_or_rsvd_mask_len)
> +	       & shadow_nonpresent_or_rsvd_mask;
> +
> +	return gpa >> PAGE_SHIFT;
>  }
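
If I follow the two hunks just above correctly, then with
shadow_nonpresent_or_rsvd_mask covering bits 31..35 (as it would on this
36-bit host), mark_mmio_spte() copies GPA bits 31..35 into bits 36..40 of the
SPTE, but the "spte & ~mask" in get_mmio_spte_gfn() doesn't seem to strip that
shifted copy back out before the original bits are OR-ed in again, so the
reconstructed GFN for any MMIO GPA at or above 2 GiB would keep stray high
bits. That's only my reading, though; below is a rough stand-alone sketch of
the round trip as I understand it, with the generation/access/MMIO bits left
out and the helpers as simplified stand-ins, not the real functions:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT 12

/* Bits 31..35, i.e. shadow_nonpresent_or_rsvd_mask on a 36-bit host. */
static const uint64_t rsvd_mask = 0xf80000000ULL;
static const int rsvd_len = 5;

/* Simplified mark_mmio_spte(): only the GPA handling from the hunk above. */
static uint64_t encode(uint64_t gfn)
{
	uint64_t gpa = gfn << PAGE_SHIFT;
	uint64_t spte = gpa | rsvd_mask;

	spte |= (gpa & rsvd_mask) << rsvd_len;
	return spte;
}

/* Simplified get_mmio_spte_gfn() from the hunk above. */
static uint64_t decode(uint64_t spte)
{
	uint64_t gpa = spte & ~rsvd_mask;

	gpa |= (spte >> rsvd_len) & rsvd_mask;
	return gpa >> PAGE_SHIFT;
}

int main(void)
{
	uint64_t gfn = 0x80000;	/* GPA of 2 GiB, i.e. bit 31 set */
	uint64_t out = decode(encode(gfn));

	/* As I read the quoted code, these differ: out comes back as
	 * 0x1080000, with the shifted copy of bit 31 left behind. */
	printf("gfn in  %#llx\ngfn out %#llx\n",
	       (unsigned long long)gfn, (unsigned long long)out);
	return 0;
}

If that reading is right, it would at least be consistent with guests falling
over once they touch MMIO above 2 GiB, but I haven't dug any further than
this.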
>  
>  static unsigned get_mmio_spte_access(u64 spte)
> @@ -381,7 +402,7 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
>  }
>  EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes);
>  
> -static void kvm_mmu_clear_all_pte_masks(void)
> +static void kvm_mmu_reset_all_pte_masks(void)
>  {
>  	shadow_user_mask = 0;
>  	shadow_accessed_mask = 0;
> @@ -391,6 +412,18 @@ static void kvm_mmu_clear_all_pte_masks(void)
>  	shadow_mmio_mask = 0;
>  	shadow_present_mask = 0;
>  	shadow_acc_track_mask = 0;
> +
> +	/*
> +	 * If the CPU has 46 or less physical address bits, then set an
> +	 * appropriate mask to guard against L1TF attacks. Otherwise, it is
> +	 * assumed that the CPU is not vulnerable to L1TF.
> +	 */
> +	if (boot_cpu_data.x86_phys_bits <
> +	    52 - shadow_nonpresent_or_rsvd_mask_len)
> +		shadow_nonpresent_or_rsvd_mask =
> +			rsvd_bits(boot_cpu_data.x86_phys_bits -
> +				  shadow_nonpresent_or_rsvd_mask_len,
> +				  boot_cpu_data.x86_phys_bits - 1);
>  }
>  
>  static int is_cpuid_PSE36(void)
> @@ -5500,7 +5533,7 @@ int kvm_mmu_module_init(void)
>  {
>  	int ret = -ENOMEM;
>  
> -	kvm_mmu_clear_all_pte_masks();
> +	kvm_mmu_reset_all_pte_masks();
>  
>  	pte_list_desc_cache = kmem_cache_create("pte_list_desc",
>  					    sizeof(struct pte_list_desc),
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 24c84aa87049..6e5626822743 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -6506,8 +6506,12 @@ static void kvm_set_mmio_spte_mask(void)
>  	 * Set the reserved bits and the present bit of an paging-structure
>  	 * entry to generate page fault with PFER.RSV = 1.
>  	 */
> -	 /* Mask the reserved physical address bits. */
> -	mask = rsvd_bits(maxphyaddr, 51);
> +
> +	/*
> +	 * Mask the uppermost physical address bit, which would be reserved as
> +	 * long as the supported physical address width is less than 52.
> +	 */
> +	mask = 1ull << 51;
>  
>  	/* Set the present bit. */
>  	mask |= 1ull;

-- 
Kind regards,

Sakari Ailus
e-mail: sakari.ailus@xxxxxx


