Re: [PATCH 6/9] KVM, pkeys: add pkeys support for permission_fault logic

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




On 09/11/2015 12:54, Huaitong Han wrote:
> Protection keys define a new 4-bit protection key field (PKEY) in bits
> 62:59 of leaf entries of the page tables, the PKEY is an index to PKRU
> register(16 domains), every domain has 2 bits(write disable bit, access
> disable bit).
> 
> Static logic has been produced in update_permission_bitmask, dynamic logic
> need read pkey from page table entries, get pkru value, and deduce the
> correct result.
> 
> Signed-off-by: Huaitong Han <huaitong.han@xxxxxxxxx>
> 
> diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
> index e4202e4..bbb5555 100644
> --- a/arch/x86/kvm/mmu.h
> +++ b/arch/x86/kvm/mmu.h
> @@ -3,6 +3,7 @@
>  
>  #include <linux/kvm_host.h>
>  #include "kvm_cache_regs.h"
> +#include "x86.h"
>  
>  #define PT64_PT_BITS 9
>  #define PT64_ENT_PER_PAGE (1 << PT64_PT_BITS)
> @@ -24,6 +25,11 @@
>  #define PT_PAGE_SIZE_MASK (1ULL << PT_PAGE_SIZE_SHIFT)
>  #define PT_PAT_MASK (1ULL << 7)
>  #define PT_GLOBAL_MASK (1ULL << 8)
> +
> +#define PT64_PKEY_BIT0 (1ULL << _PAGE_BIT_PKEY_BIT0)
> +#define PT64_PKEY_BIT1 (1ULL << _PAGE_BIT_PKEY_BIT1)
> +#define PT64_PKEY_BIT2 (1ULL << _PAGE_BIT_PKEY_BIT2)
> +#define PT64_PKEY_BIT3 (1ULL << _PAGE_BIT_PKEY_BIT3)
>  #define PT64_NX_SHIFT 63
>  #define PT64_NX_MASK (1ULL << PT64_NX_SHIFT)
>  
> @@ -45,6 +51,15 @@
>  #define PT_PAGE_TABLE_LEVEL 1
>  #define PT_MAX_HUGEPAGE_LEVEL (PT_PAGE_TABLE_LEVEL + KVM_NR_PAGE_SIZES - 1)
>  
> +#define PKEYS_BIT0_VALUE (1ULL << 0)
> +#define PKEYS_BIT1_VALUE (1ULL << 1)
> +#define PKEYS_BIT2_VALUE (1ULL << 2)
> +#define PKEYS_BIT3_VALUE (1ULL << 3)
> +
> +#define PKRU_READ   0
> +#define PKRU_WRITE  1
> +#define PKRU_ATTRS  2
> +
>  static inline u64 rsvd_bits(int s, int e)
>  {
>  	return ((1ULL << (e - s + 1)) - 1) << s;
> @@ -145,10 +160,44 @@ static inline bool is_write_protection(struct kvm_vcpu *vcpu)
>   * fault with the given access (in ACC_* format)?
>   */
>  static inline bool permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
> -				    unsigned pte_access, unsigned pfec)
> +		unsigned pte_access, unsigned pte_pkeys, unsigned pfec)
>  {
> -	int cpl = kvm_x86_ops->get_cpl(vcpu);
> -	unsigned long rflags = kvm_x86_ops->get_rflags(vcpu);
> +	unsigned long smap, rflags;
> +	u32 pkru;
> +	int cpl, index;
> +	bool wf, uf, pk, pkru_ad, pkru_wd;
> +
> +	cpl = kvm_x86_ops->get_cpl(vcpu);
> +	rflags = kvm_x86_ops->get_rflags(vcpu);
> +
> +	pkru = read_pkru();
> +
> +	/*
> +	* PKRU defines 32 bits, there are 16 domains and 2 attribute bits per
> +	* domain in pkru, pkey is index to a defined domain, so the value of
> +	* pkey * PKRU_ATTRS + R/W is offset of a defined domain attribute.
> +	*/
> +	pkru_ad = (pkru >> (pte_pkeys * PKRU_ATTRS + PKRU_READ)) & 1;
> +	pkru_wd = (pkru >> (pte_pkeys * PKRU_ATTRS + PKRU_WRITE)) & 1;
> +
> +	wf = pfec & PFERR_WRITE_MASK;
> +	uf = pfec & PFERR_USER_MASK;
> +
> +	/*
> +	* PKeys 2nd and 6th conditions:
> +	* 2.EFER_LMA=1
> +	* 6.PKRU.AD=1
> +	*	or The access is a data write and PKRU.WD=1 and
> +	*		either CR0.WP=1 or it is a user mode access
> +	*/
> +	pk = is_long_mode(vcpu) && (pkru_ad ||
> +			(pkru_wd && wf && (is_write_protection(vcpu) || uf)));
> +
> +	/*
> +	* PK bit right value in pfec equal to
> +	* PK bit current value in pfec and pk value.
> +	*/
> +	pfec &= (pk << PFERR_PK_BIT) + ~PFERR_PK_MASK;

PK is only applicable to guest page tables, but if you do not support
PKRU without EPT (patch 9), none of this is necessary, is it?

>  	/*
>  	 * If CPL < 3, SMAP prevention are disabled if EFLAGS.AC = 1.
> @@ -163,8 +212,8 @@ static inline bool permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
>  	 * but it will be one in index if SMAP checks are being overridden.
>  	 * It is important to keep this branchless.
>  	 */
> -	unsigned long smap = (cpl - 3) & (rflags & X86_EFLAGS_AC);
> -	int index = (pfec >> 1) +
> +	smap = (cpl - 3) & (rflags & X86_EFLAGS_AC);
> +	index = (pfec >> 1) +
>  		    (smap >> (X86_EFLAGS_AC_BIT - PFERR_RSVD_BIT + 1));
>  
>  	WARN_ON(pfec & PFERR_RSVD_MASK);
> diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
> index 736e6ab..99563bc 100644
> --- a/arch/x86/kvm/paging_tmpl.h
> +++ b/arch/x86/kvm/paging_tmpl.h
> @@ -253,6 +253,17 @@ static int FNAME(update_accessed_dirty_bits)(struct kvm_vcpu *vcpu,
>  	}
>  	return 0;
>  }
> +static inline unsigned FNAME(gpte_pkeys)(struct kvm_vcpu *vcpu, u64 gpte)
> +{
> +	unsigned pkeys = 0;
> +#if PTTYPE == 64
> +	pkeys = ((gpte & PT64_PKEY_BIT0) ? PKEYS_BIT0_VALUE : 0) |
> +		((gpte & PT64_PKEY_BIT1) ? PKEYS_BIT1_VALUE : 0) |
> +		((gpte & PT64_PKEY_BIT2) ? PKEYS_BIT2_VALUE : 0) |
> +		((gpte & PT64_PKEY_BIT3) ? PKEYS_BIT3_VALUE : 0);

This is just pkeys = (gpte >> _PAGE_BIT_PKEY_BIT0) & 15.

Paolo

> +#endif
> +	return pkeys;
> +}
>  
>  /*
>   * Fetch a guest pte for a guest virtual address
> @@ -265,12 +276,13 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
>  	pt_element_t pte;
>  	pt_element_t __user *uninitialized_var(ptep_user);
>  	gfn_t table_gfn;
> -	unsigned index, pt_access, pte_access, accessed_dirty;
> +	unsigned index, pt_access, pte_access, accessed_dirty, pte_pkeys;
>  	gpa_t pte_gpa;
>  	int offset;
>  	const int write_fault = access & PFERR_WRITE_MASK;
>  	const int user_fault  = access & PFERR_USER_MASK;
>  	const int fetch_fault = access & PFERR_FETCH_MASK;
> +	const int pk_fault = access & PFERR_PK_MASK;
>  	u16 errcode = 0;
>  	gpa_t real_gpa;
>  	gfn_t gfn;
> @@ -356,7 +368,9 @@ retry_walk:
>  		walker->ptes[walker->level - 1] = pte;
>  	} while (!is_last_gpte(mmu, walker->level, pte));
>  
> -	if (unlikely(permission_fault(vcpu, mmu, pte_access, access))) {
> +	pte_pkeys = FNAME(gpte_pkeys)(vcpu, pte);
> +	if (unlikely(permission_fault(vcpu, mmu, pte_access, pte_pkeys,
> +					access))) {
>  		errcode |= PFERR_PRESENT_MASK;
>  		goto error;
>  	}
> @@ -399,7 +413,7 @@ retry_walk:
>  	return 1;
>  
>  error:
> -	errcode |= write_fault | user_fault;
> +	errcode |= write_fault | user_fault | pk_fault;
>  	if (fetch_fault && (mmu->nx ||
>  			    kvm_read_cr4_bits(vcpu, X86_CR4_SMEP)))
>  		errcode |= PFERR_FETCH_MASK;
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 5181834..7a84b83 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -4107,7 +4107,7 @@ static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva,
>  
>  	if (vcpu_match_mmio_gva(vcpu, gva)
>  	    && !permission_fault(vcpu, vcpu->arch.walk_mmu,
> -				 vcpu->arch.access, access)) {
> +				 vcpu->arch.access, 0, access)) {
>  		*gpa = vcpu->arch.mmio_gfn << PAGE_SHIFT |
>  					(gva & (PAGE_SIZE - 1));
>  		trace_vcpu_match_mmio(gva, *gpa, write, false);
> 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux