Just some editing of the comments: > diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h > index 44adbb8..8df2581 100644 > --- a/arch/x86/include/asm/kvm_host.h > +++ b/arch/x86/include/asm/kvm_host.h > @@ -326,6 +326,15 @@ struct kvm_mmu { > */ > u8 permissions[16]; > > + /* > + * PKRU bitmap mask indicates if pkey (ADi/WDi) check is needed > + * > + * There are 16 domains which are indexed by page fault error > + * code [4:1] and the PFEC.RSVD is replaced by ACC_USER_MASK, > + * each domain has 2 bits which indicate AD and WD of pkey. + * The pkru_mask indicates if protection key checks are needed. It + * consists of 16 domains indexed by page fault error code bits [4:1], + * with PFEC.RSVD replaced by ACC_USER_MASK from the page tables. + * Each domain has 2 bits which are ANDed with AD and WD from PKRU. > + */ > + u32 pkru_mask; > + > u64 *pae_root; > u64 *lm_root; > > diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c > index 0e20230..e5604d1 100644 > --- a/arch/x86/kvm/mmu.c > +++ b/arch/x86/kvm/mmu.c > @@ -3836,6 +3836,72 @@ static void update_permission_bitmask(struct kvm_vcpu *vcpu, > } > } > > +/* > +* PKU:additional mechanism by which the paging controls access to user-mode > +* addresses based on the value in the PKRU register. A fault is considered > +* as a PKU violation if all of the following conditions are true: > +* 1.CR4_PKE=1. > +* 2.EFER_LMA=1. > +* 3.page is present with no reserved bit violations. > +* 4.the access is not an instruction fetch. > +* 5.the access is to a user page. > +* 6.PKRU.AD=1 > +* or The access is a data write and > +* PKRU.WD=1 and either CR0.WP=1 or it is a user access. > +* > +* PKRU bitmask is produced according to the conditions above. +* PKU is an additional mechanism by which the paging controls access to +* user-mode addresses based on the value in the PKRU register. Protection +* key violations are reported through a bit in the page fault error code. +* Unlike other bits of the error code, the PK bit is not known at the +* call site of e.g. gva_to_gpa; it must be computed directly in +* permission_fault based on two bits of PKRU, on some machine state (CR4, +* CR0, EFER, CPL), and on other bits of the error code and the page tables. * -* PKRU bitmask is produced according to the conditions above. +* In particular the following conditions come from the error code, the +* page tables and the machine state: +* - PK is always zero unless CR4.PKE=1 and EFER.LMA=1 +* - PK is always zero if RSVD=1 (reserved bit set) or F=1 (instruction fetch) +* - PK is always zero if U=0 in the page tables +* - PKRU.WD is ignored if CR0.WP=0 and the access is a supervisor access. +* +* The PKRU bitmask caches the result of these four conditions. The error +* code (minus the P bit) and the page table's U bit form an index into the +* PKRU bitmask. Two bits of the PKRU bitmask are then extracted and ANDed +* with the two bits of the PKRU register corresponding to the protection key. +* For the first three conditions above the bits will be 00, thus masking +* away both AD and WD. For the last condition, only WD will be masked away. Thanks, Paolo > +*/ > +static void update_pkru_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, > + bool ept) > +{ > + unsigned bit; > + bool wp; > + > + if (ept) { > + mmu->pkru_mask = 0; > + return; > + } > + > + /* PKEY is enabled only if CR4.PKE and EFER.LMA are both set. */ > + if (!kvm_read_cr4_bits(vcpu, X86_CR4_PKE) || !is_long_mode(vcpu)) { > + mmu->pkru_mask = 0; > + return; > + } > + > + wp = is_write_protection(vcpu); > + > + for (bit = 0; bit < ARRAY_SIZE(mmu->permissions); ++bit) { > + unsigned pfec, pkey_bits; > + bool check_pkey, check_write, ff, uf, wf, pte_user; > + > + pfec = bit << 1; > + ff = pfec & PFERR_FETCH_MASK; > + uf = pfec & PFERR_USER_MASK; > + wf = pfec & PFERR_WRITE_MASK; > + > + /* PFEC.RSVD is replaced by ACC_USER_MASK. */ > + pte_user = pfec & PFERR_RSVD_MASK; > + > + /* > + * Only need to check the access which is not an > + * instruction fetch and is to a user page. > + */ > + check_pkey = (!ff && pte_user); > + /* > + * write access is controlled by PKRU if it is a > + * user access or CR0.WP = 1. > + */ > + check_write = check_pkey && wf && (uf || wp); > + > + /* PKRU.AD stops both read and write access. */ > + pkey_bits = !!check_pkey; > + /* PKRU.WD stops write access. */ > + pkey_bits |= (!!check_write) << 1; > + > + mmu->pkru_mask |= (pkey_bits & 3) << pfec; > + } > +} > + > static void update_last_nonleaf_level(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu) > { > unsigned root_level = mmu->root_level; > @@ -3863,6 +3929,7 @@ static void paging64_init_context_common(struct kvm_vcpu *vcpu, > > reset_rsvds_bits_mask(vcpu, context); > update_permission_bitmask(vcpu, context, false); > + update_pkru_bitmask(vcpu, context, false); > update_last_nonleaf_level(vcpu, context); > > MMU_WARN_ON(!is_pae(vcpu)); > @@ -3890,6 +3957,7 @@ static void paging32_init_context(struct kvm_vcpu *vcpu, > > reset_rsvds_bits_mask(vcpu, context); > update_permission_bitmask(vcpu, context, false); > + update_pkru_bitmask(vcpu, context, false); > update_last_nonleaf_level(vcpu, context); > > context->page_fault = paging32_page_fault; > @@ -3948,6 +4016,7 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) > } > > update_permission_bitmask(vcpu, context, false); > + update_pkru_bitmask(vcpu, context, false); > update_last_nonleaf_level(vcpu, context); > reset_tdp_shadow_zero_bits_mask(vcpu, context); > } > @@ -4000,6 +4069,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly) > context->direct_map = false; > > update_permission_bitmask(vcpu, context, true); > + update_pkru_bitmask(vcpu, context, true); > reset_rsvds_bits_mask_ept(vcpu, context, execonly); > reset_ept_shadow_zero_bits_mask(vcpu, context, execonly); > } > @@ -4054,6 +4124,7 @@ static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu) > } > > update_permission_bitmask(vcpu, g_context, false); > + update_pkru_bitmask(vcpu, g_context, false); > update_last_nonleaf_level(vcpu, g_context); > } > > -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html