On 22/09/2015 19:56, Borislav Petkov wrote: > diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c > index 69088a1ba509..3ce2b74c75dc 100644 > --- a/arch/x86/kvm/mmu.c > +++ b/arch/x86/kvm/mmu.c > @@ -3322,7 +3322,7 @@ walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep) > break; > > reserved |= is_shadow_zero_bits_set(&vcpu->arch.mmu, spte, > - leaf); > + iterator.level); > } > > walk_shadow_page_lockless_end(vcpu); > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > index c0b9ff3e1aec..a44f8fed9be1 100644 > --- a/arch/x86/kvm/x86.c > +++ b/arch/x86/kvm/x86.c > @@ -7063,13 +7063,16 @@ struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, > unsigned int id) > { > struct kvm_vcpu *vcpu; > + int idx; > > if (check_tsc_unstable() && atomic_read(&kvm->online_vcpus) != 0) > printk_once(KERN_WARNING > "kvm: SMP vm created on host with unstable TSC; " > "guest TSC will not be reliable\n"); > > + idx = srcu_read_lock(&kvm->srcu); > vcpu = kvm_x86_ops->vcpu_create(kvm, id); > + srcu_read_unlock(&kvm->srcu, idx); > > return vcpu; > } Yup, looks good. 
Let's add more debugging output: diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 3ce2b74c75dc..bf1122e9c7bf 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -3268,23 +3268,28 @@ static gpa_t nonpaging_gva_to_gpa_nested(struct kvm_vcpu *vcpu, gva_t vaddr, return vcpu->arch.nested_mmu.translate_gpa(vcpu, vaddr, access, exception); } -static bool -__is_rsvd_bits_set(struct rsvd_bits_validate *rsvd_check, u64 pte, int level) +static u64 +rsvd_bits_set(struct rsvd_bits_validate *rsvd_check, u64 pte, int level) { int bit7 = (pte >> 7) & 1, low6 = pte & 0x3f; + u64 mask = rsvd_check->rsvd_bits_mask[bit7][level-1]; + + if (unlikely(pte & mask)) + return mask; + if (unlikely(rsvd_check->bad_mt_xwr & (1ull << low6))) + return rsvd_check->bad_mt_xwr; - return (pte & rsvd_check->rsvd_bits_mask[bit7][level-1]) | - ((rsvd_check->bad_mt_xwr & (1ull << low6)) != 0); + return 0; } static bool is_rsvd_bits_set(struct kvm_mmu *mmu, u64 gpte, int level) { - return __is_rsvd_bits_set(&mmu->guest_rsvd_check, gpte, level); + return rsvd_bits_set(&mmu->guest_rsvd_check, gpte, level) != 0; } -static bool is_shadow_zero_bits_set(struct kvm_mmu *mmu, u64 spte, int level) +static u64 shadow_rsvd_bits_set(struct kvm_mmu *mmu, u64 spte, int level) { - return __is_rsvd_bits_set(&mmu->shadow_zero_check, spte, level); + return rsvd_bits_set(&mmu->shadow_zero_check, spte, level); } static bool quickly_check_mmio_pf(struct kvm_vcpu *vcpu, u64 addr, bool direct) @@ -3302,6 +3307,7 @@ walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep) struct kvm_shadow_walk_iterator iterator; u64 sptes[PT64_ROOT_LEVEL], spte = 0ull; int root, leaf; + u64 result; bool reserved = false; if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) @@ -3321,15 +3327,20 @@ walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr, u64 *sptep) if (!is_shadow_present_pte(spte)) break; - reserved |= is_shadow_zero_bits_set(&vcpu->arch.mmu, spte, - iterator.level); + result = 
shadow_rsvd_bits_set(&vcpu->arch.mmu, spte, + iterator.level); + if (unlikely(result)) { + pr_err("%s: detect reserved bits on spte, addr 0x%llx " + "(level %d, 0x%llx)\n", + __func__, addr, iterator.level, result); + reserved = true; + } } walk_shadow_page_lockless_end(vcpu); if (reserved) { - pr_err("%s: detect reserved bits on spte, addr 0x%llx, dump hierarchy:\n", - __func__, addr); + pr_err("dump hierarchy:\n"); while (root > leaf) { pr_err("------ spte 0x%llx level %d.\n", sptes[root - 1], root); > [ 49.456533] walk_shadow_page_get_mmio_spte: detect reserved bits on spte, addr 0xb8000, dump hierarchy: > [ 49.465945] ------ spte 0x416ed9027 level 4. > [ 49.470221] ------ spte 0x416888027 level 3. > [ 49.474494] ------ spte 0x41694f027 level 2. > [ 49.474495] ------ spte 0xffff0000000b8f67 level 1. So the result is the same as before. Just to be safe, can you try using "-cpu host" on the QEMU command line and see if it changes anything? This would catch things such as an Intel CPUID on an AMD host. Paolo > [ 49.474496] ------------[ cut here ]------------ > [ 49.474515] WARNING: CPU: 4 PID: 3540 at arch/x86/kvm/mmu.c:3385 handle_mmio_page_fault.part.57+0x1a/0x20 [kvm]() > [ 49.474555] Modules linked in: tun sha256_ssse3 sha256_generic drbg binfmt_misc ipv6 vfat fat fuse dm_crypt dm_mod kvm_amd kvm crc32_pclmul aesni_intel aes_x86_64 lrw gf128mul glue_helper ablk_helper cryptd amd64_edac_mod fam15h_power k10temp edac_core amdkfd amd_iommu_v2 radeon acpi_cpufreq > [ 49.474560] CPU: 4 PID: 3540 Comm: qemu-system-x86 Not tainted 4.3.0-rc2+ #2 > [ 49.474562] Hardware name: To be filled by O.E.M. 
To be filled by O.E.M./M5A97 EVO R2.0, BIOS 1503 01/16/2013 > [ 49.474569] ffffffffa032f8b2 ffff880416a73b78 ffffffff812c758a 0000000000000000 > [ 49.474574] ffff880416a73bb0 ffffffff810534c1 ffff8804171b0000 000000000000000f > [ 49.474578] 00000000000b8000 0000000000000000 00000000ffffffff ffff880416a73bc0 > [ 49.474579] Call Trace: > [ 49.474586] [<ffffffff812c758a>] dump_stack+0x4e/0x84 > [ 49.474589] [<ffffffff810534c1>] warn_slowpath_common+0x91/0xd0 > [ 49.474592] [<ffffffff810535ba>] warn_slowpath_null+0x1a/0x20 > [ 49.474603] [<ffffffffa0301a5a>] handle_mmio_page_fault.part.57+0x1a/0x20 [kvm] > [ 49.474615] [<ffffffffa0309350>] tdp_page_fault+0x2a0/0x2b0 [kvm] > [ 49.474620] [<ffffffff810a282d>] ? __lock_acquire+0x57d/0x17a0 > [ 49.474633] [<ffffffffa03035a5>] kvm_mmu_page_fault+0x35/0x240 [kvm] > [ 49.474637] [<ffffffffa03886b8>] pf_interception+0x108/0x1d0 [kvm_amd] > [ 49.474642] [<ffffffffa038ad10>] handle_exit+0x150/0xa40 [kvm_amd] > [ 49.474662] [<ffffffffa02fa398>] ? kvm_arch_vcpu_ioctl_run+0x4c8/0x16f0 [kvm] > [ 49.474674] [<ffffffffa02fa403>] kvm_arch_vcpu_ioctl_run+0x533/0x16f0 [kvm] > [ 49.474686] [<ffffffffa02fa398>] ? kvm_arch_vcpu_ioctl_run+0x4c8/0x16f0 [kvm] > [ 49.474690] [<ffffffff816bd852>] ? mutex_lock_killable_nested+0x312/0x480 > [ 49.474700] [<ffffffffa02e1979>] ? kvm_vcpu_ioctl+0x79/0x6f0 [kvm] > [ 49.474705] [<ffffffff8107e133>] ? preempt_count_sub+0xb3/0x110 > [ 49.474715] [<ffffffffa02e1c3f>] kvm_vcpu_ioctl+0x33f/0x6f0 [kvm] > [ 49.474719] [<ffffffff811939d7>] do_vfs_ioctl+0x2d7/0x530 > [ 49.474722] [<ffffffff8119f889>] ? __fget_light+0x29/0x90 > [ 49.474724] [<ffffffff81193c7c>] SyS_ioctl+0x4c/0x90 > [ 49.474729] [<ffffffff816c1a9b>] entry_SYSCALL_64_fastpath+0x16/0x73 > [ 49.474732] ---[ end trace 0e0be3552b84977c ]--- > > > Thanks. > -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html