Currently, when we fetch an spte, we only verify that gptes match those
that the walker saw if we build new shadow pages for them.

However, this misses the following race:

  vcpu1             vcpu2

  walk
                    change gpte
                    walk
                    instantiate sp

  fetch existing sp

Fix by validating every gpte, regardless of whether it is used for building
a new sp or not.

Signed-off-by: Avi Kivity <avi@xxxxxxxxxx>
---
 arch/x86/kvm/paging_tmpl.h |   44 +++++++++++++++++++++++++++++++-------------
 1 files changed, 31 insertions(+), 13 deletions(-)

diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 441f51c..89b2dab 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -310,7 +310,8 @@ static bool FNAME(validate_indirect_spte)(struct kvm_vcpu *vcpu,
 				  gw->pte_gpa[level - 1],
 				  &curr_pte, sizeof(curr_pte));
 	if (r || curr_pte != gw->ptes[level - 1]) {
-		kvm_mmu_put_page(sp, sptep);
+		if (sp)
+			kvm_mmu_put_page(sp, sptep);
 		return false;
 	}
 	return true;
@@ -325,10 +326,11 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 			 int *ptwrite, pfn_t pfn)
 {
 	unsigned access = gw->pt_access;
-	struct kvm_mmu_page *sp;
+	struct kvm_mmu_page *uninitialized_var(sp);
 	u64 *sptep = NULL;
 	int uninitialized_var(level);
 	bool dirty = is_dirty_gpte(gw->ptes[gw->level - 1]);
+	int top_level;
 	unsigned direct_access;
 	struct kvm_shadow_walk_iterator iterator;
 
@@ -339,34 +341,46 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 	if (!dirty)
 		direct_access &= ~ACC_WRITE_MASK;
 
+	top_level = vcpu->arch.mmu.root_level;
+	if (top_level == PT32E_ROOT_LEVEL)
+		top_level = PT32_ROOT_LEVEL;
+	/*
+	 * Verify that the top-level gpte is still there.  Since the page
+	 * is a root page, it is either write protected (and cannot be
+	 * changed from now on) or it is invalid (in which case, we don't
+	 * really care if it changes underneath us after this point).
+	 */
+	if (!FNAME(validate_indirect_spte)(vcpu, NULL, NULL, gw, top_level))
+		goto out_error;
+
 	for (shadow_walk_init(&iterator, vcpu, addr);
 	     shadow_walk_okay(&iterator) && iterator.level > gw->level;
 	     shadow_walk_next(&iterator)) {
 		gfn_t table_gfn;
+		bool new_page = false;
 
 		level = iterator.level;
 		sptep = iterator.sptep;
 
 		drop_large_spte(vcpu, sptep);
 
-		if (is_shadow_present_pte(*sptep))
-			continue;
-
-		table_gfn = gw->table_gfn[level - 2];
-		sp = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1,
-				      false, access, sptep);
+		if (!is_shadow_present_pte(*sptep)) {
+			table_gfn = gw->table_gfn[level - 2];
+			sp = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1,
+					      false, access, sptep);
+			new_page = true;
+		}
 
 		/*
 		 * Verify that the gpte in the page we've just write
 		 * protected is still there.
 		 */
 		if (!FNAME(validate_indirect_spte)(vcpu, sptep, sp,
-						   gw, level - 1)) {
-			kvm_release_pfn_clean(pfn);
-			return NULL;
-		}
+						   gw, level - 1))
+			goto out_error;
 
-		link_shadow_page(sptep, sp);
+		if (new_page)
+			link_shadow_page(sptep, sp);
 	}
 
 	for (;
@@ -399,6 +413,10 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
 		     gw->gfn, pfn, false, true);
 
 	return sptep;
+
+out_error:
+	kvm_release_pfn_clean(pfn);
+	return NULL;
 }
 
 /*
-- 
1.7.1
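
For readers less familiar with the pattern the FNAME(validate_indirect_spte)
calls implement, it is simply: cache each gpte during the walk, then re-read
it from guest memory and compare before trusting the shadow page, whether the
sp was freshly instantiated or already existed. Below is a minimal user-space
sketch of that idea, not kernel code; gpte_t, read_gpte() and
gpte_still_valid() are simplified stand-ins invented for illustration (the
real code re-reads via kvm_read_guest_atomic() and compares against
gw->ptes[level - 1]).

/*
 * Standalone sketch of the gpte revalidation pattern.  All names here
 * are illustrative stand-ins, not the kernel's actual definitions.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef uint64_t gpte_t;

/* Stand-in for re-reading a gpte from guest memory. */
static gpte_t read_gpte(const gpte_t *guest_table, int index)
{
	return guest_table[index];
}

/*
 * Core of the fix: before committing to a shadow page, confirm the gpte
 * the walker cached is still what the guest page table contains now.
 */
static bool gpte_still_valid(const gpte_t *guest_table, int index,
			     gpte_t cached)
{
	return read_gpte(guest_table, index) == cached;
}

int main(void)
{
	gpte_t guest_table[4] = { 0x1003, 0x2003, 0x3003, 0x4003 };
	gpte_t cached = guest_table[1];	/* what vcpu1's walker saw */

	guest_table[1] = 0x5003;	/* vcpu2 changes the gpte meanwhile */

	if (!gpte_still_valid(guest_table, 1, cached))
		puts("gpte changed under us; bail out and refault");
	return 0;
}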