On Mon, Jul 12, 2010 at 07:15:50PM +0300, Avi Kivity wrote:
> Currently, when we fetch an spte, we only verify that gptes match those that
> the walker saw if we build new shadow pages for them.
>
> However, this misses the following race:
>
>   vcpu1            vcpu2
>
>   walk
>                    change gpte
>                    walk
>                    instantiate sp
>
>   fetch existing sp
>
> Fix by validating every gpte, regardless of whether it is used for building
> a new sp or not.
>
> Signed-off-by: Avi Kivity <avi@xxxxxxxxxx>
> ---
>  arch/x86/kvm/paging_tmpl.h |   44 +++++++++++++++++++++++++++++++-------------
>  1 files changed, 31 insertions(+), 13 deletions(-)
>
> diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
> index 441f51c..89b2dab 100644
> --- a/arch/x86/kvm/paging_tmpl.h
> +++ b/arch/x86/kvm/paging_tmpl.h
> @@ -310,7 +310,8 @@ static bool FNAME(validate_indirect_spte)(struct kvm_vcpu *vcpu,
>  				  gw->pte_gpa[level - 1],
>  				  &curr_pte, sizeof(curr_pte));
>  	if (r || curr_pte != gw->ptes[level - 1]) {
> -		kvm_mmu_put_page(sp, sptep);
> +		if (sp)
> +			kvm_mmu_put_page(sp, sptep);
>  		return false;
>  	}
>  	return true;
> @@ -325,10 +326,11 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
>  			 int *ptwrite, pfn_t pfn)
>  {
>  	unsigned access = gw->pt_access;
> -	struct kvm_mmu_page *sp;
> +	struct kvm_mmu_page *uninitialized_var(sp);
>  	u64 *sptep = NULL;
>  	int uninitialized_var(level);
>  	bool dirty = is_dirty_gpte(gw->ptes[gw->level - 1]);
> +	int top_level;
>  	unsigned direct_access;
>  	struct kvm_shadow_walk_iterator iterator;
>
> @@ -339,34 +341,46 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
>  	if (!dirty)
>  		direct_access &= ~ACC_WRITE_MASK;
>
> +	top_level = vcpu->arch.mmu.root_level;
> +	if (top_level == PT32E_ROOT_LEVEL)
> +		top_level = PT32_ROOT_LEVEL;
> +	/*
> +	 * Verify that the top-level gpte is still there. Since the page
> +	 * is a root page, it is either write protected (and cannot be
> +	 * changed from now on) or it is invalid (in which case, we don't
> +	 * really care if it changes underneath us after this point).
> +	 */
> +	if (!FNAME(validate_indirect_spte)(vcpu, NULL, NULL, gw, top_level))
> +		goto out_error;
> +
>  	for (shadow_walk_init(&iterator, vcpu, addr);
>  	     shadow_walk_okay(&iterator) && iterator.level > gw->level;
>  	     shadow_walk_next(&iterator)) {
>  		gfn_t table_gfn;
> +		bool new_page = false;
>
>  		level = iterator.level;
>  		sptep = iterator.sptep;
>
>  		drop_large_spte(vcpu, sptep);
>
> -		if (is_shadow_present_pte(*sptep))
> -			continue;
> -
> -		table_gfn = gw->table_gfn[level - 2];
> -		sp = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1,
> -				      false, access, sptep);
> +		if (!is_shadow_present_pte(*sptep)) {
> +			table_gfn = gw->table_gfn[level - 2];
> +			sp = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1,
> +					      false, access, sptep);
> +			new_page = true;
> +		}
>
>  		/*
>  		 * Verify that the gpte in the page we've just write
>  		 * protected is still there.
>  		 */
>  		if (!FNAME(validate_indirect_spte)(vcpu, sptep, sp,
> -						   gw, level - 1)) {
> -			kvm_release_pfn_clean(pfn);
> -			return NULL;
> -		}
> +						   gw, level - 1))
> +			goto out_error;

If it's not a new page, and validation fails, can't "sp" point to a shadow
page previously instantiated in the loop?
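
To make the concern concrete, here is a minimal userspace sketch of the
control flow in question (all types, values, and helpers below are
simplified stand-ins for the KVM ones, not the real kernel code): sp is
assigned only when a new page is instantiated, so on an iteration where
the spte is already present, sp still holds whatever an earlier
iteration left in it.

#include <stdbool.h>
#include <stdio.h>

struct sp_sim { int level; };	/* stand-in for struct kvm_mmu_page */

int main(void)
{
	struct sp_sim pages[5];
	struct sp_sim *sp = NULL;	/* uninitialized_var(sp) in the patch */
	/* present[level]: is the spte at this level already shadow-present? */
	bool present[5] = { false, false, false, true, false };
	int fail_level = 3;		/* pretend gpte validation fails here */
	int level;

	for (level = 4; level >= 2; level--) {
		if (!present[level]) {
			/* models kvm_mmu_get_page(): instantiate a new sp */
			pages[level].level = level;
			sp = &pages[level];
		}
		/* models FNAME(validate_indirect_spte)(..., sp, gw, level - 1) */
		if (level == fail_level) {
			/*
			 * The level-3 spte was already present, so sp still
			 * points at the page instantiated for level 4; a
			 * kvm_mmu_put_page(sp, sptep) here would act on the
			 * wrong shadow page.
			 */
			printf("validation failed at level %d, sp is from level %d\n",
			       level, sp->level);
			return 1;
		}
	}
	return 0;
}

Running it prints "validation failed at level 3, sp is from level 4",
which is exactly the stale-pointer case being asked about.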