> 
> +#define INVALID_INDEX (-1)
> +
>  static int mmu_unsync_walk(struct kvm_mmu_page *sp,
>  			   struct kvm_mmu_pages *pvec)
>  {
>  	if (!sp->unsync_children)
>  		return 0;
> 
> -	mmu_pages_add(pvec, sp, 0);
> +	/*
> +	 * do not count the index in the parent of the sp we
> +	 * start walking from.
> +	 */
> +	mmu_pages_add(pvec, sp, INVALID_INDEX);
>  	return __mmu_unsync_walk(sp, pvec);
>  }
> 
> @@ -1980,8 +1986,11 @@ static int mmu_pages_next(struct kvm_mmu_pages *pvec,
>  			return n;
>  		}
> 
> -		parents->parent[sp->role.level-2] = sp;
> -		parents->idx[sp->role.level-1] = pvec->page[n].idx;
> +		parents->parent[sp->role.level - 2] = sp;
> +
> +		/* skip setting index of the sp we start from. */
> +		if (pvec->page[n].idx != INVALID_INDEX)
> +			parents->idx[sp->role.level - 1] = pvec->page[n].idx;

So far so good.

>  	}
> 
>  	return n;
> @@ -1999,6 +2008,7 @@ static void mmu_pages_clear_parents(struct mmu_page_path *parents)
>  		if (!sp)
>  			return;
> 
> +		WARN_ON(idx != INVALID_INDEX);

Shouldn't this warn if idx is *equal* to INVALID_INDEX?

>  		clear_unsync_child_bit(sp, idx);
>  		level++;
>  	} while (level < PT64_ROOT_LEVEL-1 && !sp->unsync_children);
> @@ -2008,7 +2018,7 @@ static void kvm_mmu_pages_init(struct kvm_mmu_page *parent,
>  			       struct mmu_page_path *parents,
>  			       struct kvm_mmu_pages *pvec)
>  {
> -	parents->parent[parent->role.level-1] = NULL;
> +	parents->parent[parent->role.level - 2] = NULL;

This is meant to stop mmu_pages_clear_parents _after_ it has processed
sp, so the "-1" is correct. The right fix would be:

	if (parent->role.level < PT64_ROOT_LEVEL-1)
		parents->parent[parent->role.level - 1] = NULL;

and no one has noticed it so far because, if the bug hits, you just
write over the beginning of ->idx (which so far is uninitialized) and
nothing bad happens.
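Concretely, on a 64-bit build (eight-byte pointers, four-byte unsigned
int, no padding between the two arrays), the stray eight-byte NULL store
for a level-4 root lands exactly on idx[0] and idx[1]. A throwaway
user-space sketch of the overlap, not kernel code:

#include <stdio.h>
#include <stddef.h>

#define PT64_ROOT_LEVEL 4

/* The arrays as sized before the fix: PT64_ROOT_LEVEL-1 elements. */
struct mmu_page_path {
	void *parent[PT64_ROOT_LEVEL - 1];
	unsigned int idx[PT64_ROOT_LEVEL - 1];
};

int main(void)
{
	/*
	 * For a walk rooted at a level-4 page, the sentinel store goes
	 * to parent[PT64_ROOT_LEVEL - 1], i.e. parent[3]: one element
	 * past the end of the array.
	 */
	size_t oob_start = offsetof(struct mmu_page_path, parent)
			   + (PT64_ROOT_LEVEL - 1) * sizeof(void *);
	size_t idx_start = offsetof(struct mmu_page_path, idx);

	printf("out-of-bounds store covers bytes %zu..%zu\n",
	       oob_start, oob_start + sizeof(void *) - 1);
	printf("idx[0] at bytes %zu..%zu, idx[1] at %zu..%zu\n",
	       idx_start, idx_start + sizeof(unsigned int) - 1,
	       idx_start + sizeof(unsigned int),
	       idx_start + 2 * sizeof(unsigned int) - 1);
	return 0;
}

This prints bytes 24..31 for the store and 24..27/28..31 for
idx[0]/idx[1]: the NULL lands exactly on the two idx slots mentioned
above, and nowhere else.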
I'm thinking of the following (untested) patch instead:

-------- 8< --------------
From: Paolo Bonzini <pbonzini@xxxxxxxxxx>
Subject: [PATCH] KVM: MMU: Fix ubsan warnings

kvm_mmu_pages_init is doing some really yucky stuff. It is setting up
a sentinel for mmu_pages_clear_parents; however, because of a) the way
levels are numbered starting from 1 and b) the way mmu_page_path sizes
its arrays with PT64_ROOT_LEVEL-1 elements, the access can be out of
bounds. This is harmless because the code overwrites up to the first
two elements of parents->idx and these are initialized, and because the
sentinel is not needed in this case---mmu_pages_clear_parents exits
anyway when it gets to the end of the array. However ubsan complains,
and everyone else should too.

This fix does three things. First it makes the mmu_page_path arrays
PT64_ROOT_LEVEL elements in size, so that we can write to them without
checking the level in advance. Second it disintegrates
kvm_mmu_pages_init between mmu_unsync_walk (which resets struct
kvm_mmu_pages) and mmu_pages_next (which unconditionally places a NULL
sentinel at the end of the current path). This is okay because the
mmu_page_path is only used in mmu_pages_clear_parents;
mmu_pages_clear_parents itself is called within a for_each_sp iterator,
and hence always after a call to mmu_pages_next. Third it changes
mmu_pages_clear_parents to just use the sentinel to stop iteration,
without checking the bounds on level.

Reported-by: Sasha Levin <sasha.levin@xxxxxxxxxx>
Reported-by: Mike Krinkin <krinkin.m.u@xxxxxxxxx>
Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx>

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 33cc9f3f5b02..f4f5e7ca14dd 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1843,6 +1843,7 @@ static int __mmu_unsync_walk(struct kvm_mmu_page *sp,
 static int mmu_unsync_walk(struct kvm_mmu_page *sp,
 			   struct kvm_mmu_pages *pvec)
 {
+	pvec->nr = 0;
 	if (!sp->unsync_children)
 		return 0;
 
@@ -1956,8 +1957,8 @@ static void kvm_sync_pages(struct kvm_vcpu *vcpu, gfn_t gfn)
 }
 
 struct mmu_page_path {
-	struct kvm_mmu_page *parent[PT64_ROOT_LEVEL-1];
-	unsigned int idx[PT64_ROOT_LEVEL-1];
+	struct kvm_mmu_page *parent[PT64_ROOT_LEVEL];
+	unsigned int idx[PT64_ROOT_LEVEL];
 };
 
 #define for_each_sp(pvec, sp, parents, i)			\
@@ -1971,19 +1972,21 @@ static int mmu_pages_next(struct kvm_mmu_pages *pvec,
 			  int i)
 {
 	int n;
+	int level = PT_PAGE_TABLE_LEVEL;
 
 	for (n = i+1; n < pvec->nr; n++) {
 		struct kvm_mmu_page *sp = pvec->page[n].sp;
 
-		if (sp->role.level == PT_PAGE_TABLE_LEVEL) {
-			parents->idx[0] = pvec->page[n].idx;
-			return n;
-		}
+		level = sp->role.level;
+		parents->idx[level-1] = pvec->page[n].idx;
 
-		parents->parent[sp->role.level-2] = sp;
-		parents->idx[sp->role.level-1] = pvec->page[n].idx;
+		if (level == PT_PAGE_TABLE_LEVEL)
+			break;
+
+		parents->parent[level-2] = sp;
 	}
 
+	parents->parent[level-1] = NULL;
 	return n;
 }
 
@@ -1994,22 +1996,13 @@ static void mmu_pages_clear_parents(struct mmu_page_path *parents)
 	do {
 		unsigned int idx = parents->idx[level];
-
 		sp = parents->parent[level];
 		if (!sp)
 			return;
 
 		clear_unsync_child_bit(sp, idx);
 		level++;
-	} while (level < PT64_ROOT_LEVEL-1 && !sp->unsync_children);
-}
-
-static void kvm_mmu_pages_init(struct kvm_mmu_page *parent,
-			       struct mmu_page_path *parents,
-			       struct kvm_mmu_pages *pvec)
-{
-	parents->parent[parent->role.level-1] = NULL;
-	pvec->nr = 0;
+	} while (!sp->unsync_children);
 }
 
 static void mmu_sync_children(struct kvm_vcpu *vcpu,
@@ -2021,7 +2014,6 @@ static void mmu_sync_children(struct kvm_vcpu *vcpu,
 	struct kvm_mmu_pages pages;
 	LIST_HEAD(invalid_list);
 
-	kvm_mmu_pages_init(parent, &parents, &pages);
 	while (mmu_unsync_walk(parent, &pages)) {
 		bool protected = false;
 
@@ -2037,7 +2029,6 @@ static void mmu_sync_children(struct kvm_vcpu *vcpu,
 		}
 		kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list);
 		cond_resched_lock(&vcpu->kvm->mmu_lock);
-		kvm_mmu_pages_init(parent, &parents, &pages);
 	}
 }
 
@@ -2269,7 +2260,6 @@ static int mmu_zap_unsync_children(struct kvm *kvm,
 	if (parent->role.level == PT_PAGE_TABLE_LEVEL)
 		return 0;
 
-	kvm_mmu_pages_init(parent, &parents, &pages);
 	while (mmu_unsync_walk(parent, &pages)) {
 		struct kvm_mmu_page *sp;
 
@@ -2278,7 +2268,6 @@ static int mmu_zap_unsync_children(struct kvm *kvm,
 			mmu_pages_clear_parents(&parents);
 			zapped++;
 		}
-		kvm_mmu_pages_init(parent, &parents, &pages);
 	}
 
 	return zapped;

Paolo
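To see the sentinel-terminated walk that the commit message describes in
isolation, here is a minimal user-space model (the struct shape and the
loop mirror the kernel's; the level-3 example path, the counter standing
in for unsync_child_bitmap, and the printf tracing are illustrative
simplifications, not kernel code):

#include <stdio.h>

#define PT64_ROOT_LEVEL 4

/* Simplified stand-in for struct kvm_mmu_page: a counter takes the
 * place of the unsync_child_bitmap, plus a name for tracing. */
struct page {
	int unsync_children;
	const char *name;
};

/* Sized PT64_ROOT_LEVEL, as in the patch, so there is always room
 * for a sentinel above the topmost parent. */
struct mmu_page_path {
	struct page *parent[PT64_ROOT_LEVEL];
	unsigned int idx[PT64_ROOT_LEVEL];
};

/* Models the patched mmu_pages_clear_parents: climb the path, clearing
 * one unsync child per ancestor, and stop either when an ancestor still
 * has other unsync children or at the NULL sentinel. No bound on level
 * is ever checked. */
static void clear_parents(struct mmu_page_path *parents)
{
	struct page *sp;
	unsigned int level = 0;

	do {
		sp = parents->parent[level];
		if (!sp) {
			printf("hit sentinel at slot %u, done\n", level);
			return;
		}

		printf("clearing child %u in %s\n",
		       parents->idx[level], sp->name);
		sp->unsync_children--;
		level++;
	} while (!sp->unsync_children);
}

int main(void)
{
	struct page a = { .unsync_children = 1, .name = "level-2 page" };
	struct page r = { .unsync_children = 1, .name = "level-3 root" };

	/* Roughly the path built for one leaf: slot 5 of the level-2
	 * page, which itself sits in slot 9 of the level-3 root; the
	 * sentinel ends the path at parent[2]. */
	struct mmu_page_path parents = {
		.parent = { &a, &r, NULL },
		.idx    = { 5, 9 },
	};

	clear_parents(&parents);
	return 0;
}

Running it clears slot 5 in the level-2 page, then slot 9 in the root,
then stops at the sentinel, without ever consulting a level bound.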