On 08/04/21 13:15, Wanpeng Li wrote:
I saw this splat:
BUG: sleeping function called from invalid context at
arch/x86/kvm/kvm_cache_regs.h:115
kvm_pdptr_read+0x20/0x60 [kvm]
kvm_mmu_load+0x3bd/0x540 [kvm]
There is a might_sleep() in kvm_pdptr_read(); however, the original
commit did not explain it any further. I can send a formal patch if the
fix below is acceptable.
I think we can just push make_mmu_pages_available() down into
kvm_mmu_load()'s callees. This way the lock does not need to be taken
until after the PDPTR check:
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 0d92a269c5fa..f92c3695bfeb 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -3244,6 +3244,12 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
u8 shadow_root_level = mmu->shadow_root_level;
hpa_t root;
unsigned i;
+ int r;
+
+ write_lock(&vcpu->kvm->mmu_lock);
+ r = make_mmu_pages_available(vcpu);
+ if (r < 0)
+ goto out_unlock;
if (is_tdp_mmu_enabled(vcpu->kvm)) {
root = kvm_tdp_mmu_get_vcpu_root_hpa(vcpu);
@@ -3266,13 +3272,16 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu)
mmu->root_hpa = __pa(mmu->pae_root);
} else {
WARN_ONCE(1, "Bad TDP root level = %d\n", shadow_root_level);
- return -EIO;
+ r = -EIO;
}
+out_unlock:
+ write_unlock(&vcpu->kvm->mmu_lock);
+
/* root_pgd is ignored for direct MMUs. */
mmu->root_pgd = 0;
- return 0;
+ return r;
}
static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
@@ -3282,6 +3291,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
gfn_t root_gfn, root_pgd;
hpa_t root;
int i;
+ int r;
root_pgd = mmu->get_guest_pgd(vcpu);
root_gfn = root_pgd >> PAGE_SHIFT;
@@ -3300,6 +3310,11 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
}
}
+ write_lock(&vcpu->kvm->mmu_lock);
+ r = make_mmu_pages_available(vcpu);
+ if (r < 0)
+ goto out_unlock;
+
/*
* Do we shadow a long mode page table? If so we need to
* write-protect the guests page table root.
@@ -3308,7 +3323,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
root = mmu_alloc_root(vcpu, root_gfn, 0,
mmu->shadow_root_level, false);
mmu->root_hpa = root;
- goto set_root_pgd;
+ goto out_unlock;
}
if (WARN_ON_ONCE(!mmu->pae_root))
@@ -3350,7 +3365,8 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
else
mmu->root_hpa = __pa(mmu->pae_root);
-set_root_pgd:
+out_unlock:
+ write_unlock(&vcpu->kvm->mmu_lock);
mmu->root_pgd = root_pgd;
return 0;
@@ -4852,14 +4868,10 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu)
r = mmu_alloc_special_roots(vcpu);
if (r)
goto out;
- write_lock(&vcpu->kvm->mmu_lock);
- if (make_mmu_pages_available(vcpu))
- r = -ENOSPC;
- else if (vcpu->arch.mmu->direct_map)
+ if (vcpu->arch.mmu->direct_map)
r = mmu_alloc_direct_roots(vcpu);
else
r = mmu_alloc_shadow_roots(vcpu);
- write_unlock(&vcpu->kvm->mmu_lock);
if (r)
goto out;
Paolo