On Fri, Dec 23, 2022 at 12:57:21AM +0000, Sean Christopherson wrote: > Add and use a new mutex, gfn_lock, to protect accesses to the hash table > used to track which gfns are write-protected when shadowing the guest's > GTT. This fixes a bug where kvmgt_page_track_write(), which doesn't hold > kvm->mmu_lock, could race with intel_gvt_page_track_remove() and trigger > a use-after-free. > > Fixing kvmgt_page_track_write() by taking kvm->mmu_lock is not an option > as mmu_lock is a r/w spinlock, and intel_vgpu_page_track_handler() might > sleep when acquiring vgpu->cache_lock deep down the callstack: > > intel_vgpu_page_track_handler() > | > |-> page_track->handler / ppgtt_write_protection_handler() > | > |-> ppgtt_handle_guest_write_page_table_bytes() > | > |-> ppgtt_handle_guest_write_page_table() > | > |-> ppgtt_handle_guest_entry_removal() > | > |-> ppgtt_invalidate_pte() > | > |-> intel_gvt_dma_unmap_guest_page() > | > |-> mutex_lock(&vgpu->cache_lock); > This gfn_lock could lead to deadlock in below sequence. (1) kvm_write_track_add_gfn() to GFN 1 (2) kvmgt_page_track_write() for GFN 1 kvmgt_page_track_write() | |->mutex_lock(&info->vgpu_lock) |->intel_vgpu_page_track_handler (as is kvmgt_gfn_is_write_protected) | |->page_track->handler() (ppgtt_write_protection_handler()) | |->ppgtt_handle_guest_write_page_table_bytes() | |->ppgtt_handle_guest_write_page_table() | |->ppgtt_handle_guest_entry_add() --> new_present | |->ppgtt_populate_spt_by_guest_entry() | |->intel_vgpu_enable_page_track() --> for GFN 2 | |->intel_gvt_page_track_add() | |->mutex_lock(&info->gfn_lock) ===>deadlock Below fix based on this patch is to reuse vgpu_lock to protect the hash table info->ptable. Please check if it's good. diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index b924ed079ad4..526bd973e784 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -364,7 +364,7 @@ __kvmgt_protect_table_find(struct intel_vgpu *info, gfn_t gfn) { struct kvmgt_pgfn *p, *res = NULL; - lockdep_assert_held(&info->gfn_lock); + lockdep_assert_held(&info->vgpu_lock); hash_for_each_possible(info->ptable, p, hnode, gfn) { if (gfn == p->gfn) { @@ -388,7 +388,7 @@ static void kvmgt_protect_table_add(struct intel_vgpu *info, gfn_t gfn) { struct kvmgt_pgfn *p; - lockdep_assert_held(&info->gfn_lock); + lockdep_assert_held(&info->vgpu_lock); if (kvmgt_gfn_is_write_protected(info, gfn)) return; @@ -1572,7 +1572,7 @@ int intel_gvt_page_track_add(struct intel_vgpu *info, u64 gfn) if (!info->attached) return -ESRCH; - mutex_lock(&info->gfn_lock); + lockdep_assert_held(&info->vgpu_lock); if (kvmgt_gfn_is_write_protected(info, gfn)) goto out; @@ -1581,7 +1581,6 @@ int intel_gvt_page_track_add(struct intel_vgpu *info, u64 gfn) if (!ret) kvmgt_protect_table_add(info, gfn); out: - mutex_unlock(&info->gfn_lock); return ret; } @@ -1592,7 +1591,7 @@ int intel_gvt_page_track_remove(struct intel_vgpu *info, u64 gfn) if (!info->attached) return 0; - mutex_lock(&info->gfn_lock); + lockdep_assert_held(&info->vgpu_lock); if (!kvmgt_gfn_is_write_protected(info, gfn)) goto out; @@ -1601,7 +1600,6 @@ int intel_gvt_page_track_remove(struct intel_vgpu *info, u64 gfn) if (!ret) kvmgt_protect_table_del(info, gfn); out: - mutex_unlock(&info->gfn_lock); return ret; } @@ -1612,13 +1610,15 @@ static void kvmgt_page_track_write(gpa_t gpa, const u8 *val, int len, container_of(node, struct intel_vgpu, track_node); mutex_lock(&info->vgpu_lock); - mutex_lock(&info->gfn_lock); if (kvmgt_gfn_is_write_protected(info, gpa >> PAGE_SHIFT)) intel_vgpu_page_track_handler(info, gpa, (void *)val, len); } - mutex_unlock(&info->gfn_lock); mutex_unlock(&info->vgpu_lock); } @@ -1629,12 +1629,11 @@ static void kvmgt_page_track_remove_region(gfn_t gfn, unsigned long nr_pages, struct intel_vgpu *info = container_of(node, struct intel_vgpu, track_node); - mutex_lock(&info->gfn_lock); + lockdep_assert_held(&info->vgpu_lock); for (i = 0; i < nr_pages; i++) { if (kvmgt_gfn_is_write_protected(info, gfn + i)) kvmgt_protect_table_del(info, gfn + i); } - mutex_unlock(&info->gfn_lock); } Thanks Yan