OK, I've finally found that this fixes the race:

--- x86.c.old	2021-03-20 12:51:14.000000000 +0300
+++ x86.c	2021-06-26 02:28:37.082919492 +0300
@@ -9176,8 +9176,10 @@
 		if (__xfer_to_guest_mode_work_pending()) {
 			srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
 			r = xfer_to_guest_mode_handle_work(vcpu);
-			if (r)
+			if (r) {
+kvm_clear_exception_queue(vcpu);
 				return r;
+}
 			vcpu->srcu_idx = srcu_read_lock(&kvm->srcu);
 		}
 	}

This is where it returns to userspace with the PF exception still pending.
So... any ideas?