For tdh_mem_range_block(), tdh_mem_track(), tdh_mem_page_remove():
- Upon detection of TDX_OPERAND_BUSY, retry each SEAMCALL only once.
- During the retry, kick all vCPUs out of the guest and prevent any
  vCPU from re-entering, to avoid potential contention.

Signed-off-by: Yan Zhao <yan.y.zhao@xxxxxxxxx>
---
 arch/x86/include/asm/kvm_host.h |  2 ++
 arch/x86/kvm/vmx/tdx.c          | 49 +++++++++++++++++++++++++--------
 2 files changed, 40 insertions(+), 11 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 521c7cf725bc..bb7592110337 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -123,6 +123,8 @@
 #define KVM_REQ_HV_TLB_FLUSH \
 	KVM_ARCH_REQ_FLAGS(32, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
 #define KVM_REQ_UPDATE_PROTECTED_GUEST_STATE	KVM_ARCH_REQ(34)
+#define KVM_REQ_NO_VCPU_ENTER_INPROGRESS \
+	KVM_ARCH_REQ_FLAGS(33, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
 
 #define CR0_RESERVED_BITS                               \
 	(~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
diff --git a/arch/x86/kvm/vmx/tdx.c b/arch/x86/kvm/vmx/tdx.c
index 60d9e9d050ad..ed6b41bbcec6 100644
--- a/arch/x86/kvm/vmx/tdx.c
+++ b/arch/x86/kvm/vmx/tdx.c
@@ -311,6 +311,20 @@ static void tdx_clear_page(unsigned long page_pa)
 	__mb();
 }
 
+static void tdx_no_vcpus_enter_start(struct kvm *kvm)
+{
+	kvm_make_all_cpus_request(kvm, KVM_REQ_NO_VCPU_ENTER_INPROGRESS);
+}
+
+static void tdx_no_vcpus_enter_stop(struct kvm *kvm)
+{
+	struct kvm_vcpu *vcpu;
+	unsigned long i;
+
+	kvm_for_each_vcpu(i, vcpu, kvm)
+		kvm_clear_request(KVM_REQ_NO_VCPU_ENTER_INPROGRESS, vcpu);
+}
+
 /* TDH.PHYMEM.PAGE.RECLAIM is allowed only when destroying the TD. */
 static int __tdx_reclaim_page(hpa_t pa)
 {
@@ -1648,15 +1662,20 @@ static int tdx_sept_drop_private_spte(struct kvm *kvm, gfn_t gfn,
 	if (KVM_BUG_ON(!is_hkid_assigned(kvm_tdx), kvm))
 		return -EINVAL;
 
-	do {
-		/*
-		 * When zapping private page, write lock is held. So no race
-		 * condition with other vcpu sept operation. Race only with
-		 * TDH.VP.ENTER.
-		 */
+	/*
+	 * When zapping private page, write lock is held. So no race
+	 * condition with other vcpu sept operation. Race only with
+	 * TDH.VP.ENTER.
+	 */
+	err = tdh_mem_page_remove(kvm_tdx->tdr_pa, gpa, tdx_level, &entry,
+				  &level_state);
+	if ((err & TDX_OPERAND_BUSY)) {
+		/* After no vCPUs enter, the second retry is expected to succeed */
+		tdx_no_vcpus_enter_start(kvm);
 		err = tdh_mem_page_remove(kvm_tdx->tdr_pa, gpa, tdx_level, &entry,
 					  &level_state);
-	} while (unlikely(err == TDX_ERROR_SEPT_BUSY));
+		tdx_no_vcpus_enter_stop(kvm);
+	}
 
 	if (unlikely(kvm_tdx->state != TD_STATE_RUNNABLE &&
 		     err == (TDX_EPT_WALK_FAILED | TDX_OPERAND_ID_RCX))) {
@@ -1728,8 +1747,12 @@ static int tdx_sept_zap_private_spte(struct kvm *kvm, gfn_t gfn,
 	WARN_ON_ONCE(level != PG_LEVEL_4K);
 	err = tdh_mem_range_block(kvm_tdx->tdr_pa, gpa, tdx_level, &entry,
 				  &level_state);
-	if (unlikely(err == TDX_ERROR_SEPT_BUSY))
-		return -EAGAIN;
+	if (unlikely(err & TDX_OPERAND_BUSY)) {
+		/* After no vCPUs enter, the second retry is expected to succeed */
+		tdx_no_vcpus_enter_start(kvm);
+		err = tdh_mem_range_block(kvm_tdx->tdr_pa, gpa, tdx_level, &entry, &level_state);
+		tdx_no_vcpus_enter_stop(kvm);
+	}
 	if (KVM_BUG_ON(err, kvm)) {
 		pr_tdx_error_2(TDH_MEM_RANGE_BLOCK, err, entry, level_state);
 		return -EIO;
@@ -1772,9 +1795,13 @@ static void tdx_track(struct kvm *kvm)
 
 	lockdep_assert_held_write(&kvm->mmu_lock);
 
-	do {
+	err = tdh_mem_track(kvm_tdx->tdr_pa);
+	if ((err & TDX_SEAMCALL_STATUS_MASK) == TDX_OPERAND_BUSY) {
+		/* After no vCPUs enter, the second retry is expected to succeed */
+		tdx_no_vcpus_enter_start(kvm);
 		err = tdh_mem_track(kvm_tdx->tdr_pa);
-	} while (unlikely((err & TDX_SEAMCALL_STATUS_MASK) == TDX_OPERAND_BUSY));
+		tdx_no_vcpus_enter_stop(kvm);
+	}
 
 	if (KVM_BUG_ON(err, kvm))
 		pr_tdx_error(TDH_MEM_TRACK, err);
-- 
2.43.2
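
A note on the mechanism: KVM_REQ_NO_VCPU_ENTER_INPROGRESS deliberately has
no handler. kvm_make_all_cpus_request() kicks every vCPU out of guest mode,
and since nothing consumes the request bit until tdx_no_vcpus_enter_stop()
clears it, the pending bit keeps the run loop from completing VM-entry. The
sketch below illustrates this via the existing kvm_vcpu_exit_request()
check; it is a simplified illustration, not part of this patch, and
vcpu_enter_guest_sketch() is a hypothetical stand-in for the real
vcpu_enter_guest() in arch/x86/kvm/x86.c.

/*
 * Simplified sketch (not part of this patch) of how a pending,
 * unhandled request blocks VM-entry in the x86 run loop.
 */
static int vcpu_enter_guest_sketch(struct kvm_vcpu *vcpu)
{
	/*
	 * Known requests are consumed here via kvm_check_request();
	 * KVM_REQ_NO_VCPU_ENTER_INPROGRESS has no such handler, so its
	 * bit stays set in vcpu->requests.
	 */

	/* ... event injection, IRQs disabled, guest mode entered ... */

	/*
	 * kvm_vcpu_exit_request() returns true while any request bit is
	 * still pending, so the vCPU bounces back to the run loop
	 * instead of entering the guest, until tdx_no_vcpus_enter_stop()
	 * clears the request and the SEAMCALL retry has completed.
	 */
	if (kvm_vcpu_exit_request(vcpu))
		return 1;	/* retry the run loop, no VM-entry */

	/* ... actual VM-entry (TDH.VP.ENTER for a TD vCPU) ... */
	return 1;
}

This is also why a single retry suffices: with every vCPU held out of
TDH.VP.ENTER for the duration of the retry, the only source of the
TDX_OPERAND_BUSY contention described above is gone.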