On 3/19/2023 4:49 PM, Binbin Wu wrote:
Untag addresses for 64-bit memory/MMIO operands in instruction emulation
and vmexit handlers when LAM is applicable.

For instruction emulation, untag the address in __linearize() before the
canonical check. LAM doesn't apply to instruction fetches or INVLPG, so
pass KVM_X86_UNTAG_ADDR_SKIP_LAM to skip the LAM untag in those paths.

For vmexit handling related to 64-bit linear addresses:
- Cases that need address untagging
  Operand(s) of VMX instructions and INVPCID
  Operand(s) of SGX ENCLS
  Linear address in the INVVPID descriptor
- Cases LAM doesn't apply to (no change needed)
  Operand of INVLPG
  Linear address in the INVPCID descriptor
Co-developed-by: Robert Hoo <robert.hu@xxxxxxxxxxxxxxx>
Signed-off-by: Robert Hoo <robert.hu@xxxxxxxxxxxxxxx>
Signed-off-by: Binbin Wu <binbin.wu@xxxxxxxxxxxxxxx>
---
arch/x86/kvm/emulate.c | 25 +++++++++++++++++--------
arch/x86/kvm/vmx/nested.c | 2 ++
arch/x86/kvm/vmx/sgx.c | 1 +
arch/x86/kvm/x86.c | 4 ++++
4 files changed, 24 insertions(+), 8 deletions(-)
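
As background for the hunks below: the untag operation itself boils down
to a sign extension. The helper here is my own illustration of the LAM
rule, not code from this series (the name is made up; sign_extend64() is
the real helper from <linux/bitops.h>):

/*
 * Sketch of LAM untagging, illustration only: metadata lives in bits
 * 62:48 (LAM48) or 62:57 (LAM57). Untagging sign-extends from the
 * boundary bit and preserves bit 63, so user and kernel addresses
 * keep their canonical half.
 */
static u64 lam_untag_sketch(u64 addr, bool lam57)
{
	unsigned int lam_bit = lam57 ? 56 : 47;

	return (sign_extend64(addr, lam_bit) & ~BIT_ULL(63)) |
	       (addr & BIT_ULL(63));
}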
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index a630c5db971c..c46f0162498e 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -688,7 +688,8 @@ static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
struct segmented_address addr,
unsigned *max_size, unsigned size,
bool write, bool fetch,
- enum x86emul_mode mode, ulong *linear)
+ enum x86emul_mode mode, ulong *linear,
+ u64 untag_flags)
{
struct desc_struct desc;
bool usable;
@@ -701,9 +702,10 @@ static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
*max_size = 0;
switch (mode) {
case X86EMUL_MODE_PROT64:
- *linear = la;
+ *linear = static_call(kvm_x86_untag_addr)(ctxt->vcpu, la, untag_flags);
Per Sean's comment on v5 that "dereferencing ctxt->vcpu in the emulator
is not allowed", I will update this as follows:
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index c46f0162498e..5fbce7bb3bc8 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -702,7 +702,7 @@ static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt,
*max_size = 0;
switch (mode) {
case X86EMUL_MODE_PROT64:
- *linear = static_call(kvm_x86_untag_addr)(ctxt->vcpu, la, untag_flags);
+ *linear = ctxt->ops->untag_addr(ctxt, la, untag_flags);
va_bits = ctxt_virt_addr_bits(ctxt);
if (!__is_canonical_address(*linear, va_bits))
diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h
index 2d9662be8333..14b32c7c2abb 100644
--- a/arch/x86/kvm/kvm_emulate.h
+++ b/arch/x86/kvm/kvm_emulate.h
@@ -224,6 +224,8 @@ struct x86_emulate_ops {
int (*leave_smm)(struct x86_emulate_ctxt *ctxt);
void (*triple_fault)(struct x86_emulate_ctxt *ctxt);
int (*set_xcr)(struct x86_emulate_ctxt *ctxt, u32 index, u64 xcr);
+
+ u64 (*untag_addr)(struct x86_emulate_ctxt *ctxt, u64 addr, u64 flags);
};
/* Type, address-of, and value of an instruction's operand. */
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index d85f87a19f58..a3560ea7560d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -8190,6 +8190,11 @@ static void emulator_vm_bugged(struct x86_emulate_ctxt *ctxt)
kvm_vm_bugged(kvm);
}
+static u64 emulator_untag_addr(struct x86_emulate_ctxt *ctxt, u64 addr, u64 flags)
+{
+ return static_call(kvm_x86_untag_addr)(emul_to_vcpu(ctxt), addr, flags);
+}
+
static const struct x86_emulate_ops emulate_ops = {
.vm_bugged = emulator_vm_bugged,
.read_gpr = emulator_read_gpr,
@@ -8234,6 +8239,7 @@ static const struct x86_emulate_ops emulate_ops = {
.leave_smm = emulator_leave_smm,
.triple_fault = emulator_triple_fault,
.set_xcr = emulator_set_xcr,
+ .untag_addr = emulator_untag_addr,
};
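
(This keeps the usual layering: the emulator core never dereferences
struct kvm_vcpu and reaches KVM only through the x86_emulate_ops
callbacks, so the static_call now lives behind emulator_untag_addr() in
x86.c instead of inside __linearize().)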
+
va_bits = ctxt_virt_addr_bits(ctxt);
- if (!__is_canonical_address(la, va_bits))
+ if (!__is_canonical_address(*linear, va_bits))
goto bad;
*max_size = min_t(u64, ~0u, (1ull << va_bits) - la);
@@ -757,8 +759,8 @@ static int linearize(struct x86_emulate_ctxt *ctxt,
ulong *linear)
{
unsigned max_size;
- return __linearize(ctxt, addr, &max_size, size, write, false,
- ctxt->mode, linear);
+ return __linearize(ctxt, addr, &max_size, size, write, false,
+ ctxt->mode, linear, 0);
}
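
A side note: KVM_X86_UNTAG_ADDR_SKIP_LAM, used in the hunks below, isn't
defined in this patch's context and presumably comes from an earlier
patch in the series. A sketch of what such a definition might look like
(the exact value and header are my assumption):

/* Assumed definition; the real one lives elsewhere in the series. */
#define KVM_X86_UNTAG_ADDR_SKIP_LAM	BIT_ULL(0)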
static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst)
@@ -771,7 +773,9 @@ static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst)
if (ctxt->op_bytes != sizeof(unsigned long))
addr.ea = dst & ((1UL << (ctxt->op_bytes << 3)) - 1);
- rc = __linearize(ctxt, addr, &max_size, 1, false, true, ctxt->mode, &linear);
+ /* skip LAM untag for the instruction address */
+ rc = __linearize(ctxt, addr, &max_size, 1, false, true, ctxt->mode,
+ &linear, KVM_X86_UNTAG_ADDR_SKIP_LAM);
if (rc == X86EMUL_CONTINUE)
ctxt->_eip = addr.ea;
return rc;
@@ -906,9 +910,11 @@ static int __do_insn_fetch_bytes(struct x86_emulate_ctxt *ctxt, int op_size)
* __linearize is called with size 0 so that it does not do any
* boundary check itself. Instead, we use max_size to check
* against op_size.
+ *
+ * Skip LAM untag for the instruction fetch.
*/
rc = __linearize(ctxt, addr, &max_size, 0, false, true, ctxt->mode,
- &linear);
+ &linear, KVM_X86_UNTAG_ADDR_SKIP_LAM);
if (unlikely(rc != X86EMUL_CONTINUE))
return rc;
@@ -3433,8 +3439,11 @@ static int em_invlpg(struct x86_emulate_ctxt *ctxt)
{
int rc;
ulong linear;
+ unsigned max_size;
- rc = linearize(ctxt, ctxt->src.addr.mem, 1, false, &linear);
+ /* skip LAM untag since LAM doesn't apply to invlpg */
+ rc = __linearize(ctxt, ctxt->src.addr.mem, &max_size, 1, false, false,
+ ctxt->mode, &linear, KVM_X86_UNTAG_ADDR_SKIP_LAM);
if (rc == X86EMUL_CONTINUE)
ctxt->ops->invlpg(ctxt, linear);
/* Disable writeback. */
diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
index 2eb258992d63..dd1d28a0d147 100644
--- a/arch/x86/kvm/vmx/nested.c
+++ b/arch/x86/kvm/vmx/nested.c
@@ -4970,6 +4970,7 @@ int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification,
else
*ret = off;
+ *ret = vmx_untag_addr(vcpu, *ret, 0);
/* Long mode: #GP(0)/#SS(0) if the memory address is in a
* non-canonical form. This is the only check on the memory
* destination for long mode!
@@ -5787,6 +5788,7 @@ static int handle_invvpid(struct kvm_vcpu *vcpu)
vpid02 = nested_get_vpid02(vcpu);
switch (type) {
case VMX_VPID_EXTENT_INDIVIDUAL_ADDR:
+ operand.gla = vmx_untag_addr(vcpu, operand.gla, 0);
if (!operand.vpid ||
is_noncanonical_address(operand.gla, vcpu))
return nested_vmx_fail(vcpu,
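
The ordering in both nested.c hunks matters: untag first, then
canonical-check. A worked example with made-up numbers, reusing the
lam_untag_sketch() illustration from earlier in this mail:

	/* LAM48 user pointer carrying metadata 0x15 in bits 62:48: */
	u64 tagged   = (0x15ULL << 48) | 0x00007a0000001000ULL;
	u64 untagged = lam_untag_sketch(tagged, false);
	/* tagged is non-canonical; untagged is 0x00007a0000001000 */

Without the untag, is_noncanonical_address() would reject every tagged
but otherwise valid address.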
diff --git a/arch/x86/kvm/vmx/sgx.c b/arch/x86/kvm/vmx/sgx.c
index 0574030b071f..527f1a902c65 100644
--- a/arch/x86/kvm/vmx/sgx.c
+++ b/arch/x86/kvm/vmx/sgx.c
@@ -37,6 +37,7 @@ static int sgx_get_encls_gva(struct kvm_vcpu *vcpu, unsigned long offset,
if (!IS_ALIGNED(*gva, alignment)) {
fault = true;
} else if (likely(is_64_bit_mode(vcpu))) {
+ *gva = vmx_untag_addr(vcpu, *gva, 0);
fault = is_noncanonical_address(*gva, vcpu);
} else {
*gva &= 0xffffffff;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e74af72f53ec..d85f87a19f58 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -13233,6 +13233,10 @@ int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva)
switch (type) {
case INVPCID_TYPE_INDIV_ADDR:
+ /*
+ * LAM doesn't apply to the linear address in the descriptor;
+ * it still needs to be canonical.
+ */
if ((!pcid_enabled && (operand.pcid != 0)) ||
is_noncanonical_address(operand.gla, vcpu)) {
kvm_inject_gp(vcpu, 0);
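
To summarize the guest-visible contract across these hunks: 64-bit
memory/MMIO operands, VMX-instruction and INVPCID operands, ENCLS
operands, and the gla in the INVVPID descriptor are untagged before the
canonical check, so tagged pointers are accepted there; the INVLPG
operand and the gla in the INVPCID descriptor are not untagged, so a
tagged (hence non-canonical) address in those still fails the check.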