[patch added to the 3.12 stable tree] KVM: x86: Handle errors when RIP is set during far jumps

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Nadav Amit <namit@xxxxxxxxxxxxxxxxx>

This patch has been added to the 3.12 stable tree. If you have any
objections, please let us know.

===============

commit d1442d85cc30ea75f7d399474ca738e0bc96f715 upstream.

Far jmp/call/ret may fault while loading a new RIP.  Currently KVM does not
handle this case, and may result in failed vm-entry once the assignment is
done.  The tricky part of doing so is that loading the new CS affects the
VMCS/VMCB state, so if we fail during loading the new RIP, we are left in
unconsistent state.  Therefore, this patch saves on 64-bit the old CS
descriptor and restores it if loading RIP failed.

Signed-off-by: Nadav Amit <namit@xxxxxxxxxxxxxxxxx>
Signed-off-by: Bruce Rogers <brogers@xxxxxxxx>
Signed-off-by: Jiri Slaby <jslaby@xxxxxxx>
---
 arch/x86/kvm/emulate.c | 113 +++++++++++++++++++++++++++++++++----------------
 1 file changed, 77 insertions(+), 36 deletions(-)

diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index ab1d45928ce7..fe10e13070b0 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -1443,7 +1443,8 @@ static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 
 /* Does not support long mode */
 static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
-				   u16 selector, int seg)
+				   u16 selector, int seg,
+				   struct desc_struct *desc)
 {
 	struct desc_struct seg_desc, old_desc;
 	u8 dpl, rpl, cpl;
@@ -1570,6 +1571,8 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 	}
 load:
 	ctxt->ops->set_segment(ctxt, selector, &seg_desc, 0, seg);
+	if (desc)
+		*desc = seg_desc;
 	return X86EMUL_CONTINUE;
 exception:
 	emulate_exception(ctxt, err_vec, err_code, true);
@@ -1776,7 +1779,7 @@ static int em_pop_sreg(struct x86_emulate_ctxt *ctxt)
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 
-	rc = load_segment_descriptor(ctxt, (u16)selector, seg);
+	rc = load_segment_descriptor(ctxt, (u16)selector, seg, NULL);
 	return rc;
 }
 
@@ -1865,7 +1868,7 @@ static int __emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq)
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 
-	rc = load_segment_descriptor(ctxt, cs, VCPU_SREG_CS);
+	rc = load_segment_descriptor(ctxt, cs, VCPU_SREG_CS, NULL);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 
@@ -1931,7 +1934,7 @@ static int emulate_iret_real(struct x86_emulate_ctxt *ctxt)
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 
-	rc = load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS);
+	rc = load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS, NULL);
 
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
@@ -1970,17 +1973,29 @@ static int em_iret(struct x86_emulate_ctxt *ctxt)
 static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
 {
 	int rc;
-	unsigned short sel;
+	unsigned short sel, old_sel;
+	struct desc_struct old_desc, new_desc;
+	const struct x86_emulate_ops *ops = ctxt->ops;
+
+	/* Assignment of RIP may only fail in 64-bit mode */
+	if (ctxt->mode == X86EMUL_MODE_PROT64)
+		ops->get_segment(ctxt, &old_sel, &old_desc, NULL,
+				 VCPU_SREG_CS);
 
 	memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
 
-	rc = load_segment_descriptor(ctxt, sel, VCPU_SREG_CS);
+	rc = load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, &new_desc);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 
-	ctxt->_eip = 0;
-	memcpy(&ctxt->_eip, ctxt->src.valptr, ctxt->op_bytes);
-	return X86EMUL_CONTINUE;
+	rc = assign_eip_far(ctxt, ctxt->src.val, new_desc.l);
+	if (rc != X86EMUL_CONTINUE) {
+		WARN_ON(!ctxt->mode != X86EMUL_MODE_PROT64);
+		/* assigning eip failed; restore the old cs */
+		ops->set_segment(ctxt, old_sel, &old_desc, 0, VCPU_SREG_CS);
+		return rc;
+	}
+	return rc;
 }
 
 static int em_grp45(struct x86_emulate_ctxt *ctxt)
@@ -2044,21 +2059,33 @@ static int em_ret(struct x86_emulate_ctxt *ctxt)
 static int em_ret_far(struct x86_emulate_ctxt *ctxt)
 {
 	int rc;
-	unsigned long cs;
+	unsigned long eip, cs;
+	u16 old_cs;
 	int cpl = ctxt->ops->cpl(ctxt);
+	struct desc_struct old_desc, new_desc;
+	const struct x86_emulate_ops *ops = ctxt->ops;
+
+	if (ctxt->mode == X86EMUL_MODE_PROT64)
+		ops->get_segment(ctxt, &old_cs, &old_desc, NULL,
+				 VCPU_SREG_CS);
 
-	rc = emulate_pop(ctxt, &ctxt->_eip, ctxt->op_bytes);
+	rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
-	if (ctxt->op_bytes == 4)
-		ctxt->_eip = (u32)ctxt->_eip;
 	rc = emulate_pop(ctxt, &cs, ctxt->op_bytes);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 	/* Outer-privilege level return is not implemented */
 	if (ctxt->mode >= X86EMUL_MODE_PROT16 && (cs & 3) > cpl)
 		return X86EMUL_UNHANDLEABLE;
-	rc = load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS);
+	rc = load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS, &new_desc);
+	if (rc != X86EMUL_CONTINUE)
+		return rc;
+	rc = assign_eip_far(ctxt, eip, new_desc.l);
+	if (rc != X86EMUL_CONTINUE) {
+		WARN_ON(!ctxt->mode != X86EMUL_MODE_PROT64);
+		ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
+	}
 	return rc;
 }
 
@@ -2099,7 +2126,7 @@ static int em_lseg(struct x86_emulate_ctxt *ctxt)
 
 	memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
 
-	rc = load_segment_descriptor(ctxt, sel, seg);
+	rc = load_segment_descriptor(ctxt, sel, seg, NULL);
 	if (rc != X86EMUL_CONTINUE)
 		return rc;
 
@@ -2479,19 +2506,19 @@ static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt,
 	 * Now load segment descriptors. If fault happens at this stage
 	 * it is handled in a context of new task
 	 */
-	ret = load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR);
+	ret = load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES);
+	ret = load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS);
+	ret = load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS);
+	ret = load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS);
+	ret = load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
 
@@ -2620,25 +2647,26 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
 	 * Now load segment descriptors. If fault happenes at this stage
 	 * it is handled in a context of new task
 	 */
-	ret = load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR);
+	ret = load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR,
+				      NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES);
+	ret = load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS);
+	ret = load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS);
+	ret = load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS);
+	ret = load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS);
+	ret = load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
-	ret = load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS);
+	ret = load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, NULL);
 	if (ret != X86EMUL_CONTINUE)
 		return ret;
 
@@ -2918,24 +2946,37 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt)
 	u16 sel, old_cs;
 	ulong old_eip;
 	int rc;
+	struct desc_struct old_desc, new_desc;
+	const struct x86_emulate_ops *ops = ctxt->ops;
 
-	old_cs = get_segment_selector(ctxt, VCPU_SREG_CS);
 	old_eip = ctxt->_eip;
+	ops->get_segment(ctxt, &old_cs, &old_desc, NULL, VCPU_SREG_CS);
 
 	memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
-	if (load_segment_descriptor(ctxt, sel, VCPU_SREG_CS))
+	rc = load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, &new_desc);
+	if (rc != X86EMUL_CONTINUE)
 		return X86EMUL_CONTINUE;
 
-	ctxt->_eip = 0;
-	memcpy(&ctxt->_eip, ctxt->src.valptr, ctxt->op_bytes);
+	rc = assign_eip_far(ctxt, ctxt->src.val, new_desc.l);
+	if (rc != X86EMUL_CONTINUE)
+		goto fail;
 
 	ctxt->src.val = old_cs;
 	rc = em_push(ctxt);
 	if (rc != X86EMUL_CONTINUE)
-		return rc;
+		goto fail;
 
 	ctxt->src.val = old_eip;
-	return em_push(ctxt);
+	rc = em_push(ctxt);
+	/* If we failed, we tainted the memory, but the very least we should
+	   restore cs */
+	if (rc != X86EMUL_CONTINUE)
+		goto fail;
+	return rc;
+fail:
+	ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
+	return rc;
+
 }
 
 static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt)
@@ -3081,7 +3122,7 @@ static int em_mov_sreg_rm(struct x86_emulate_ctxt *ctxt)
 
 	/* Disable writeback. */
 	ctxt->dst.type = OP_NONE;
-	return load_segment_descriptor(ctxt, sel, ctxt->modrm_reg);
+	return load_segment_descriptor(ctxt, sel, ctxt->modrm_reg, NULL);
 }
 
 static int em_lldt(struct x86_emulate_ctxt *ctxt)
@@ -3090,7 +3131,7 @@ static int em_lldt(struct x86_emulate_ctxt *ctxt)
 
 	/* Disable writeback. */
 	ctxt->dst.type = OP_NONE;
-	return load_segment_descriptor(ctxt, sel, VCPU_SREG_LDTR);
+	return load_segment_descriptor(ctxt, sel, VCPU_SREG_LDTR, NULL);
 }
 
 static int em_ltr(struct x86_emulate_ctxt *ctxt)
@@ -3099,7 +3140,7 @@ static int em_ltr(struct x86_emulate_ctxt *ctxt)
 
 	/* Disable writeback. */
 	ctxt->dst.type = OP_NONE;
-	return load_segment_descriptor(ctxt, sel, VCPU_SREG_TR);
+	return load_segment_descriptor(ctxt, sel, VCPU_SREG_TR, NULL);
 }
 
 static int em_invlpg(struct x86_emulate_ctxt *ctxt)
-- 
2.2.2

--
To unsubscribe from this list: send the line "unsubscribe stable" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [Linux Kernel]     [Kernel Development Newbies]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite Hiking]     [Linux Kernel]     [Linux SCSI]