From: Nadav Amit <namit@xxxxxxxxxxxxxxxxx>

This patch has been added to the 3.12 stable tree. If you have any
objections, please let us know.

===============

commit d1442d85cc30ea75f7d399474ca738e0bc96f715 upstream.

Far jmp/call/ret may fault while loading a new RIP. Currently KVM does
not handle this case, which may result in a failed vm-entry once the
assignment is done. The tricky part of handling it is that loading the
new CS affects the VMCS/VMCB state, so if we fail while loading the new
RIP we are left in an inconsistent state. Therefore, this patch saves
the old CS descriptor on 64-bit and restores it if loading the RIP
fails.

Signed-off-by: Nadav Amit <namit@xxxxxxxxxxxxxxxxx>
Signed-off-by: Bruce Rogers <brogers@xxxxxxxx>
Signed-off-by: Jiri Slaby <jslaby@xxxxxxx>
---
 arch/x86/kvm/emulate.c | 113 +++++++++++++++++++++++++++++++++----------------
 1 file changed, 77 insertions(+), 36 deletions(-)
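
[ Illustration, not part of the patch: below is a minimal, self-contained
  sketch of the save/restore pattern the patch introduces, for readers who
  want the control flow without the diff context. Every name in it
  (emu_ctxt, seg_desc, load_cs, assign_rip, far_jmp) is a hypothetical
  stand-in for the emulator's real types and helpers, and the
  canonical-address check is simplified to the 48-bit case. ]

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

enum { EMU_CONTINUE = 0, EMU_FAULT = 1 };

struct seg_desc {                       /* stand-in for desc_struct */
        uint64_t raw;
        bool l;                         /* 64-bit code segment? */
};

struct emu_ctxt {                       /* stand-in for x86_emulate_ctxt */
        uint16_t cs_sel;
        struct seg_desc cs_desc;
        uint64_t rip;
        bool long_mode;
};

/* Loading CS updates the emulated segment state as a side effect. */
static int load_cs(struct emu_ctxt *c, uint16_t sel, struct seg_desc *out)
{
        c->cs_sel = sel;
        c->cs_desc.raw = sel;           /* pretend we fetched a descriptor */
        c->cs_desc.l = c->long_mode;
        if (out)
                *out = c->cs_desc;
        return EMU_CONTINUE;
}

/* In a 64-bit code segment, a non-canonical RIP must fault. */
static int assign_rip(struct emu_ctxt *c, uint64_t rip, bool l)
{
        if (l && ((int64_t)(rip << 16) >> 16) != (int64_t)rip)
                return EMU_FAULT;       /* non-canonical (48-bit check) */
        c->rip = rip;
        return EMU_CONTINUE;
}

static int far_jmp(struct emu_ctxt *c, uint16_t sel, uint64_t rip)
{
        uint16_t old_sel = c->cs_sel;
        struct seg_desc old_desc = c->cs_desc, new_desc;
        int rc;

        rc = load_cs(c, sel, &new_desc);
        if (rc != EMU_CONTINUE)
                return rc;              /* CS untouched, nothing to undo */

        rc = assign_rip(c, rip, new_desc.l);
        if (rc != EMU_CONTINUE) {
                /* RIP assignment faulted after CS was already replaced:
                 * roll CS back so the emulated state stays consistent. */
                c->cs_sel = old_sel;
                c->cs_desc = old_desc;
        }
        return rc;
}

int main(void)
{
        struct emu_ctxt c = { .cs_sel = 0x08, .long_mode = true };

        /* Non-canonical target: the jump faults and CS stays 0x8. */
        int rc = far_jmp(&c, 0x10, 0xdead000000000000ULL);
        printf("rc=%d cs=%#x\n", rc, (unsigned)c.cs_sel);
        return 0;
}

[ em_call_far in the patch has the same shape, with the extra wrinkle that
  the pushes of the old CS and EIP can fail after CS has already been
  replaced, which is why it funnels every late failure through a single
  fail: label that restores the saved descriptor. ]
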
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index ab1d45928ce7..fe10e13070b0 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -1443,7 +1443,8 @@ static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt,
 
 /* Does not support long mode */
 static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
-                                   u16 selector, int seg)
+                                   u16 selector, int seg,
+                                   struct desc_struct *desc)
 {
         struct desc_struct seg_desc, old_desc;
         u8 dpl, rpl, cpl;
@@ -1570,6 +1571,8 @@ static int load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
         }
 load:
         ctxt->ops->set_segment(ctxt, selector, &seg_desc, 0, seg);
+        if (desc)
+                *desc = seg_desc;
         return X86EMUL_CONTINUE;
 exception:
         emulate_exception(ctxt, err_vec, err_code, true);
@@ -1776,7 +1779,7 @@ static int em_pop_sreg(struct x86_emulate_ctxt *ctxt)
         if (rc != X86EMUL_CONTINUE)
                 return rc;
 
-        rc = load_segment_descriptor(ctxt, (u16)selector, seg);
+        rc = load_segment_descriptor(ctxt, (u16)selector, seg, NULL);
         return rc;
 }
 
@@ -1865,7 +1868,7 @@ static int __emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq)
         if (rc != X86EMUL_CONTINUE)
                 return rc;
 
-        rc = load_segment_descriptor(ctxt, cs, VCPU_SREG_CS);
+        rc = load_segment_descriptor(ctxt, cs, VCPU_SREG_CS, NULL);
         if (rc != X86EMUL_CONTINUE)
                 return rc;
 
@@ -1931,7 +1934,7 @@ static int emulate_iret_real(struct x86_emulate_ctxt *ctxt)
         if (rc != X86EMUL_CONTINUE)
                 return rc;
 
-        rc = load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS);
+        rc = load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS, NULL);
         if (rc != X86EMUL_CONTINUE)
                 return rc;
 
@@ -1970,17 +1973,29 @@ static int em_iret(struct x86_emulate_ctxt *ctxt)
 static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
 {
         int rc;
-        unsigned short sel;
+        unsigned short sel, old_sel;
+        struct desc_struct old_desc, new_desc;
+        const struct x86_emulate_ops *ops = ctxt->ops;
+
+        /* Assignment of RIP may only fail in 64-bit mode */
+        if (ctxt->mode == X86EMUL_MODE_PROT64)
+                ops->get_segment(ctxt, &old_sel, &old_desc, NULL,
+                                 VCPU_SREG_CS);
 
         memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
 
-        rc = load_segment_descriptor(ctxt, sel, VCPU_SREG_CS);
+        rc = load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, &new_desc);
         if (rc != X86EMUL_CONTINUE)
                 return rc;
 
-        ctxt->_eip = 0;
-        memcpy(&ctxt->_eip, ctxt->src.valptr, ctxt->op_bytes);
-        return X86EMUL_CONTINUE;
+        rc = assign_eip_far(ctxt, ctxt->src.val, new_desc.l);
+        if (rc != X86EMUL_CONTINUE) {
+                WARN_ON(ctxt->mode != X86EMUL_MODE_PROT64);
+                /* assigning eip failed; restore the old cs */
+                ops->set_segment(ctxt, old_sel, &old_desc, 0, VCPU_SREG_CS);
+                return rc;
+        }
+        return rc;
 }
 
 static int em_grp45(struct x86_emulate_ctxt *ctxt)
@@ -2044,21 +2059,33 @@ static int em_ret(struct x86_emulate_ctxt *ctxt)
 static int em_ret_far(struct x86_emulate_ctxt *ctxt)
 {
         int rc;
-        unsigned long cs;
+        unsigned long eip, cs;
+        u16 old_cs;
         int cpl = ctxt->ops->cpl(ctxt);
+        struct desc_struct old_desc, new_desc;
+        const struct x86_emulate_ops *ops = ctxt->ops;
+
+        if (ctxt->mode == X86EMUL_MODE_PROT64)
+                ops->get_segment(ctxt, &old_cs, &old_desc, NULL,
+                                 VCPU_SREG_CS);
 
-        rc = emulate_pop(ctxt, &ctxt->_eip, ctxt->op_bytes);
+        rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
         if (rc != X86EMUL_CONTINUE)
                 return rc;
-        if (ctxt->op_bytes == 4)
-                ctxt->_eip = (u32)ctxt->_eip;
         rc = emulate_pop(ctxt, &cs, ctxt->op_bytes);
         if (rc != X86EMUL_CONTINUE)
                 return rc;
         /* Outer-privilege level return is not implemented */
         if (ctxt->mode >= X86EMUL_MODE_PROT16 && (cs & 3) > cpl)
                 return X86EMUL_UNHANDLEABLE;
-        rc = load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS);
+        rc = load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS, &new_desc);
+        if (rc != X86EMUL_CONTINUE)
+                return rc;
+        rc = assign_eip_far(ctxt, eip, new_desc.l);
+        if (rc != X86EMUL_CONTINUE) {
+                WARN_ON(ctxt->mode != X86EMUL_MODE_PROT64);
+                ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
+        }
         return rc;
 }
 
@@ -2099,7 +2126,7 @@ static int em_lseg(struct x86_emulate_ctxt *ctxt)
 
         memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
 
-        rc = load_segment_descriptor(ctxt, sel, seg);
+        rc = load_segment_descriptor(ctxt, sel, seg, NULL);
         if (rc != X86EMUL_CONTINUE)
                 return rc;
 
@@ -2479,19 +2506,19 @@ static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt,
          * Now load segment descriptors. If fault happens at this stage
          * it is handled in a context of new task
          */
-        ret = load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR);
+        ret = load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR, NULL);
         if (ret != X86EMUL_CONTINUE)
                 return ret;
-        ret = load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES);
+        ret = load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, NULL);
         if (ret != X86EMUL_CONTINUE)
                 return ret;
-        ret = load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS);
+        ret = load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, NULL);
         if (ret != X86EMUL_CONTINUE)
                 return ret;
-        ret = load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS);
+        ret = load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, NULL);
         if (ret != X86EMUL_CONTINUE)
                 return ret;
-        ret = load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS);
+        ret = load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, NULL);
         if (ret != X86EMUL_CONTINUE)
                 return ret;
 
@@ -2620,25 +2647,26 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt,
          * Now load segment descriptors. If fault happenes at this stage
          * it is handled in a context of new task
          */
-        ret = load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR);
+        ret = load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR,
+                                      NULL);
         if (ret != X86EMUL_CONTINUE)
                 return ret;
-        ret = load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES);
+        ret = load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, NULL);
         if (ret != X86EMUL_CONTINUE)
                 return ret;
-        ret = load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS);
+        ret = load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, NULL);
         if (ret != X86EMUL_CONTINUE)
                 return ret;
-        ret = load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS);
+        ret = load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, NULL);
         if (ret != X86EMUL_CONTINUE)
                 return ret;
-        ret = load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS);
+        ret = load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, NULL);
         if (ret != X86EMUL_CONTINUE)
                 return ret;
-        ret = load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS);
+        ret = load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS, NULL);
         if (ret != X86EMUL_CONTINUE)
                 return ret;
-        ret = load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS);
+        ret = load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, NULL);
         if (ret != X86EMUL_CONTINUE)
                 return ret;
 
@@ -2918,24 +2946,37 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt)
         u16 sel, old_cs;
         ulong old_eip;
         int rc;
+        struct desc_struct old_desc, new_desc;
+        const struct x86_emulate_ops *ops = ctxt->ops;
 
-        old_cs = get_segment_selector(ctxt, VCPU_SREG_CS);
         old_eip = ctxt->_eip;
+        ops->get_segment(ctxt, &old_cs, &old_desc, NULL, VCPU_SREG_CS);
 
         memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
-        if (load_segment_descriptor(ctxt, sel, VCPU_SREG_CS))
+        rc = load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, &new_desc);
+        if (rc != X86EMUL_CONTINUE)
                 return X86EMUL_CONTINUE;
 
-        ctxt->_eip = 0;
-        memcpy(&ctxt->_eip, ctxt->src.valptr, ctxt->op_bytes);
+        rc = assign_eip_far(ctxt, ctxt->src.val, new_desc.l);
+        if (rc != X86EMUL_CONTINUE)
+                goto fail;
 
         ctxt->src.val = old_cs;
         rc = em_push(ctxt);
         if (rc != X86EMUL_CONTINUE)
-                return rc;
+                goto fail;
 
         ctxt->src.val = old_eip;
-        return em_push(ctxt);
+        rc = em_push(ctxt);
+        /* If we failed, we tainted the memory, but at the very least we
+           should restore cs */
+        if (rc != X86EMUL_CONTINUE)
+                goto fail;
+        return rc;
+fail:
+        ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
+        return rc;
+
 }
 
 static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt)
@@ -3081,7 +3122,7 @@ static int em_mov_sreg_rm(struct x86_emulate_ctxt *ctxt)
 
         /* Disable writeback. */
         ctxt->dst.type = OP_NONE;
-        return load_segment_descriptor(ctxt, sel, ctxt->modrm_reg);
+        return load_segment_descriptor(ctxt, sel, ctxt->modrm_reg, NULL);
 }
 
 static int em_lldt(struct x86_emulate_ctxt *ctxt)
@@ -3090,7 +3131,7 @@
 
         /* Disable writeback. */
         ctxt->dst.type = OP_NONE;
-        return load_segment_descriptor(ctxt, sel, VCPU_SREG_LDTR);
+        return load_segment_descriptor(ctxt, sel, VCPU_SREG_LDTR, NULL);
 }
 
 static int em_ltr(struct x86_emulate_ctxt *ctxt)
@@ -3099,7 +3140,7 @@
 
         /* Disable writeback. */
         ctxt->dst.type = OP_NONE;
-        return load_segment_descriptor(ctxt, sel, VCPU_SREG_TR);
+        return load_segment_descriptor(ctxt, sel, VCPU_SREG_TR, NULL);
 }
 
 static int em_invlpg(struct x86_emulate_ctxt *ctxt)
-- 
2.2.2
--
To unsubscribe from this list: send the line "unsubscribe stable" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html