This patch has bugs which are addressed by
http://www.spinics.net/lists/kvm/msg109664.html . Please apply it on top of
this patch. Sorry for the mess.

Nadav

> On Nov 10, 2014, at 04:37, gregkh@xxxxxxxxxxxxxxxxxxx wrote:
>
>
> This is a note to let you know that I've just added the patch titled
>
>     KVM: x86: Handle errors when RIP is set during far jumps
>
> to the 3.17-stable tree which can be found at:
>     http://www.kernel.org/git/?p=linux/kernel/git/stable/stable-queue.git;a=summary
>
> The filename of the patch is:
>     kvm-x86-handle-errors-when-rip-is-set-during-far-jumps.patch
> and it can be found in the queue-3.17 subdirectory.
>
> If you, or anyone else, feels it should not be added to the stable tree,
> please let <stable@xxxxxxxxxxxxxxx> know about it.
>
>
> From d1442d85cc30ea75f7d399474ca738e0bc96f715 Mon Sep 17 00:00:00 2001
> From: Nadav Amit <namit@xxxxxxxxxxxxxxxxx>
> Date: Thu, 18 Sep 2014 22:39:39 +0300
> Subject: KVM: x86: Handle errors when RIP is set during far jumps
>
> From: Nadav Amit <namit@xxxxxxxxxxxxxxxxx>
>
> commit d1442d85cc30ea75f7d399474ca738e0bc96f715 upstream.
>
> Far jmp/call/ret may fault while loading a new RIP. Currently KVM does not
> handle this case, which may result in a failed vm-entry once the assignment
> is done. The tricky part is that loading the new CS affects the VMCS/VMCB
> state, so if we fail while loading the new RIP we are left in an
> inconsistent state. Therefore, on 64-bit this patch saves the old CS
> descriptor and restores it if loading RIP failed.
>
> This fixes CVE-2014-3647.
>
> Signed-off-by: Nadav Amit <namit@xxxxxxxxxxxxxxxxx>
> Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx>
> Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx>
>
> ---
>  arch/x86/kvm/emulate.c |  118 ++++++++++++++++++++++++++++++++++++-------------
>  1 file changed, 88 insertions(+), 30 deletions(-)
>
> --- a/arch/x86/kvm/emulate.c
> +++ b/arch/x86/kvm/emulate.c
> @@ -1467,7 +1467,9 @@ static int write_segment_descriptor(stru
>
>  /* Does not support long mode */
>  static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
> -                                     u16 selector, int seg, u8 cpl, bool in_task_switch)
> +                                     u16 selector, int seg, u8 cpl,
> +                                     bool in_task_switch,
> +                                     struct desc_struct *desc)
>  {
>          struct desc_struct seg_desc, old_desc;
>          u8 dpl, rpl;
> @@ -1599,6 +1601,8 @@ static int __load_segment_descriptor(str
>          }
>  load:
>          ctxt->ops->set_segment(ctxt, selector, &seg_desc, base3, seg);
> +        if (desc)
> +                *desc = seg_desc;
>          return X86EMUL_CONTINUE;
>  exception:
>          emulate_exception(ctxt, err_vec, err_code, true);
> @@ -1609,7 +1613,7 @@ static int load_segment_descriptor(struc
>                                     u16 selector, int seg)
>  {
>          u8 cpl = ctxt->ops->cpl(ctxt);
> -        return __load_segment_descriptor(ctxt, selector, seg, cpl, false);
> +        return __load_segment_descriptor(ctxt, selector, seg, cpl, false, NULL);
>  }
>
>  static void write_register_operand(struct operand *op)
> @@ -2003,17 +2007,31 @@ static int em_iret(struct x86_emulate_ct
>  static int em_jmp_far(struct x86_emulate_ctxt *ctxt)
>  {
>          int rc;
> -        unsigned short sel;
> +        unsigned short sel, old_sel;
> +        struct desc_struct old_desc, new_desc;
> +        const struct x86_emulate_ops *ops = ctxt->ops;
> +        u8 cpl = ctxt->ops->cpl(ctxt);
> +
> +        /* Assignment of RIP may only fail in 64-bit mode */
> +        if (ctxt->mode == X86EMUL_MODE_PROT64)
> +                ops->get_segment(ctxt, &old_sel, &old_desc, NULL,
> +                                 VCPU_SREG_CS);
>
>          memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
>
> -        rc = load_segment_descriptor(ctxt, sel, VCPU_SREG_CS);
> +        rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl, false,
> +                                       &new_desc);
>          if (rc != X86EMUL_CONTINUE)
>                  return rc;
>
> -        ctxt->_eip = 0;
> -        memcpy(&ctxt->_eip, ctxt->src.valptr, ctxt->op_bytes);
> -        return X86EMUL_CONTINUE;
> +        rc = assign_eip_far(ctxt, ctxt->src.val, new_desc.l);
> +        if (rc != X86EMUL_CONTINUE) {
> +                WARN_ON(!ctxt->mode != X86EMUL_MODE_PROT64);
> +                /* assigning eip failed; restore the old cs */
> +                ops->set_segment(ctxt, old_sel, &old_desc, 0, VCPU_SREG_CS);
> +                return rc;
> +        }
> +        return rc;
>  }
>
>  static int em_grp45(struct x86_emulate_ctxt *ctxt)
> @@ -2080,21 +2098,34 @@ static int em_ret(struct x86_emulate_ctx
>  static int em_ret_far(struct x86_emulate_ctxt *ctxt)
>  {
>          int rc;
> -        unsigned long cs;
> +        unsigned long eip, cs;
> +        u16 old_cs;
>          int cpl = ctxt->ops->cpl(ctxt);
> +        struct desc_struct old_desc, new_desc;
> +        const struct x86_emulate_ops *ops = ctxt->ops;
> +
> +        if (ctxt->mode == X86EMUL_MODE_PROT64)
> +                ops->get_segment(ctxt, &old_cs, &old_desc, NULL,
> +                                 VCPU_SREG_CS);
>
> -        rc = emulate_pop(ctxt, &ctxt->_eip, ctxt->op_bytes);
> +        rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
>          if (rc != X86EMUL_CONTINUE)
>                  return rc;
> -        if (ctxt->op_bytes == 4)
> -                ctxt->_eip = (u32)ctxt->_eip;
>          rc = emulate_pop(ctxt, &cs, ctxt->op_bytes);
>          if (rc != X86EMUL_CONTINUE)
>                  return rc;
>          /* Outer-privilege level return is not implemented */
>          if (ctxt->mode >= X86EMUL_MODE_PROT16 && (cs & 3) > cpl)
>                  return X86EMUL_UNHANDLEABLE;
> -        rc = load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS);
> +        rc = __load_segment_descriptor(ctxt, (u16)cs, VCPU_SREG_CS, 0, false,
> +                                       &new_desc);
> +        if (rc != X86EMUL_CONTINUE)
> +                return rc;
> +        rc = assign_eip_far(ctxt, eip, new_desc.l);
> +        if (rc != X86EMUL_CONTINUE) {
> +                WARN_ON(!ctxt->mode != X86EMUL_MODE_PROT64);
> +                ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
> +        }
>          return rc;
>  }
>
> @@ -2521,19 +2552,24 @@ static int load_state_from_tss16(struct
>           * Now load segment descriptors. If fault happens at this stage
>           * it is handled in a context of new task
>           */
> -        ret = __load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR, cpl, true);
> +        ret = __load_segment_descriptor(ctxt, tss->ldt, VCPU_SREG_LDTR, cpl,
> +                                        true, NULL);
>          if (ret != X86EMUL_CONTINUE)
>                  return ret;
> -        ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl, true);
> +        ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
> +                                        true, NULL);
>          if (ret != X86EMUL_CONTINUE)
>                  return ret;
> -        ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl, true);
> +        ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
> +                                        true, NULL);
>          if (ret != X86EMUL_CONTINUE)
>                  return ret;
> -        ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl, true);
> +        ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
> +                                        true, NULL);
>          if (ret != X86EMUL_CONTINUE)
>                  return ret;
> -        ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl, true);
> +        ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
> +                                        true, NULL);
>          if (ret != X86EMUL_CONTINUE)
>                  return ret;
>
> @@ -2658,25 +2694,32 @@ static int load_state_from_tss32(struct
>           * Now load segment descriptors. If fault happenes at this stage
>           * it is handled in a context of new task
>           */
> -        ret = __load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR, cpl, true);
> +        ret = __load_segment_descriptor(ctxt, tss->ldt_selector, VCPU_SREG_LDTR,
> +                                        cpl, true, NULL);
>          if (ret != X86EMUL_CONTINUE)
>                  return ret;
> -        ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl, true);
> +        ret = __load_segment_descriptor(ctxt, tss->es, VCPU_SREG_ES, cpl,
> +                                        true, NULL);
>          if (ret != X86EMUL_CONTINUE)
>                  return ret;
> -        ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl, true);
> +        ret = __load_segment_descriptor(ctxt, tss->cs, VCPU_SREG_CS, cpl,
> +                                        true, NULL);
>          if (ret != X86EMUL_CONTINUE)
>                  return ret;
> -        ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl, true);
> +        ret = __load_segment_descriptor(ctxt, tss->ss, VCPU_SREG_SS, cpl,
> +                                        true, NULL);
>          if (ret != X86EMUL_CONTINUE)
>                  return ret;
> -        ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl, true);
> +        ret = __load_segment_descriptor(ctxt, tss->ds, VCPU_SREG_DS, cpl,
> +                                        true, NULL);
>          if (ret != X86EMUL_CONTINUE)
>                  return ret;
> -        ret = __load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS, cpl, true);
> +        ret = __load_segment_descriptor(ctxt, tss->fs, VCPU_SREG_FS, cpl,
> +                                        true, NULL);
>          if (ret != X86EMUL_CONTINUE)
>                  return ret;
> -        ret = __load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, cpl, true);
> +        ret = __load_segment_descriptor(ctxt, tss->gs, VCPU_SREG_GS, cpl,
> +                                        true, NULL);
>          if (ret != X86EMUL_CONTINUE)
>                  return ret;
>
> @@ -2959,24 +3002,39 @@ static int em_call_far(struct x86_emulat
>          u16 sel, old_cs;
>          ulong old_eip;
>          int rc;
> +        struct desc_struct old_desc, new_desc;
> +        const struct x86_emulate_ops *ops = ctxt->ops;
> +        int cpl = ctxt->ops->cpl(ctxt);
>
> -        old_cs = get_segment_selector(ctxt, VCPU_SREG_CS);
>          old_eip = ctxt->_eip;
> +        ops->get_segment(ctxt, &old_cs, &old_desc, NULL, VCPU_SREG_CS);
>
>          memcpy(&sel, ctxt->src.valptr + ctxt->op_bytes, 2);
> -        if (load_segment_descriptor(ctxt, sel, VCPU_SREG_CS))
> +        rc = __load_segment_descriptor(ctxt, sel, VCPU_SREG_CS, cpl, false,
> +                                       &new_desc);
> +        if (rc != X86EMUL_CONTINUE)
>                  return X86EMUL_CONTINUE;
>
> -        ctxt->_eip = 0;
> -        memcpy(&ctxt->_eip, ctxt->src.valptr, ctxt->op_bytes);
> +        rc = assign_eip_far(ctxt, ctxt->src.val, new_desc.l);
> +        if (rc != X86EMUL_CONTINUE)
> +                goto fail;
>
>          ctxt->src.val = old_cs;
>          rc = em_push(ctxt);
>          if (rc != X86EMUL_CONTINUE)
> -                return rc;
> +                goto fail;
>
>          ctxt->src.val = old_eip;
> -        return em_push(ctxt);
> +        rc = em_push(ctxt);
> +        /* If we failed, we tainted the memory, but the very least we should
> +           restore cs */
> +        if (rc != X86EMUL_CONTINUE)
> +                goto fail;
> +        return rc;
> +fail:
> +        ops->set_segment(ctxt, old_cs, &old_desc, 0, VCPU_SREG_CS);
> +        return rc;
> +
>  }
>
>  static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt)
>
>
> Patches currently in stable-queue which might be from namit@xxxxxxxxxxxxxxxxx are
>
> queue-3.17/kvm-x86-emulator-fixes-for-eip-canonical-checks-on-near-branches.patch
> queue-3.17/kvm-x86-check-non-canonical-addresses-upon-wrmsr.patch
> queue-3.17/kvm-x86-handle-errors-when-rip-is-set-during-far-jumps.patch
> queue-3.17/kvm-x86-emulator-does-not-decode-clflush-well.patch
> queue-3.17/kvm-x86-prefetch-and-hint_nop-should-have-srcmem-flag.patch
> queue-3.17/kvm-x86-fix-wrong-masking-on-relative-jump-call.patch
> queue-3.17/kvm-x86-decoding-guest-instructions-which-cross-page-boundary-may-fail.patch
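For readers following the commit message, here is a minimal, self-contained C
illustration of the two points it makes: a far-branch target can only be
rejected (as non-canonical) in 64-bit mode, and because CS is loaded before
the new RIP is assigned, the old CS must be saved up front and restored on
failure. This is a toy sketch only -- every type and function name below is
invented for the example and is not the KVM emulator API, which is shown in
the diff above.

/*
 * Toy illustration only -- NOT KVM code. Types and helpers are invented
 * stand-ins; the real emulator state lives in struct x86_emulate_ctxt.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct toy_vcpu {
        uint16_t cs;    /* stand-in for the cached CS selector/descriptor */
        uint64_t rip;
};

/* 64-bit mode: bits 63:47 of a virtual address must all equal bit 47. */
static bool is_canonical(uint64_t addr)
{
        int64_t sign = (int64_t)addr >> 47;

        return sign == 0 || sign == -1;
}

/*
 * Mirrors the ordering the patch enforces: CS is loaded first (which already
 * changes guest-visible state), so the old value is saved and restored if
 * the new RIP is rejected.
 */
static int toy_far_jump(struct toy_vcpu *vcpu, uint16_t new_cs,
                        uint64_t new_rip, bool long_mode)
{
        uint16_t old_cs = vcpu->cs;             /* save before touching CS */

        vcpu->cs = new_cs;                      /* "load the new CS" */
        if (long_mode && !is_canonical(new_rip)) {
                vcpu->cs = old_cs;              /* roll back, report failure */
                return -1;
        }
        vcpu->rip = new_rip;                    /* RIP assignment succeeded */
        return 0;
}

int main(void)
{
        struct toy_vcpu vcpu = { .cs = 0x10, .rip = 0x1000 };

        /* Non-canonical target: rejected, CS restored to 0x10. */
        if (toy_far_jump(&vcpu, 0x20, 0xdead000000000000ULL, true))
                printf("rejected: cs=%#x rip=%#llx\n", vcpu.cs,
                       (unsigned long long)vcpu.rip);

        /* Canonical target: accepted, CS and RIP both updated. */
        if (!toy_far_jump(&vcpu, 0x20, 0x2000, true))
                printf("accepted: cs=%#x rip=%#llx\n", vcpu.cs,
                       (unsigned long long)vcpu.rip);
        return 0;
}

Running it prints one rejected jump with CS rolled back and one accepted
jump, the same save/try/restore ordering the patch applies in em_jmp_far(),
em_ret_far() and em_call_far().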