On 10/24/2014 08:07 AM, Paolo Bonzini wrote: > From: Nadav Amit <namit@xxxxxxxxxxxxxxxxx> > > Before changing rip (during jmp, call, ret, etc.) the target should be asserted > to be canonical one, as real CPUs do. During sysret, both target rsp and rip > should be canonical. If any of these values is noncanonical, a #GP exception > should occur. The exception to this rule are syscall and sysenter instructions > in which the assigned rip is checked during the assignment to the relevant > MSRs. Careful here. AMD CPUs (IIUC) send #PF (or maybe #GP) from CPL3 instead of #GP from CPL0 on sysret to a non-canonical address. That behavior is *far* better than the Intel behavior, and it may be important. If an OS relies on that behavior on AMD CPUs, and guest ring 3 can force guest ring 0 to do an emulated sysret to a non-canonical address, then the guest ring 3 code can own the guest ring 0 code. --Andy > > This patch fixes the emulator to behave as real CPUs do for near branches. > Far branches are handled by the next patch. > > This fixes CVE-2014-3647. 
> > Cc: stable@xxxxxxxxxxxxxxx > Signed-off-by: Nadav Amit <namit@xxxxxxxxxxxxxxxxx> > Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx> > --- > arch/x86/kvm/emulate.c | 78 ++++++++++++++++++++++++++++++++++---------------- > 1 file changed, 54 insertions(+), 24 deletions(-) > > diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c > index 047698974799..a1b9139169f6 100644 > --- a/arch/x86/kvm/emulate.c > +++ b/arch/x86/kvm/emulate.c > @@ -564,7 +564,8 @@ static int emulate_nm(struct x86_emulate_ctxt *ctxt) > return emulate_exception(ctxt, NM_VECTOR, 0, false); > } > > -static inline void assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst) > +static inline int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst, > + int cs_l) > { > switch (ctxt->op_bytes) { > case 2: > @@ -574,16 +575,25 @@ static inline void assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst) > ctxt->_eip = (u32)dst; > break; > case 8: > + if ((cs_l && is_noncanonical_address(dst)) || > + (!cs_l && (dst & ~(u32)-1))) > + return emulate_gp(ctxt, 0); > ctxt->_eip = dst; > break; > default: > WARN(1, "unsupported eip assignment size\n"); > } > + return X86EMUL_CONTINUE; > +} > + > +static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst) > +{ > + return assign_eip_far(ctxt, dst, ctxt->mode == X86EMUL_MODE_PROT64); > } > > -static inline void jmp_rel(struct x86_emulate_ctxt *ctxt, int rel) > +static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel) > { > - assign_eip_near(ctxt, ctxt->_eip + rel); > + return assign_eip_near(ctxt, ctxt->_eip + rel); > } > > static u16 get_segment_selector(struct x86_emulate_ctxt *ctxt, unsigned seg) > @@ -1998,13 +2008,15 @@ static int em_grp45(struct x86_emulate_ctxt *ctxt) > case 2: /* call near abs */ { > long int old_eip; > old_eip = ctxt->_eip; > - ctxt->_eip = ctxt->src.val; > + rc = assign_eip_near(ctxt, ctxt->src.val); > + if (rc != X86EMUL_CONTINUE) > + break; > ctxt->src.val = old_eip; > rc = em_push(ctxt); > 
break; > } > case 4: /* jmp abs */ > - ctxt->_eip = ctxt->src.val; > + rc = assign_eip_near(ctxt, ctxt->src.val); > break; > case 5: /* jmp far */ > rc = em_jmp_far(ctxt); > @@ -2039,10 +2051,14 @@ static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt) > > static int em_ret(struct x86_emulate_ctxt *ctxt) > { > - ctxt->dst.type = OP_REG; > - ctxt->dst.addr.reg = &ctxt->_eip; > - ctxt->dst.bytes = ctxt->op_bytes; > - return em_pop(ctxt); > + int rc; > + unsigned long eip; > + > + rc = emulate_pop(ctxt, &eip, ctxt->op_bytes); > + if (rc != X86EMUL_CONTINUE) > + return rc; > + > + return assign_eip_near(ctxt, eip); > } > > static int em_ret_far(struct x86_emulate_ctxt *ctxt) > @@ -2323,7 +2339,7 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt) > { > const struct x86_emulate_ops *ops = ctxt->ops; > struct desc_struct cs, ss; > - u64 msr_data; > + u64 msr_data, rcx, rdx; > int usermode; > u16 cs_sel = 0, ss_sel = 0; > > @@ -2339,6 +2355,9 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt) > else > usermode = X86EMUL_MODE_PROT32; > > + rcx = reg_read(ctxt, VCPU_REGS_RCX); > + rdx = reg_read(ctxt, VCPU_REGS_RDX); > + > cs.dpl = 3; > ss.dpl = 3; > ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data); > @@ -2356,6 +2375,9 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt) > ss_sel = cs_sel + 8; > cs.d = 0; > cs.l = 1; > + if (is_noncanonical_address(rcx) || > + is_noncanonical_address(rdx)) > + return emulate_gp(ctxt, 0); > break; > } > cs_sel |= SELECTOR_RPL_MASK; > @@ -2364,8 +2386,8 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt) > ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS); > ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS); > > - ctxt->_eip = reg_read(ctxt, VCPU_REGS_RDX); > - *reg_write(ctxt, VCPU_REGS_RSP) = reg_read(ctxt, VCPU_REGS_RCX); > + ctxt->_eip = rdx; > + *reg_write(ctxt, VCPU_REGS_RSP) = rcx; > > return X86EMUL_CONTINUE; > } > @@ -2905,10 +2927,13 @@ static int em_aad(struct x86_emulate_ctxt *ctxt) > > static int 
em_call(struct x86_emulate_ctxt *ctxt) > { > + int rc; > long rel = ctxt->src.val; > > ctxt->src.val = (unsigned long)ctxt->_eip; > - jmp_rel(ctxt, rel); > + rc = jmp_rel(ctxt, rel); > + if (rc != X86EMUL_CONTINUE) > + return rc; > return em_push(ctxt); > } > > @@ -2940,11 +2965,12 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt) > static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt) > { > int rc; > + unsigned long eip; > > - ctxt->dst.type = OP_REG; > - ctxt->dst.addr.reg = &ctxt->_eip; > - ctxt->dst.bytes = ctxt->op_bytes; > - rc = emulate_pop(ctxt, &ctxt->dst.val, ctxt->op_bytes); > + rc = emulate_pop(ctxt, &eip, ctxt->op_bytes); > + if (rc != X86EMUL_CONTINUE) > + return rc; > + rc = assign_eip_near(ctxt, eip); > if (rc != X86EMUL_CONTINUE) > return rc; > rsp_increment(ctxt, ctxt->src.val); > @@ -3271,20 +3297,24 @@ static int em_lmsw(struct x86_emulate_ctxt *ctxt) > > static int em_loop(struct x86_emulate_ctxt *ctxt) > { > + int rc = X86EMUL_CONTINUE; > + > register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RCX), -1); > if ((address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) != 0) && > (ctxt->b == 0xe2 || test_cc(ctxt->b ^ 0x5, ctxt->eflags))) > - jmp_rel(ctxt, ctxt->src.val); > + rc = jmp_rel(ctxt, ctxt->src.val); > > - return X86EMUL_CONTINUE; > + return rc; > } > > static int em_jcxz(struct x86_emulate_ctxt *ctxt) > { > + int rc = X86EMUL_CONTINUE; > + > if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0) > - jmp_rel(ctxt, ctxt->src.val); > + rc = jmp_rel(ctxt, ctxt->src.val); > > - return X86EMUL_CONTINUE; > + return rc; > } > > static int em_in(struct x86_emulate_ctxt *ctxt) > @@ -4743,7 +4773,7 @@ special_insn: > break; > case 0x70 ... 
0x7f: /* jcc (short) */ > if (test_cc(ctxt->b, ctxt->eflags)) > - jmp_rel(ctxt, ctxt->src.val); > + rc = jmp_rel(ctxt, ctxt->src.val); > break; > case 0x8d: /* lea r16/r32, m */ > ctxt->dst.val = ctxt->src.addr.mem.ea; > @@ -4773,7 +4803,7 @@ special_insn: > break; > case 0xe9: /* jmp rel */ > case 0xeb: /* jmp rel short */ > - jmp_rel(ctxt, ctxt->src.val); > + rc = jmp_rel(ctxt, ctxt->src.val); > ctxt->dst.type = OP_NONE; /* Disable writeback. */ > break; > case 0xf4: /* hlt */ > @@ -4898,7 +4928,7 @@ twobyte_insn: > break; > case 0x80 ... 0x8f: /* jnz rel, etc*/ > if (test_cc(ctxt->b, ctxt->eflags)) > - jmp_rel(ctxt, ctxt->src.val); > + rc = jmp_rel(ctxt, ctxt->src.val); > break; > case 0x90 ... 0x9f: /* setcc r/m8 */ > ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags); > -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html