Re: [PATCH 05/14] KVM: x86: Emulator fixes for eip canonical checks on near branches

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 10/24/2014 08:07 AM, Paolo Bonzini wrote:
> From: Nadav Amit <namit@xxxxxxxxxxxxxxxxx>
> 
> Before changing rip (during jmp, call, ret, etc.) the target should be asserted
> to be canonical one, as real CPUs do.  During sysret, both target rsp and rip
> should be canonical. If any of these values is noncanonical, a #GP exception
> should occur.  The exception to this rule are syscall and sysenter instructions
> in which the assigned rip is checked during the assignment to the relevant
> MSRs.

Careful here.  AMD CPUs (IIUC) send #PF (or maybe #GP) from CPL3 instead
of #GP from CPL0 on sysret to a non-canonical address.  That behavior is
*far* better than the Intel behavior, and it may be important.

If an OS relies on that behavior on AMD CPUs, and guest ring 3 can force
guest ring 0 to do an emulated sysret to a non-canonical address, then
the guest ring 3 code can own the guest ring 0 code.

--Andy

> 
> This patch fixes the emulator to behave as real CPUs do for near branches.
> Far branches are handled by the next patch.
> 
> This fixes CVE-2014-3647.
> 
> Cc: stable@xxxxxxxxxxxxxxx
> Signed-off-by: Nadav Amit <namit@xxxxxxxxxxxxxxxxx>
> Signed-off-by: Paolo Bonzini <pbonzini@xxxxxxxxxx>
> ---
>  arch/x86/kvm/emulate.c | 78 ++++++++++++++++++++++++++++++++++----------------
>  1 file changed, 54 insertions(+), 24 deletions(-)
> 
> diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
> index 047698974799..a1b9139169f6 100644
> --- a/arch/x86/kvm/emulate.c
> +++ b/arch/x86/kvm/emulate.c
> @@ -564,7 +564,8 @@ static int emulate_nm(struct x86_emulate_ctxt *ctxt)
>  	return emulate_exception(ctxt, NM_VECTOR, 0, false);
>  }
>  
> -static inline void assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
> +static inline int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst,
> +			       int cs_l)
>  {
>  	switch (ctxt->op_bytes) {
>  	case 2:
> @@ -574,16 +575,25 @@ static inline void assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
>  		ctxt->_eip = (u32)dst;
>  		break;
>  	case 8:
> +		if ((cs_l && is_noncanonical_address(dst)) ||
> +		    (!cs_l && (dst & ~(u32)-1)))
> +			return emulate_gp(ctxt, 0);
>  		ctxt->_eip = dst;
>  		break;
>  	default:
>  		WARN(1, "unsupported eip assignment size\n");
>  	}
> +	return X86EMUL_CONTINUE;
> +}
> +
> +static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst)
> +{
> +	return assign_eip_far(ctxt, dst, ctxt->mode == X86EMUL_MODE_PROT64);
>  }
>  
> -static inline void jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
> +static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel)
>  {
> -	assign_eip_near(ctxt, ctxt->_eip + rel);
> +	return assign_eip_near(ctxt, ctxt->_eip + rel);
>  }
>  
>  static u16 get_segment_selector(struct x86_emulate_ctxt *ctxt, unsigned seg)
> @@ -1998,13 +2008,15 @@ static int em_grp45(struct x86_emulate_ctxt *ctxt)
>  	case 2: /* call near abs */ {
>  		long int old_eip;
>  		old_eip = ctxt->_eip;
> -		ctxt->_eip = ctxt->src.val;
> +		rc = assign_eip_near(ctxt, ctxt->src.val);
> +		if (rc != X86EMUL_CONTINUE)
> +			break;
>  		ctxt->src.val = old_eip;
>  		rc = em_push(ctxt);
>  		break;
>  	}
>  	case 4: /* jmp abs */
> -		ctxt->_eip = ctxt->src.val;
> +		rc = assign_eip_near(ctxt, ctxt->src.val);
>  		break;
>  	case 5: /* jmp far */
>  		rc = em_jmp_far(ctxt);
> @@ -2039,10 +2051,14 @@ static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt)
>  
>  static int em_ret(struct x86_emulate_ctxt *ctxt)
>  {
> -	ctxt->dst.type = OP_REG;
> -	ctxt->dst.addr.reg = &ctxt->_eip;
> -	ctxt->dst.bytes = ctxt->op_bytes;
> -	return em_pop(ctxt);
> +	int rc;
> +	unsigned long eip;
> +
> +	rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
> +	if (rc != X86EMUL_CONTINUE)
> +		return rc;
> +
> +	return assign_eip_near(ctxt, eip);
>  }
>  
>  static int em_ret_far(struct x86_emulate_ctxt *ctxt)
> @@ -2323,7 +2339,7 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
>  {
>  	const struct x86_emulate_ops *ops = ctxt->ops;
>  	struct desc_struct cs, ss;
> -	u64 msr_data;
> +	u64 msr_data, rcx, rdx;
>  	int usermode;
>  	u16 cs_sel = 0, ss_sel = 0;
>  
> @@ -2339,6 +2355,9 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
>  	else
>  		usermode = X86EMUL_MODE_PROT32;
>  
> +	rcx = reg_read(ctxt, VCPU_REGS_RCX);
> +	rdx = reg_read(ctxt, VCPU_REGS_RDX);
> +
>  	cs.dpl = 3;
>  	ss.dpl = 3;
>  	ops->get_msr(ctxt, MSR_IA32_SYSENTER_CS, &msr_data);
> @@ -2356,6 +2375,9 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
>  		ss_sel = cs_sel + 8;
>  		cs.d = 0;
>  		cs.l = 1;
> +		if (is_noncanonical_address(rcx) ||
> +		    is_noncanonical_address(rdx))
> +			return emulate_gp(ctxt, 0);
>  		break;
>  	}
>  	cs_sel |= SELECTOR_RPL_MASK;
> @@ -2364,8 +2386,8 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt)
>  	ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS);
>  	ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS);
>  
> -	ctxt->_eip = reg_read(ctxt, VCPU_REGS_RDX);
> -	*reg_write(ctxt, VCPU_REGS_RSP) = reg_read(ctxt, VCPU_REGS_RCX);
> +	ctxt->_eip = rdx;
> +	*reg_write(ctxt, VCPU_REGS_RSP) = rcx;
>  
>  	return X86EMUL_CONTINUE;
>  }
> @@ -2905,10 +2927,13 @@ static int em_aad(struct x86_emulate_ctxt *ctxt)
>  
>  static int em_call(struct x86_emulate_ctxt *ctxt)
>  {
> +	int rc;
>  	long rel = ctxt->src.val;
>  
>  	ctxt->src.val = (unsigned long)ctxt->_eip;
> -	jmp_rel(ctxt, rel);
> +	rc = jmp_rel(ctxt, rel);
> +	if (rc != X86EMUL_CONTINUE)
> +		return rc;
>  	return em_push(ctxt);
>  }
>  
> @@ -2940,11 +2965,12 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt)
>  static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt)
>  {
>  	int rc;
> +	unsigned long eip;
>  
> -	ctxt->dst.type = OP_REG;
> -	ctxt->dst.addr.reg = &ctxt->_eip;
> -	ctxt->dst.bytes = ctxt->op_bytes;
> -	rc = emulate_pop(ctxt, &ctxt->dst.val, ctxt->op_bytes);
> +	rc = emulate_pop(ctxt, &eip, ctxt->op_bytes);
> +	if (rc != X86EMUL_CONTINUE)
> +		return rc;
> +	rc = assign_eip_near(ctxt, eip);
>  	if (rc != X86EMUL_CONTINUE)
>  		return rc;
>  	rsp_increment(ctxt, ctxt->src.val);
> @@ -3271,20 +3297,24 @@ static int em_lmsw(struct x86_emulate_ctxt *ctxt)
>  
>  static int em_loop(struct x86_emulate_ctxt *ctxt)
>  {
> +	int rc = X86EMUL_CONTINUE;
> +
>  	register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RCX), -1);
>  	if ((address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) != 0) &&
>  	    (ctxt->b == 0xe2 || test_cc(ctxt->b ^ 0x5, ctxt->eflags)))
> -		jmp_rel(ctxt, ctxt->src.val);
> +		rc = jmp_rel(ctxt, ctxt->src.val);
>  
> -	return X86EMUL_CONTINUE;
> +	return rc;
>  }
>  
>  static int em_jcxz(struct x86_emulate_ctxt *ctxt)
>  {
> +	int rc = X86EMUL_CONTINUE;
> +
>  	if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0)
> -		jmp_rel(ctxt, ctxt->src.val);
> +		rc = jmp_rel(ctxt, ctxt->src.val);
>  
> -	return X86EMUL_CONTINUE;
> +	return rc;
>  }
>  
>  static int em_in(struct x86_emulate_ctxt *ctxt)
> @@ -4743,7 +4773,7 @@ special_insn:
>  		break;
>  	case 0x70 ... 0x7f: /* jcc (short) */
>  		if (test_cc(ctxt->b, ctxt->eflags))
> -			jmp_rel(ctxt, ctxt->src.val);
> +			rc = jmp_rel(ctxt, ctxt->src.val);
>  		break;
>  	case 0x8d: /* lea r16/r32, m */
>  		ctxt->dst.val = ctxt->src.addr.mem.ea;
> @@ -4773,7 +4803,7 @@ special_insn:
>  		break;
>  	case 0xe9: /* jmp rel */
>  	case 0xeb: /* jmp rel short */
> -		jmp_rel(ctxt, ctxt->src.val);
> +		rc = jmp_rel(ctxt, ctxt->src.val);
>  		ctxt->dst.type = OP_NONE; /* Disable writeback. */
>  		break;
>  	case 0xf4:              /* hlt */
> @@ -4898,7 +4928,7 @@ twobyte_insn:
>  		break;
>  	case 0x80 ... 0x8f: /* jnz rel, etc*/
>  		if (test_cc(ctxt->b, ctxt->eflags))
> -			jmp_rel(ctxt, ctxt->src.val);
> +			rc = jmp_rel(ctxt, ctxt->src.val);
>  		break;
>  	case 0x90 ... 0x9f:     /* setcc r/m8 */
>  		ctxt->dst.val = test_cc(ctxt->b, ctxt->eflags);
> 

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux