On Thu, Jul 19, 2012 at 03:14:38PM +0300, Avi Kivity wrote: > Instead of populating the the entire register file, read in registers > as they are accessed, and write back only the modified ones. This > saves a VMREAD and VMWRITE on Intel (for rsp, since it is not usually > used during emulation), and a two 128-byte copies for the registers. > > Signed-off-by: Avi Kivity <avi@xxxxxxxxxx> > --- > arch/x86/include/asm/kvm_emulate.h | 17 ++- > arch/x86/kvm/emulate.c | 268 +++++++++++++++++++++---------------- > arch/x86/kvm/x86.c | 50 ++++--- > 3 files changed, 192 insertions(+), 143 deletions(-) > > diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h > index c764f43..2f1da16 100644 > --- a/arch/x86/include/asm/kvm_emulate.h > +++ b/arch/x86/include/asm/kvm_emulate.h > @@ -86,6 +86,19 @@ struct x86_instruction_info { > > struct x86_emulate_ops { > /* > + * read_gpr: read a general purpose register (rax - r15) > + * > + * @reg: gpr number. > + */ > + ulong (*read_gpr)(struct x86_emulate_ctxt *ctxt, unsigned reg); > + /* > + * write_gpr: write a general purpose register (rax - r15) > + * > + * @reg: gpr number. > + * @val: value to write. > + */ > + void (*write_gpr)(struct x86_emulate_ctxt *ctxt, unsigned reg, ulong val); > + /* > * read_std: Read bytes of standard (non-emulated/special) memory. > * Used for descriptor reading. > * @addr: [IN ] Linear address from which to read. > @@ -281,8 +294,10 @@ struct x86_emulate_ctxt { > bool rip_relative; > unsigned long _eip; > struct operand memop; > + u32 regs_valid; /* bitmaps of registers in _regs[] that can be read */ > + u32 regs_dirty; /* bitmaps of registers in _regs[] that have been written */ emul_regs_dirty (to avoid confusion with the other regs_dirty). > /* Fields above regs are cleared together. 
*/ > - unsigned long regs[NR_VCPU_REGS]; > + unsigned long _regs[NR_VCPU_REGS]; > struct operand *memopp; > struct fetch_cache fetch; > struct read_cache io_read; > diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c > index 97d9a99..468d26e 100644 > --- a/arch/x86/kvm/emulate.c > +++ b/arch/x86/kvm/emulate.c > @@ -202,6 +202,28 @@ struct gprefix { > #define EFLG_RESERVED_ZEROS_MASK 0xffc0802a > #define EFLG_RESERVED_ONE_MASK 2 > > +static ulong reg_read(struct x86_emulate_ctxt *ctxt, unsigned nr) > +{ > + if (!(ctxt->regs_valid & (1 << nr))) { > + ctxt->regs_valid |= 1 << nr; > + ctxt->_regs[nr] = ctxt->ops->read_gpr(ctxt, nr); > + } > + return ctxt->_regs[nr]; > +} > + > +static ulong *reg_write(struct x86_emulate_ctxt *ctxt, unsigned nr) > +{ > + ctxt->regs_valid |= 1 << nr; > + ctxt->regs_dirty |= 1 << nr; > + return &ctxt->_regs[nr]; > +} > + > +static ulong *reg_rmw(struct x86_emulate_ctxt *ctxt, unsigned nr) > +{ > + reg_read(ctxt, nr); > + return reg_write(ctxt, nr); > +} > + > /* > * Instruction emulation: > * Most instructions are emulated directly via a fragment of inline assembly > @@ -374,8 +396,8 @@ struct gprefix { > #define __emulate_1op_rax_rdx(ctxt, _op, _suffix, _ex) \ > do { \ > unsigned long _tmp; \ > - ulong *rax = &(ctxt)->regs[VCPU_REGS_RAX]; \ > - ulong *rdx = &(ctxt)->regs[VCPU_REGS_RDX]; \ > + ulong *rax = reg_rmw((ctxt), VCPU_REGS_RAX); \ > + ulong *rdx = reg_rmw((ctxt), VCPU_REGS_RDX); \ > \ > __asm__ __volatile__ ( \ > _PRE_EFLAGS("0", "5", "1") \ > @@ -773,14 +795,15 @@ static int do_insn_fetch(struct x86_emulate_ctxt *ctxt, > * pointer into the block that addresses the relevant register. > * @highbyte_regs specifies whether to decode AH,CH,DH,BH. 
> */ > -static void *decode_register(u8 modrm_reg, unsigned long *regs, > +static void *decode_register(struct x86_emulate_ctxt *ctxt, u8 modrm_reg, > int highbyte_regs) > { > void *p; > > - p = ®s[modrm_reg]; > if (highbyte_regs && modrm_reg >= 4 && modrm_reg < 8) > - p = (unsigned char *)®s[modrm_reg & 3] + 1; > + p = (unsigned char *)reg_rmw(ctxt, modrm_reg & 3) + 1; > + else > + p = reg_rmw(ctxt, modrm_reg); > return p; > } > > @@ -969,10 +992,10 @@ static void decode_register_operand(struct x86_emulate_ctxt *ctxt, > > op->type = OP_REG; > if (ctxt->d & ByteOp) { > - op->addr.reg = decode_register(reg, ctxt->regs, highbyte_regs); > + op->addr.reg = decode_register(ctxt, reg, highbyte_regs); > op->bytes = 1; > } else { > - op->addr.reg = decode_register(reg, ctxt->regs, 0); > + op->addr.reg = decode_register(ctxt, reg, 0); > op->bytes = ctxt->op_bytes; > } > fetch_register_operand(op); > @@ -1007,8 +1030,7 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, > if (ctxt->modrm_mod == 3) { > op->type = OP_REG; > op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes; > - op->addr.reg = decode_register(ctxt->modrm_rm, > - ctxt->regs, ctxt->d & ByteOp); > + op->addr.reg = decode_register(ctxt, ctxt->modrm_rm, ctxt->d & ByteOp); > if (ctxt->d & Sse) { > op->type = OP_XMM; > op->bytes = 16; > @@ -1029,10 +1051,10 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, > op->type = OP_MEM; > > if (ctxt->ad_bytes == 2) { > - unsigned bx = ctxt->regs[VCPU_REGS_RBX]; > - unsigned bp = ctxt->regs[VCPU_REGS_RBP]; > - unsigned si = ctxt->regs[VCPU_REGS_RSI]; > - unsigned di = ctxt->regs[VCPU_REGS_RDI]; > + unsigned bx = reg_read(ctxt, VCPU_REGS_RBX); > + unsigned bp = reg_read(ctxt, VCPU_REGS_RBP); > + unsigned si = reg_read(ctxt, VCPU_REGS_RSI); > + unsigned di = reg_read(ctxt, VCPU_REGS_RDI); > > /* 16-bit ModR/M decode. 
*/ > switch (ctxt->modrm_mod) { > @@ -1089,17 +1111,17 @@ static int decode_modrm(struct x86_emulate_ctxt *ctxt, > if ((base_reg & 7) == 5 && ctxt->modrm_mod == 0) > modrm_ea += insn_fetch(s32, ctxt); > else { > - modrm_ea += ctxt->regs[base_reg]; > + modrm_ea += reg_read(ctxt, base_reg); > adjust_modrm_seg(ctxt, base_reg); > } > if (index_reg != 4) > - modrm_ea += ctxt->regs[index_reg] << scale; > + modrm_ea += reg_read(ctxt, index_reg) << scale; > } else if ((ctxt->modrm_rm & 7) == 5 && ctxt->modrm_mod == 0) { > if (ctxt->mode == X86EMUL_MODE_PROT64) > ctxt->rip_relative = 1; > } else { > base_reg = ctxt->modrm_rm; > - modrm_ea += ctxt->regs[base_reg]; > + modrm_ea += reg_read(ctxt, base_reg); > adjust_modrm_seg(ctxt, base_reg); > } > switch (ctxt->modrm_mod) { > @@ -1240,10 +1262,10 @@ static int pio_in_emulated(struct x86_emulate_ctxt *ctxt, > if (rc->pos == rc->end) { /* refill pio read ahead */ > unsigned int in_page, n; > unsigned int count = ctxt->rep_prefix ? > - address_mask(ctxt, ctxt->regs[VCPU_REGS_RCX]) : 1; > + address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) : 1; > in_page = (ctxt->eflags & EFLG_DF) ? 
> - offset_in_page(ctxt->regs[VCPU_REGS_RDI]) : > - PAGE_SIZE - offset_in_page(ctxt->regs[VCPU_REGS_RDI]); > + offset_in_page(reg_read(ctxt, VCPU_REGS_RDI)) : > + PAGE_SIZE - offset_in_page(reg_read(ctxt, VCPU_REGS_RDI)); > n = min(min(in_page, (unsigned int)sizeof(rc->data)) / size, > count); > if (n == 0) > @@ -1522,8 +1544,8 @@ static int push(struct x86_emulate_ctxt *ctxt, void *data, int bytes) > { > struct segmented_address addr; > > - register_address_increment(ctxt, &ctxt->regs[VCPU_REGS_RSP], -bytes); > - addr.ea = register_address(ctxt, ctxt->regs[VCPU_REGS_RSP]); > + register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RSP), -bytes); > + addr.ea = register_address(ctxt, reg_read(ctxt, VCPU_REGS_RSP)); > addr.seg = VCPU_SREG_SS; > > return segmented_write(ctxt, addr, data, bytes); > @@ -1542,13 +1564,13 @@ static int emulate_pop(struct x86_emulate_ctxt *ctxt, > int rc; > struct segmented_address addr; > > - addr.ea = register_address(ctxt, ctxt->regs[VCPU_REGS_RSP]); > + addr.ea = register_address(ctxt, reg_read(ctxt, VCPU_REGS_RSP)); > addr.seg = VCPU_SREG_SS; > rc = segmented_read(ctxt, addr, dest, len); > if (rc != X86EMUL_CONTINUE) > return rc; > > - register_address_increment(ctxt, &ctxt->regs[VCPU_REGS_RSP], len); > + register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RSP), len); > return rc; > } > > @@ -1610,26 +1632,28 @@ static int em_enter(struct x86_emulate_ctxt *ctxt) > int rc; > unsigned frame_size = ctxt->src.val; > unsigned nesting_level = ctxt->src2.val & 31; > + ulong rbp; > > if (nesting_level) > return X86EMUL_UNHANDLEABLE; > > - rc = push(ctxt, &ctxt->regs[VCPU_REGS_RBP], stack_size(ctxt)); > + rbp = reg_read(ctxt, VCPU_REGS_RBP); > + rc = push(ctxt, &rbp, stack_size(ctxt)); > if (rc != X86EMUL_CONTINUE) > return rc; > - assign_masked(&ctxt->regs[VCPU_REGS_RBP], ctxt->regs[VCPU_REGS_RSP], > + assign_masked(reg_rmw(ctxt, VCPU_REGS_RBP), reg_read(ctxt, VCPU_REGS_RSP), > stack_mask(ctxt)); > - 
assign_masked(&ctxt->regs[VCPU_REGS_RSP], > - ctxt->regs[VCPU_REGS_RSP] - frame_size, > + assign_masked(reg_rmw(ctxt, VCPU_REGS_RSP), > + reg_read(ctxt, VCPU_REGS_RSP) - frame_size, > stack_mask(ctxt)); > return X86EMUL_CONTINUE; > } > > static int em_leave(struct x86_emulate_ctxt *ctxt) > { > - assign_masked(&ctxt->regs[VCPU_REGS_RSP], ctxt->regs[VCPU_REGS_RBP], > + assign_masked(reg_rmw(ctxt, VCPU_REGS_RSP), reg_read(ctxt, VCPU_REGS_RBP), > stack_mask(ctxt)); > - return emulate_pop(ctxt, &ctxt->regs[VCPU_REGS_RBP], ctxt->op_bytes); > + return emulate_pop(ctxt, reg_rmw(ctxt, VCPU_REGS_RBP), ctxt->op_bytes); > } > > static int em_push_sreg(struct x86_emulate_ctxt *ctxt) > @@ -1657,13 +1681,13 @@ static int em_pop_sreg(struct x86_emulate_ctxt *ctxt) > > static int em_pusha(struct x86_emulate_ctxt *ctxt) > { > - unsigned long old_esp = ctxt->regs[VCPU_REGS_RSP]; > + unsigned long old_esp = reg_read(ctxt, VCPU_REGS_RSP); > int rc = X86EMUL_CONTINUE; > int reg = VCPU_REGS_RAX; > > while (reg <= VCPU_REGS_RDI) { > (reg == VCPU_REGS_RSP) ? 
> - (ctxt->src.val = old_esp) : (ctxt->src.val = ctxt->regs[reg]); > + (ctxt->src.val = old_esp) : (ctxt->src.val = reg_read(ctxt, reg)); > > rc = em_push(ctxt); > if (rc != X86EMUL_CONTINUE) > @@ -1688,12 +1712,12 @@ static int em_popa(struct x86_emulate_ctxt *ctxt) > > while (reg >= VCPU_REGS_RAX) { > if (reg == VCPU_REGS_RSP) { > - register_address_increment(ctxt, &ctxt->regs[VCPU_REGS_RSP], > + register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RSP), > ctxt->op_bytes); > --reg; > } > > - rc = emulate_pop(ctxt, &ctxt->regs[reg], ctxt->op_bytes); > + rc = emulate_pop(ctxt, reg_rmw(ctxt, reg), ctxt->op_bytes); > if (rc != X86EMUL_CONTINUE) > break; > --reg; > @@ -1961,14 +1985,14 @@ static int em_cmpxchg8b(struct x86_emulate_ctxt *ctxt) > { > u64 old = ctxt->dst.orig_val64; > > - if (((u32) (old >> 0) != (u32) ctxt->regs[VCPU_REGS_RAX]) || > - ((u32) (old >> 32) != (u32) ctxt->regs[VCPU_REGS_RDX])) { > - ctxt->regs[VCPU_REGS_RAX] = (u32) (old >> 0); > - ctxt->regs[VCPU_REGS_RDX] = (u32) (old >> 32); > + if (((u32) (old >> 0) != (u32) reg_read(ctxt, VCPU_REGS_RAX)) || > + ((u32) (old >> 32) != (u32) reg_read(ctxt, VCPU_REGS_RDX))) { > + *reg_write(ctxt, VCPU_REGS_RAX) = (u32) (old >> 0); > + *reg_write(ctxt, VCPU_REGS_RDX) = (u32) (old >> 32); > ctxt->eflags &= ~EFLG_ZF; > } else { > - ctxt->dst.val64 = ((u64)ctxt->regs[VCPU_REGS_RCX] << 32) | > - (u32) ctxt->regs[VCPU_REGS_RBX]; > + ctxt->dst.val64 = ((u64)reg_read(ctxt, VCPU_REGS_RCX) << 32) | > + (u32) reg_read(ctxt, VCPU_REGS_RBX); > > ctxt->eflags |= EFLG_ZF; > } > @@ -2004,7 +2028,7 @@ static int em_cmpxchg(struct x86_emulate_ctxt *ctxt) > { > /* Save real source value, then compare EAX against destination. 
*/ > ctxt->src.orig_val = ctxt->src.val; > - ctxt->src.val = ctxt->regs[VCPU_REGS_RAX]; > + ctxt->src.val = reg_read(ctxt, VCPU_REGS_RAX); > emulate_2op_SrcV(ctxt, "cmp"); > > if (ctxt->eflags & EFLG_ZF) { > @@ -2013,7 +2037,7 @@ static int em_cmpxchg(struct x86_emulate_ctxt *ctxt) > } else { > /* Failure: write the value we saw to EAX. */ > ctxt->dst.type = OP_REG; > - ctxt->dst.addr.reg = (unsigned long *)&ctxt->regs[VCPU_REGS_RAX]; > + ctxt->dst.addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX); > } > return X86EMUL_CONTINUE; > } > @@ -2153,10 +2177,10 @@ static int em_syscall(struct x86_emulate_ctxt *ctxt) > ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS); > ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS); > > - ctxt->regs[VCPU_REGS_RCX] = ctxt->_eip; > + *reg_write(ctxt, VCPU_REGS_RCX) = ctxt->_eip; > if (efer & EFER_LMA) { > #ifdef CONFIG_X86_64 > - ctxt->regs[VCPU_REGS_R11] = ctxt->eflags & ~EFLG_RF; > + *reg_write(ctxt, VCPU_REGS_R11) = ctxt->eflags & ~EFLG_RF; > > ops->get_msr(ctxt, > ctxt->mode == X86EMUL_MODE_PROT64 ? 
> @@ -2235,7 +2259,7 @@ static int em_sysenter(struct x86_emulate_ctxt *ctxt) > ctxt->_eip = msr_data; > > ops->get_msr(ctxt, MSR_IA32_SYSENTER_ESP, &msr_data); > - ctxt->regs[VCPU_REGS_RSP] = msr_data; > + *reg_write(ctxt, VCPU_REGS_RSP) = msr_data; > > return X86EMUL_CONTINUE; > } > @@ -2285,8 +2309,8 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt) > ops->set_segment(ctxt, cs_sel, &cs, 0, VCPU_SREG_CS); > ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS); > > - ctxt->_eip = ctxt->regs[VCPU_REGS_RDX]; > - ctxt->regs[VCPU_REGS_RSP] = ctxt->regs[VCPU_REGS_RCX]; > + ctxt->_eip = reg_read(ctxt, VCPU_REGS_RDX); > + *reg_write(ctxt, VCPU_REGS_RSP) = reg_read(ctxt, VCPU_REGS_RCX); > > return X86EMUL_CONTINUE; > } > @@ -2355,14 +2379,14 @@ static void save_state_to_tss16(struct x86_emulate_ctxt *ctxt, > { > tss->ip = ctxt->_eip; > tss->flag = ctxt->eflags; > - tss->ax = ctxt->regs[VCPU_REGS_RAX]; > - tss->cx = ctxt->regs[VCPU_REGS_RCX]; > - tss->dx = ctxt->regs[VCPU_REGS_RDX]; > - tss->bx = ctxt->regs[VCPU_REGS_RBX]; > - tss->sp = ctxt->regs[VCPU_REGS_RSP]; > - tss->bp = ctxt->regs[VCPU_REGS_RBP]; > - tss->si = ctxt->regs[VCPU_REGS_RSI]; > - tss->di = ctxt->regs[VCPU_REGS_RDI]; > + tss->ax = reg_read(ctxt, VCPU_REGS_RAX); > + tss->cx = reg_read(ctxt, VCPU_REGS_RCX); > + tss->dx = reg_read(ctxt, VCPU_REGS_RDX); > + tss->bx = reg_read(ctxt, VCPU_REGS_RBX); > + tss->sp = reg_read(ctxt, VCPU_REGS_RSP); > + tss->bp = reg_read(ctxt, VCPU_REGS_RBP); > + tss->si = reg_read(ctxt, VCPU_REGS_RSI); > + tss->di = reg_read(ctxt, VCPU_REGS_RDI); > > tss->es = get_segment_selector(ctxt, VCPU_SREG_ES); > tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS); > @@ -2378,14 +2402,14 @@ static int load_state_from_tss16(struct x86_emulate_ctxt *ctxt, > > ctxt->_eip = tss->ip; > ctxt->eflags = tss->flag | 2; > - ctxt->regs[VCPU_REGS_RAX] = tss->ax; > - ctxt->regs[VCPU_REGS_RCX] = tss->cx; > - ctxt->regs[VCPU_REGS_RDX] = tss->dx; > - ctxt->regs[VCPU_REGS_RBX] = tss->bx; > - 
ctxt->regs[VCPU_REGS_RSP] = tss->sp; > - ctxt->regs[VCPU_REGS_RBP] = tss->bp; > - ctxt->regs[VCPU_REGS_RSI] = tss->si; > - ctxt->regs[VCPU_REGS_RDI] = tss->di; > + *reg_write(ctxt, VCPU_REGS_RAX) = tss->ax; > + *reg_write(ctxt, VCPU_REGS_RCX) = tss->cx; > + *reg_write(ctxt, VCPU_REGS_RDX) = tss->dx; > + *reg_write(ctxt, VCPU_REGS_RBX) = tss->bx; > + *reg_write(ctxt, VCPU_REGS_RSP) = tss->sp; > + *reg_write(ctxt, VCPU_REGS_RBP) = tss->bp; > + *reg_write(ctxt, VCPU_REGS_RSI) = tss->si; > + *reg_write(ctxt, VCPU_REGS_RDI) = tss->di; > > /* > * SDM says that segment selectors are loaded before segment > @@ -2470,14 +2494,14 @@ static void save_state_to_tss32(struct x86_emulate_ctxt *ctxt, > tss->cr3 = ctxt->ops->get_cr(ctxt, 3); > tss->eip = ctxt->_eip; > tss->eflags = ctxt->eflags; > - tss->eax = ctxt->regs[VCPU_REGS_RAX]; > - tss->ecx = ctxt->regs[VCPU_REGS_RCX]; > - tss->edx = ctxt->regs[VCPU_REGS_RDX]; > - tss->ebx = ctxt->regs[VCPU_REGS_RBX]; > - tss->esp = ctxt->regs[VCPU_REGS_RSP]; > - tss->ebp = ctxt->regs[VCPU_REGS_RBP]; > - tss->esi = ctxt->regs[VCPU_REGS_RSI]; > - tss->edi = ctxt->regs[VCPU_REGS_RDI]; > + tss->eax = reg_read(ctxt, VCPU_REGS_RAX); > + tss->ecx = reg_read(ctxt, VCPU_REGS_RCX); > + tss->edx = reg_read(ctxt, VCPU_REGS_RDX); > + tss->ebx = reg_read(ctxt, VCPU_REGS_RBX); > + tss->esp = reg_read(ctxt, VCPU_REGS_RSP); > + tss->ebp = reg_read(ctxt, VCPU_REGS_RBP); > + tss->esi = reg_read(ctxt, VCPU_REGS_RSI); > + tss->edi = reg_read(ctxt, VCPU_REGS_RDI); > > tss->es = get_segment_selector(ctxt, VCPU_SREG_ES); > tss->cs = get_segment_selector(ctxt, VCPU_SREG_CS); > @@ -2499,14 +2523,14 @@ static int load_state_from_tss32(struct x86_emulate_ctxt *ctxt, > ctxt->eflags = tss->eflags | 2; > > /* General purpose registers */ > - ctxt->regs[VCPU_REGS_RAX] = tss->eax; > - ctxt->regs[VCPU_REGS_RCX] = tss->ecx; > - ctxt->regs[VCPU_REGS_RDX] = tss->edx; > - ctxt->regs[VCPU_REGS_RBX] = tss->ebx; > - ctxt->regs[VCPU_REGS_RSP] = tss->esp; > - 
ctxt->regs[VCPU_REGS_RBP] = tss->ebp; > - ctxt->regs[VCPU_REGS_RSI] = tss->esi; > - ctxt->regs[VCPU_REGS_RDI] = tss->edi; > + *reg_write(ctxt, VCPU_REGS_RAX) = tss->eax; > + *reg_write(ctxt, VCPU_REGS_RCX) = tss->ecx; > + *reg_write(ctxt, VCPU_REGS_RDX) = tss->edx; > + *reg_write(ctxt, VCPU_REGS_RBX) = tss->ebx; > + *reg_write(ctxt, VCPU_REGS_RSP) = tss->esp; > + *reg_write(ctxt, VCPU_REGS_RBP) = tss->ebp; > + *reg_write(ctxt, VCPU_REGS_RSI) = tss->esi; > + *reg_write(ctxt, VCPU_REGS_RDI) = tss->edi; > > /* > * SDM says that segment selectors are loaded before segment > @@ -2738,8 +2762,8 @@ static void string_addr_inc(struct x86_emulate_ctxt *ctxt, unsigned seg, > { > int df = (ctxt->eflags & EFLG_DF) ? -1 : 1; > > - register_address_increment(ctxt, &ctxt->regs[reg], df * op->bytes); > - op->addr.mem.ea = register_address(ctxt, ctxt->regs[reg]); > + register_address_increment(ctxt, reg_rmw(ctxt, reg), df * op->bytes); > + op->addr.mem.ea = register_address(ctxt, reg_read(ctxt, reg)); > op->addr.mem.seg = seg; > } > > @@ -2825,7 +2849,7 @@ static int em_ret_near_imm(struct x86_emulate_ctxt *ctxt) > rc = emulate_pop(ctxt, &ctxt->dst.val, ctxt->op_bytes); > if (rc != X86EMUL_CONTINUE) > return rc; > - register_address_increment(ctxt, &ctxt->regs[VCPU_REGS_RSP], ctxt->src.val); > + register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RSP), ctxt->src.val); > return X86EMUL_CONTINUE; > } > > @@ -2915,7 +2939,7 @@ static int em_cwd(struct x86_emulate_ctxt *ctxt) > { > ctxt->dst.type = OP_REG; > ctxt->dst.bytes = ctxt->src.bytes; > - ctxt->dst.addr.reg = &ctxt->regs[VCPU_REGS_RDX]; > + ctxt->dst.addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX); > ctxt->dst.val = ~((ctxt->src.val >> (ctxt->src.bytes * 8 - 1)) - 1); > > return X86EMUL_CONTINUE; > @@ -2926,8 +2950,8 @@ static int em_rdtsc(struct x86_emulate_ctxt *ctxt) > u64 tsc = 0; > > ctxt->ops->get_msr(ctxt, MSR_IA32_TSC, &tsc); > - ctxt->regs[VCPU_REGS_RAX] = (u32)tsc; > - ctxt->regs[VCPU_REGS_RDX] = tsc >> 32; > + 
*reg_write(ctxt, VCPU_REGS_RAX) = (u32)tsc; > + *reg_write(ctxt, VCPU_REGS_RDX) = tsc >> 32; > return X86EMUL_CONTINUE; > } > > @@ -2935,10 +2959,10 @@ static int em_rdpmc(struct x86_emulate_ctxt *ctxt) > { > u64 pmc; > > - if (ctxt->ops->read_pmc(ctxt, ctxt->regs[VCPU_REGS_RCX], &pmc)) > + if (ctxt->ops->read_pmc(ctxt, reg_read(ctxt, VCPU_REGS_RCX), &pmc)) > return emulate_gp(ctxt, 0); > - ctxt->regs[VCPU_REGS_RAX] = (u32)pmc; > - ctxt->regs[VCPU_REGS_RDX] = pmc >> 32; > + *reg_write(ctxt, VCPU_REGS_RAX) = (u32)pmc; > + *reg_write(ctxt, VCPU_REGS_RDX) = pmc >> 32; > return X86EMUL_CONTINUE; > } > > @@ -2980,9 +3004,9 @@ static int em_wrmsr(struct x86_emulate_ctxt *ctxt) > { > u64 msr_data; > > - msr_data = (u32)ctxt->regs[VCPU_REGS_RAX] > - | ((u64)ctxt->regs[VCPU_REGS_RDX] << 32); > - if (ctxt->ops->set_msr(ctxt, ctxt->regs[VCPU_REGS_RCX], msr_data)) > + msr_data = (u32)reg_read(ctxt, VCPU_REGS_RAX) > + | ((u64)reg_read(ctxt, VCPU_REGS_RDX) << 32); > + if (ctxt->ops->set_msr(ctxt, reg_read(ctxt, VCPU_REGS_RCX), msr_data)) > return emulate_gp(ctxt, 0); > > return X86EMUL_CONTINUE; > @@ -2992,11 +3016,11 @@ static int em_rdmsr(struct x86_emulate_ctxt *ctxt) > { > u64 msr_data; > > - if (ctxt->ops->get_msr(ctxt, ctxt->regs[VCPU_REGS_RCX], &msr_data)) > + if (ctxt->ops->get_msr(ctxt, reg_read(ctxt, VCPU_REGS_RCX), &msr_data)) > return emulate_gp(ctxt, 0); > > - ctxt->regs[VCPU_REGS_RAX] = (u32)msr_data; > - ctxt->regs[VCPU_REGS_RDX] = msr_data >> 32; > + *reg_write(ctxt, VCPU_REGS_RAX) = (u32)msr_data; > + *reg_write(ctxt, VCPU_REGS_RDX) = msr_data >> 32; > return X86EMUL_CONTINUE; > } > > @@ -3176,8 +3200,8 @@ static int em_lmsw(struct x86_emulate_ctxt *ctxt) > > static int em_loop(struct x86_emulate_ctxt *ctxt) > { > - register_address_increment(ctxt, &ctxt->regs[VCPU_REGS_RCX], -1); > - if ((address_mask(ctxt, ctxt->regs[VCPU_REGS_RCX]) != 0) && > + register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RCX), -1); > + if ((address_mask(ctxt, reg_read(ctxt, 
VCPU_REGS_RCX) != 0)) && > (ctxt->b == 0xe2 || test_cc(ctxt->b ^ 0x5, ctxt->eflags))) > jmp_rel(ctxt, ctxt->src.val); > > @@ -3186,7 +3210,7 @@ static int em_loop(struct x86_emulate_ctxt *ctxt) > > static int em_jcxz(struct x86_emulate_ctxt *ctxt) > { > - if (address_mask(ctxt, ctxt->regs[VCPU_REGS_RCX]) == 0) > + if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX) == 0)) > jmp_rel(ctxt, ctxt->src.val); > > return X86EMUL_CONTINUE; > @@ -3274,20 +3298,20 @@ static int em_cpuid(struct x86_emulate_ctxt *ctxt) > { > u32 eax, ebx, ecx, edx; > > - eax = ctxt->regs[VCPU_REGS_RAX]; > - ecx = ctxt->regs[VCPU_REGS_RCX]; > + eax = reg_read(ctxt, VCPU_REGS_RAX); > + ecx = reg_read(ctxt, VCPU_REGS_RCX); > ctxt->ops->get_cpuid(ctxt, &eax, &ebx, &ecx, &edx); > - ctxt->regs[VCPU_REGS_RAX] = eax; > - ctxt->regs[VCPU_REGS_RBX] = ebx; > - ctxt->regs[VCPU_REGS_RCX] = ecx; > - ctxt->regs[VCPU_REGS_RDX] = edx; > + *reg_write(ctxt, VCPU_REGS_RAX) = eax; > + *reg_write(ctxt, VCPU_REGS_RBX) = ebx; > + *reg_write(ctxt, VCPU_REGS_RCX) = ecx; > + *reg_write(ctxt, VCPU_REGS_RDX) = edx; > return X86EMUL_CONTINUE; > } > > static int em_lahf(struct x86_emulate_ctxt *ctxt) > { > - ctxt->regs[VCPU_REGS_RAX] &= ~0xff00UL; > - ctxt->regs[VCPU_REGS_RAX] |= (ctxt->eflags & 0xff) << 8; > + *reg_rmw(ctxt, VCPU_REGS_RAX) &= ~0xff00UL; > + *reg_rmw(ctxt, VCPU_REGS_RAX) |= (ctxt->eflags & 0xff) << 8; > return X86EMUL_CONTINUE; > } > > @@ -3444,7 +3468,7 @@ static int check_svme(struct x86_emulate_ctxt *ctxt) > > static int check_svme_pa(struct x86_emulate_ctxt *ctxt) > { > - u64 rax = ctxt->regs[VCPU_REGS_RAX]; > + u64 rax = reg_read(ctxt, VCPU_REGS_RAX); > > /* Valid physical address? 
*/ > if (rax & 0xffff000000000000ULL) > @@ -3466,7 +3490,7 @@ static int check_rdtsc(struct x86_emulate_ctxt *ctxt) > static int check_rdpmc(struct x86_emulate_ctxt *ctxt) > { > u64 cr4 = ctxt->ops->get_cr(ctxt, 4); > - u64 rcx = ctxt->regs[VCPU_REGS_RCX]; > + u64 rcx = reg_read(ctxt, VCPU_REGS_RCX); > > if ((!(cr4 & X86_CR4_PCE) && ctxt->ops->cpl(ctxt)) || > (rcx > 3)) > @@ -3924,7 +3948,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op, > case OpAcc: > op->type = OP_REG; > op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes; > - op->addr.reg = &ctxt->regs[VCPU_REGS_RAX]; > + op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RAX); > fetch_register_operand(op); > op->orig_val = op->val; > break; > @@ -3932,19 +3956,19 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op, > op->type = OP_MEM; > op->bytes = (ctxt->d & ByteOp) ? 1 : ctxt->op_bytes; > op->addr.mem.ea = > - register_address(ctxt, ctxt->regs[VCPU_REGS_RDI]); > + register_address(ctxt, reg_read(ctxt, VCPU_REGS_RDI)); > op->addr.mem.seg = VCPU_SREG_ES; > op->val = 0; > break; > case OpDX: > op->type = OP_REG; > op->bytes = 2; > - op->addr.reg = &ctxt->regs[VCPU_REGS_RDX]; > + op->addr.reg = reg_rmw(ctxt, VCPU_REGS_RDX); > fetch_register_operand(op); > break; > case OpCL: > op->bytes = 1; > - op->val = ctxt->regs[VCPU_REGS_RCX] & 0xff; > + op->val = reg_read(ctxt, VCPU_REGS_RCX) & 0xff; > break; > case OpImmByte: > rc = decode_imm(ctxt, op, 1, true); > @@ -3975,7 +3999,7 @@ static int decode_operand(struct x86_emulate_ctxt *ctxt, struct operand *op, > op->type = OP_MEM; > op->bytes = (ctxt->d & ByteOp) ? 
1 : ctxt->op_bytes; > op->addr.mem.ea = > - register_address(ctxt, ctxt->regs[VCPU_REGS_RSI]); > + register_address(ctxt, reg_read(ctxt, VCPU_REGS_RSI)); > op->addr.mem.seg = seg_override(ctxt); > op->val = 0; > break; > @@ -4281,6 +4305,14 @@ static void fetch_possible_mmx_operand(struct x86_emulate_ctxt *ctxt, > read_mmx_reg(ctxt, &op->mm_val, op->addr.mm); > } > > +static void writeback_registers(struct x86_emulate_ctxt *ctxt) > +{ > + unsigned reg; > + > + for_each_set_bit(reg, (ulong *)&ctxt->regs_dirty, 16) > + ctxt->ops->write_gpr(ctxt, reg, ctxt->_regs[reg]); > +} > + > int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) > { > struct x86_emulate_ops *ops = ctxt->ops; > @@ -4365,7 +4397,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) > > if (ctxt->rep_prefix && (ctxt->d & String)) { > /* All REP prefixes have the same first termination condition */ > - if (address_mask(ctxt, ctxt->regs[VCPU_REGS_RCX]) == 0) { > + if (address_mask(ctxt, reg_read(ctxt, VCPU_REGS_RCX)) == 0) { > ctxt->eip = ctxt->_eip; > goto done; > } > @@ -4438,7 +4470,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) > ctxt->dst.val = ctxt->src.addr.mem.ea; > break; > case 0x90 ... 0x97: /* nop / xchg reg, rax */ > - if (ctxt->dst.addr.reg == &ctxt->regs[VCPU_REGS_RAX]) > + if (ctxt->dst.addr.reg == reg_rmw(ctxt, VCPU_REGS_RAX)) > break; > rc = em_xchg(ctxt); > break; > @@ -4466,7 +4498,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) > rc = em_grp2(ctxt); > break; > case 0xd2 ... 
0xd3: /* Grp2 */ > - ctxt->src.val = ctxt->regs[VCPU_REGS_RCX]; > + ctxt->src.val = reg_read(ctxt, VCPU_REGS_RCX); > rc = em_grp2(ctxt); > break; > case 0xe9: /* jmp rel */ > @@ -4521,14 +4553,14 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) > > if (ctxt->rep_prefix && (ctxt->d & String)) { > struct read_cache *r = &ctxt->io_read; > - register_address_increment(ctxt, &ctxt->regs[VCPU_REGS_RCX], -1); > + register_address_increment(ctxt, reg_rmw(ctxt, VCPU_REGS_RCX), -1); > > if (!string_insn_completed(ctxt)) { > /* > * Re-enter guest when pio read ahead buffer is empty > * or, if it is not used, after each 1024 iteration. > */ > - if ((r->end != 0 || ctxt->regs[VCPU_REGS_RCX] & 0x3ff) && > + if ((r->end != 0 || reg_read(ctxt, VCPU_REGS_RCX) & 0x3ff) && > (r->end == 0 || r->end != r->pos)) { > /* > * Reset read cache. Usually happens before > @@ -4536,6 +4568,7 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) > * we have to do it here. > */ > ctxt->mem_read.end = 0; > + writeback_registers(ctxt); > return EMULATION_RESTART; > } > goto done; /* skip rip writeback */ > @@ -4550,6 +4583,9 @@ int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) > if (rc == X86EMUL_INTERCEPTED) > return EMULATION_INTERCEPTED; > > + if (rc == X86EMUL_CONTINUE) > + writeback_registers(ctxt); > + Why can't this be in the writeback function in this file (named internal_writeback_registers or something)? Fewer cases to consider (e.g. why it's safe to skip writeback on X86EMUL_INTERCEPTED again). > return (rc == X86EMUL_UNHANDLEABLE) ? 
EMULATION_FAILED : EMULATION_OK; > > twobyte_insn: > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > index 59b5950..b8a8982 100644 > --- a/arch/x86/kvm/x86.c > +++ b/arch/x86/kvm/x86.c > @@ -4325,7 +4325,19 @@ static void emulator_get_cpuid(struct x86_emulate_ctxt *ctxt, > kvm_cpuid(emul_to_vcpu(ctxt), eax, ebx, ecx, edx); > } > > +static ulong emulator_read_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg) > +{ > + return kvm_register_read(emul_to_vcpu(ctxt), reg); > +} > + > +static void emulator_write_gpr(struct x86_emulate_ctxt *ctxt, unsigned reg, ulong val) > +{ > + kvm_register_write(emul_to_vcpu(ctxt), reg, val); > +} > + > static struct x86_emulate_ops emulate_ops = { > + .read_gpr = emulator_read_gpr, > + .write_gpr = emulator_write_gpr, > .read_std = kvm_read_guest_virt_system, > .write_std = kvm_write_guest_virt_system, > .fetch = kvm_fetch_guest_virt, > @@ -4360,14 +4372,6 @@ static void emulator_get_cpuid(struct x86_emulate_ctxt *ctxt, > .get_cpuid = emulator_get_cpuid, > }; > > -static void cache_all_regs(struct kvm_vcpu *vcpu) > -{ > - kvm_register_read(vcpu, VCPU_REGS_RAX); > - kvm_register_read(vcpu, VCPU_REGS_RSP); > - kvm_register_read(vcpu, VCPU_REGS_RIP); > - vcpu->arch.regs_dirty = ~0; > -} > - > static void toggle_interruptibility(struct kvm_vcpu *vcpu, u32 mask) > { > u32 int_shadow = kvm_x86_ops->get_interrupt_shadow(vcpu, mask); > @@ -4394,12 +4398,10 @@ static void inject_emulated_exception(struct kvm_vcpu *vcpu) > kvm_queue_exception(vcpu, ctxt->exception.vector); > } > > -static void init_decode_cache(struct x86_emulate_ctxt *ctxt, > - const unsigned long *regs) > +static void init_decode_cache(struct x86_emulate_ctxt *ctxt) > { > memset(&ctxt->twobyte, 0, > - (void *)&ctxt->regs - (void *)&ctxt->twobyte); > - memcpy(ctxt->regs, regs, sizeof(ctxt->regs)); > + (void *)&ctxt->_regs - (void *)&ctxt->twobyte); > > ctxt->fetch.start = 0; > ctxt->fetch.end = 0; > @@ -4414,14 +4416,6 @@ static void init_emulate_ctxt(struct kvm_vcpu 
*vcpu) > struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; > int cs_db, cs_l; > > - /* > - * TODO: fix emulate.c to use guest_read/write_register > - * instead of direct ->regs accesses, can save hundred cycles > - * on Intel for instructions that don't read/change RSP, for > - * for example. > - */ > - cache_all_regs(vcpu); > - > kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l); > > ctxt->eflags = kvm_get_rflags(vcpu); > @@ -4433,7 +4427,7 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu) > X86EMUL_MODE_PROT16; > ctxt->guest_mode = is_guest_mode(vcpu); > > - init_decode_cache(ctxt, vcpu->arch.regs); > + init_decode_cache(ctxt); > vcpu->arch.emulate_regs_need_sync_from_vcpu = false; > } > > @@ -4453,7 +4447,6 @@ int kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip) > return EMULATE_FAIL; > > ctxt->eip = ctxt->_eip; > - memcpy(vcpu->arch.regs, ctxt->regs, sizeof ctxt->regs); > kvm_rip_write(vcpu, ctxt->eip); > kvm_set_rflags(vcpu, ctxt->eflags); Need to writeback here? > @@ -4601,7 +4594,7 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, > changes registers values during IO operation */ > if (vcpu->arch.emulate_regs_need_sync_from_vcpu) { > vcpu->arch.emulate_regs_need_sync_from_vcpu = false; > - memcpy(ctxt->regs, vcpu->arch.regs, sizeof ctxt->regs); > + ctxt->regs_valid = 0; > } I think you can improve this hack now (perhaps invalidate the emulator cache on SET_REGS?). 
> restart: > @@ -4639,7 +4632,6 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, > toggle_interruptibility(vcpu, ctxt->interruptibility); > kvm_set_rflags(vcpu, ctxt->eflags); > kvm_make_request(KVM_REQ_EVENT, vcpu); > - memcpy(vcpu->arch.regs, ctxt->regs, sizeof ctxt->regs); > vcpu->arch.emulate_regs_need_sync_to_vcpu = false; > kvm_rip_write(vcpu, ctxt->eip); > } else > @@ -5593,7 +5585,11 @@ int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) > * backdoor interface) need this to work > */ > struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; > - memcpy(vcpu->arch.regs, ctxt->regs, sizeof ctxt->regs); > + unsigned reg; > + > + for_each_set_bit(reg, (ulong *)&ctxt->regs_dirty, 16) > + kvm_register_write(vcpu, reg, ctxt->_regs[reg]); > + Same as comment below. > vcpu->arch.emulate_regs_need_sync_to_vcpu = false; > } > regs->rax = kvm_register_read(vcpu, VCPU_REGS_RAX); > @@ -5724,6 +5720,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index, > { > struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; > int ret; > + unsigned reg; > > init_emulate_ctxt(vcpu); > > @@ -5733,7 +5730,8 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int idt_index, > if (ret) > return EMULATE_FAIL; > > - memcpy(vcpu->arch.regs, ctxt->regs, sizeof ctxt->regs); > + for_each_set_bit(reg, (ulong *)&ctxt->regs_dirty, 16) > + kvm_register_write(vcpu, reg, ctxt->_regs[reg]); Should update regs_avail/regs_dirty? Better to do any of that in emulator.c via interfaces. > kvm_rip_write(vcpu, ctxt->eip); > kvm_set_rflags(vcpu, ctxt->eflags); > kvm_make_request(KVM_REQ_EVENT, vcpu); > -- > 1.7.11.2 Did not double check that emulator.c conversion is complete with your patch (which should be done). -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html