On Sat, Dec 22, 2012 at 02:26:51PM +0200, Avi Kivity wrote: > We emulate arithmetic opcodes by executing a "similar" (same operation, > different operands) on the cpu. This ensures accurate emulation, esp. wrt. > eflags. However, the prologue and epilogue around the opcode is fairly long, > consisting of a switch (for the operand size) and code to load and save the > operands. This is repeated for every opcode. > > This patch introduces an alternative way to emulate arithmetic opcodes. > Instead of the above, we have four (three on i386) functions consisting > of just the opcode and a ret; one for each operand size. For example: > > .align 8 > em_notb: > not %al > ret > > .align 8 > em_notw: > not %ax > ret > > .align 8 > em_notl: > not %eax > ret > > .align 8 > em_notq: > not %rax > ret > > The prologue and epilogue are shared across all opcodes. Note the functions > use a special calling convention; notably eflags is an input/output parameter > and is not clobbered. Rather than dispatching the four functions through a > jump table, the functions are declared as a constant size (8) so their address > can be calculated. > > Signed-off-by: Avi Kivity <avi.kivity@xxxxxxxxx> > --- > arch/x86/kvm/emulate.c | 41 +++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 41 insertions(+) > > diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c > index c7547b3..cdf7b97 100644 > --- a/arch/x86/kvm/emulate.c > +++ b/arch/x86/kvm/emulate.c > @@ -148,6 +148,7 @@ > #define Aligned ((u64)1 << 41) /* Explicitly aligned (e.g. MOVDQA) */ > #define Unaligned ((u64)1 << 42) /* Explicitly unaligned (e.g. MOVDQU) */ > #define Avx ((u64)1 << 43) /* Advanced Vector Extensions */ > +#define Fastop ((u64)1 << 44) /* Use opcode::u.fastop */ > > #define X2(x...) x, x > #define X3(x...) X2(x), x > @@ -158,6 +159,27 @@ > #define X8(x...) X4(x), X4(x) > #define X16(x...) 
X8(x), X8(x) > > +#define NR_FASTOP (ilog2(sizeof(ulong)) + 1) > +#define FASTOP_SIZE 8 > + > +/* > + * fastop functions have a special calling convention: > + * > + * dst: [rdx]:rax (in/out) Maybe I am missing something obvious, but I do not see why rdx is here. > + * src: rbx (in/out) > + * src2: rcx (in) > + * flags: rflags (in/out) > + * > + * Moreover, they are all exactly FASTOP_SIZE bytes long, so functions for > + * different operand sizes can be reached by calculation, rather than a jump > + * table (which would be bigger than the code). > + * > + * fastop functions are declared as taking a never-defined fastop parameter, > + * so they can't be called from C directly. > + */ > + > +struct fastop; > + > struct opcode { > u64 flags : 56; > u64 intercept : 8; > @@ -166,6 +188,7 @@ struct opcode { > const struct opcode *group; > const struct group_dual *gdual; > const struct gprefix *gprefix; > + void (*fastop)(struct fastop *fake); > } u; > int (*check_perm)(struct x86_emulate_ctxt *ctxt); > }; > @@ -3596,6 +3619,7 @@ static int check_perm_out(struct x86_emulate_ctxt *ctxt) > #define G(_f, _g) { .flags = ((_f) | Group | ModRM), .u.group = (_g) } > #define GD(_f, _g) { .flags = ((_f) | GroupDual | ModRM), .u.gdual = (_g) } > #define I(_f, _e) { .flags = (_f), .u.execute = (_e) } > +#define F(_f, _e) { .flags = (_f) | Fastop, .u.fastop = (_e) } > #define II(_f, _e, _i) \ > { .flags = (_f), .u.execute = (_e), .intercept = x86_intercept_##_i } > #define IIP(_f, _e, _i, _p) \ > @@ -4383,6 +4407,16 @@ static void fetch_possible_mmx_operand(struct x86_emulate_ctxt *ctxt, > read_mmx_reg(ctxt, &op->mm_val, op->addr.mm); > } > > +static int fastop(struct x86_emulate_ctxt *ctxt, void (*fop)(struct fastop *)) > +{ > + ulong flags = (ctxt->eflags & EFLAGS_MASK) | X86_EFLAGS_IF; > + fop += __ffs(ctxt->dst.bytes) * FASTOP_SIZE; > + asm("push %[flags]; popf; call *%[fastop]; pushf; pop %[flags]\n" > + : "+a"(ctxt->dst.val), "+b"(ctxt->src.val), [flags]"+D"(flags) > + : 
"c"(ctxt->src2.val), [fastop]"S"(fop)); > + ctxt->eflags = (ctxt->eflags & ~EFLAGS_MASK) | (flags & EFLAGS_MASK); > + return X86EMUL_CONTINUE; > +} > > int x86_emulate_insn(struct x86_emulate_ctxt *ctxt) > { > @@ -4512,6 +4546,13 @@ special_insn: > } > > if (ctxt->execute) { > + if (ctxt->d & Fastop) { > + void (*fop)(struct fastop *) = (void *)ctxt->execute; > + rc = fastop(ctxt, fop); > + if (rc != X86EMUL_CONTINUE) > + goto done; > + goto writeback; > + } > rc = ctxt->execute(ctxt); > if (rc != X86EMUL_CONTINUE) > goto done; > -- > 1.7.11.7 -- Gleb. -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html