On 05/23/2012 05:08 PM, Gleb Natapov wrote: > "rep ins" emulation is going through emulator now. This is slow because > emulator knows how to write back only one datum at a time. This patch > provides fast path for the instruction in certain conditions. The > conditions are: DF flag is not set, destination memory is RAM and single > datum does not cross page boundary. If fast path code fails it falls > back to emulation. > > > +static int __kvm_fast_string_pio_in(struct kvm_vcpu *vcpu, int size, > + unsigned short port, unsigned long addr, > + int count) > +{ > + struct page *page; > + gpa_t gpa; > + char *kaddr; > + int ret; > + > + gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, NULL); > + > + if (gpa == UNMAPPED_GVA || > + (gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) > + return EMULATE_FAIL; > + > + page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT); > + if (is_error_page(page)) { > + kvm_release_page_clean(page); > + return EMULATE_FAIL; > + } > + > + kaddr = kmap_atomic(page); > + kaddr += offset_in_page(gpa); > + > + ret = emulator_pio_in_emulated(&vcpu->arch.emulate_ctxt, size, port, > + kaddr, count); > + > + kunmap_atomic(kaddr); > + if (ret) { > + kvm_register_write(vcpu, VCPU_REGS_RCX, > + kvm_register_read(vcpu, VCPU_REGS_RCX) - count); Sometimes we only modify the low 16 bits of %rcx. > + kvm_register_write(vcpu, VCPU_REGS_RDI, > + kvm_register_read(vcpu, VCPU_REGS_RDI) + count*size); Possibly, ditto. > + kvm_release_page_dirty(page); > + return EMULATE_DONE; > + } > + kvm_release_page_clean(page); > + return EMULATE_DO_MMIO; > +} > + > + > +int kvm_fast_string_pio_in(struct kvm_vcpu *vcpu, int size, > + unsigned short port, u8 addr_size) This is actually the logarithm of addr_size, the variable name should reflect it. > +{ > + unsigned long masks[] = {0xffff, 0xffffffff, ~0}; > + unsigned long rdi = kvm_register_read(vcpu, VCPU_REGS_RDI); > + unsigned long linear_addr = rdi + get_segment_base(vcpu, VCPU_SREG_ES); > + unsigned long rcx = kvm_register_read(vcpu, VCPU_REGS_RCX), count; > + int r; > + Missing es limit check - rep in can succeed for part of the page, fail when the %rdi hits the limit. > + if (rcx == 0) { > + kvm_x86_ops->skip_emulated_instruction(vcpu); > + return EMULATE_DONE; > + } > + if (kvm_get_rflags(vcpu) & X86_EFLAGS_DF) > + return EMULATE_FAIL; > + if (addr_size > 2) > + return EMULATE_FAIL; > + > + linear_addr &= masks[addr_size]; Also need to mask %rcx. > + > + count = (PAGE_SIZE - offset_in_page(linear_addr))/size; > + > + if (count == 0) /* 'in' crosses page boundry */ > + return EMULATE_FAIL; > + > + count = min(count, rcx); > + > + r = __kvm_fast_string_pio_in(vcpu, size, port, linear_addr, count); > + > + if (r != EMULATE_DO_MMIO) > + return r; > + > + vcpu->arch.fast_string_pio_ctxt.linear_addr = linear_addr; > + vcpu->arch.complete_userspace_io = complete_fast_string_pio; > + return EMULATE_DO_MMIO; > +} > +EXPORT_SYMBOL_GPL(kvm_fast_string_pio_in); Perhaps a better way to do it is to move the code into the emulator, which already handles all the checks and masks, and just avoid x86_decode_insn()/x86_emulate_insn(). > + > static void tsc_bad(void *info) > { > __this_cpu_write(cpu_tsc_khz, 0); -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html