On Wed, Apr 18, 2012 at 07:22:47PM +0300, Avi Kivity wrote:
> MMIO that are split across a page boundary are currently broken - the
> code does not expect to be aborted by the exit to userspace for the
> first MMIO fragment.
>
> This patch fixes the problem by generalizing the current code for handling
> 16-byte MMIOs to handle a number of "fragments", and changes the MMIO
> code to create those fragments.
>
For 16-bit IO userspace will see two 8-bit writes. Is this OK? Is there
real code that does this kind of IO, or is the patch for correctness
only?
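
To make the question concrete, here is a small standalone sketch (not
KVM code; it only mirrors the splitting arithmetic of the patch, and
show_exits() is a made-up helper) of the exits userspace would see for
a 16-bit access that straddles a page boundary:

#include <stdio.h>

#define PAGE_SIZE 4096UL

/*
 * Hypothetical helper: print the (gpa, len) pairs that userspace would
 * observe as successive KVM_EXIT_MMIO exits, i.e. the page split done
 * in emulator_read_write() plus the 8-byte cap per fragment.
 */
static void show_exits(unsigned long gpa, unsigned long bytes)
{
	while (bytes) {
		/* bytes left in the current page */
		unsigned long in_page = PAGE_SIZE - (gpa & (PAGE_SIZE - 1));
		unsigned long now = bytes < in_page ? bytes : in_page;

		if (now > 8)
			now = 8;
		printf("KVM_EXIT_MMIO: gpa=0x%lx len=%lu\n", gpa, now);
		gpa += now;
		bytes -= now;
	}
}

int main(void)
{
	show_exits(0x1fff, 2);	/* 16-bit write at the last byte of a page */
	return 0;
}

This prints two exits with len=1 each, which is exactly the pair of
8-bit writes being asked about.
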
> Signed-off-by: Avi Kivity <avi@xxxxxxxxxx>
> ---
>  arch/ia64/include/asm/kvm_host.h |    2 +
>  arch/ia64/kvm/kvm-ia64.c         |   10 ++--
>  arch/x86/kvm/x86.c               |  114 +++++++++++++++++++++++++++-----------
>  include/linux/kvm_host.h         |   31 +++++++++--
>  4 files changed, 115 insertions(+), 42 deletions(-)
> 
> diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h
> index c4b4bac..6d6a5ac 100644
> --- a/arch/ia64/include/asm/kvm_host.h
> +++ b/arch/ia64/include/asm/kvm_host.h
> @@ -449,6 +449,8 @@ struct kvm_vcpu_arch {
>  	char log_buf[VMM_LOG_LEN];
>  	union context host;
>  	union context guest;
> +
> +	char mmio_data[8];
>  };
>  
>  struct kvm_vm_stat {
> diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
> index 9d80ff8..882ab21 100644
> --- a/arch/ia64/kvm/kvm-ia64.c
> +++ b/arch/ia64/kvm/kvm-ia64.c
> @@ -232,12 +232,12 @@ static int handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
>  	if ((p->addr & PAGE_MASK) == IOAPIC_DEFAULT_BASE_ADDRESS)
>  		goto mmio;
>  	vcpu->mmio_needed = 1;
> -	vcpu->mmio_phys_addr = kvm_run->mmio.phys_addr = p->addr;
> -	vcpu->mmio_size = kvm_run->mmio.len = p->size;
> +	vcpu->mmio_fragments[0].gpa = kvm_run->mmio.phys_addr = p->addr;
> +	vcpu->mmio_fragments[0].len = kvm_run->mmio.len = p->size;
>  	vcpu->mmio_is_write = kvm_run->mmio.is_write = !p->dir;
>  
>  	if (vcpu->mmio_is_write)
> -		memcpy(vcpu->mmio_data, &p->data, p->size);
> +		memcpy(vcpu->arch.mmio_data, &p->data, p->size);
>  	memcpy(kvm_run->mmio.data, &p->data, p->size);
>  	kvm_run->exit_reason = KVM_EXIT_MMIO;
>  	return 0;
> @@ -719,7 +719,7 @@ static void kvm_set_mmio_data(struct kvm_vcpu *vcpu)
>  	struct kvm_mmio_req *p = kvm_get_vcpu_ioreq(vcpu);
>  
>  	if (!vcpu->mmio_is_write)
> -		memcpy(&p->data, vcpu->mmio_data, 8);
> +		memcpy(&p->data, vcpu->arch.mmio_data, 8);
>  	p->state = STATE_IORESP_READY;
>  }
>  
> @@ -739,7 +739,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
>  	}
>  
>  	if (vcpu->mmio_needed) {
> -		memcpy(vcpu->mmio_data, kvm_run->mmio.data, 8);
> +		memcpy(vcpu->arch.mmio_data, kvm_run->mmio.data, 8);
>  		kvm_set_mmio_data(vcpu);
>  		vcpu->mmio_read_completed = 1;
>  		vcpu->mmio_needed = 0;
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 0d9a578..4de705c 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -3718,9 +3718,8 @@ struct read_write_emulator_ops {
>  static int read_prepare(struct kvm_vcpu *vcpu, void *val, int bytes)
>  {
>  	if (vcpu->mmio_read_completed) {
> -		memcpy(val, vcpu->mmio_data, bytes);
>  		trace_kvm_mmio(KVM_TRACE_MMIO_READ, bytes,
> -			       vcpu->mmio_phys_addr, *(u64 *)val);
> +			       vcpu->mmio_fragments[0].gpa, *(u64 *)val);
>  		vcpu->mmio_read_completed = 0;
>  		return 1;
>  	}
> @@ -3756,8 +3755,9 @@ static int read_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
>  static int write_exit_mmio(struct kvm_vcpu *vcpu, gpa_t gpa,
>  			   void *val, int bytes)
>  {
> -	memcpy(vcpu->mmio_data, val, bytes);
> -	memcpy(vcpu->run->mmio.data, vcpu->mmio_data, 8);
> +	struct kvm_mmio_fragment *frag = &vcpu->mmio_fragments[0];
> +
> +	memcpy(vcpu->run->mmio.data, frag->data, frag->len);
>  	return X86EMUL_CONTINUE;
>  }
>  
> @@ -3784,10 +3784,7 @@ static int emulator_read_write_onepage(unsigned long addr, void *val,
>  	gpa_t gpa;
>  	int handled, ret;
>  	bool write = ops->write;
> -
> -	if (ops->read_write_prepare &&
> -	    ops->read_write_prepare(vcpu, val, bytes))
> -		return X86EMUL_CONTINUE;
> +	struct kvm_mmio_fragment *frag;
>  
>  	ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write);
>  
> @@ -3813,15 +3810,19 @@ static int emulator_read_write_onepage(unsigned long addr, void *val,
>  	bytes -= handled;
>  	val += handled;
>  
> -	vcpu->mmio_needed = 1;
> -	vcpu->run->exit_reason = KVM_EXIT_MMIO;
> -	vcpu->run->mmio.phys_addr = vcpu->mmio_phys_addr = gpa;
> -	vcpu->mmio_size = bytes;
> -	vcpu->run->mmio.len = min(vcpu->mmio_size, 8);
> -	vcpu->run->mmio.is_write = vcpu->mmio_is_write = write;
> -	vcpu->mmio_index = 0;
> +	while (bytes) {
> +		unsigned now = min(bytes, 8U);
>  
> -	return ops->read_write_exit_mmio(vcpu, gpa, val, bytes);
> +		frag = &vcpu->mmio_fragments[vcpu->mmio_nr_fragments++];
> +		frag->gpa = gpa;
> +		frag->data = val;
> +		frag->len = now;
> +
> +		gpa += now;
> +		val += now;
> +		bytes -= now;
> +	}
> +	return X86EMUL_CONTINUE;
>  }
>  
>  int emulator_read_write(struct x86_emulate_ctxt *ctxt, unsigned long addr,
> @@ -3830,10 +3831,18 @@ int emulator_read_write(struct x86_emulate_ctxt *ctxt, unsigned long addr,
>  			struct read_write_emulator_ops *ops)
>  {
>  	struct kvm_vcpu *vcpu = emul_to_vcpu(ctxt);
> +	gpa_t gpa;
> +	int rc;
> +
> +	if (ops->read_write_prepare &&
> +	    ops->read_write_prepare(vcpu, val, bytes))
> +		return X86EMUL_CONTINUE;
> +
> +	vcpu->mmio_nr_fragments = 0;
>  
>  	/* Crossing a page boundary? */
>  	if (((addr + bytes - 1) ^ addr) & PAGE_MASK) {
> -		int rc, now;
> +		int now;
>  
>  		now = -addr & ~PAGE_MASK;
>  		rc = emulator_read_write_onepage(addr, val, now, exception,
> @@ -3846,8 +3855,25 @@ int emulator_read_write(struct x86_emulate_ctxt *ctxt, unsigned long addr,
>  		bytes -= now;
>  	}
>  
> -	return emulator_read_write_onepage(addr, val, bytes, exception,
> -					   vcpu, ops);
> +	rc = emulator_read_write_onepage(addr, val, bytes, exception,
> +					 vcpu, ops);
> +	if (rc != X86EMUL_CONTINUE)
> +		return rc;
> +
> +	if (!vcpu->mmio_nr_fragments)
> +		return rc;
> +
> +	gpa = vcpu->mmio_fragments[0].gpa;
> +
> +	vcpu->mmio_needed = 1;
> +	vcpu->mmio_cur_fragment = 0;
> +
> +	vcpu->run->mmio.len = vcpu->mmio_fragments[0].len;
> +	vcpu->run->mmio.is_write = vcpu->mmio_is_write = ops->write;
> +	vcpu->run->exit_reason = KVM_EXIT_MMIO;
> +	vcpu->run->mmio.phys_addr = gpa;
> +
> +	return ops->read_write_exit_mmio(vcpu, gpa, val, bytes);
>  }
>  
>  static int emulator_read_emulated(struct x86_emulate_ctxt *ctxt,
> @@ -5446,33 +5472,55 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
>  	return r;
>  }
>  
> +/*
> + * Implements the following, as a state machine:
> + *
> + * read:
> + *   for each fragment
> + *     write gpa, len
> + *     exit
> + *     copy data
> + *   execute insn
> + *
> + * write:
> + *   for each fragment
> + *     write gpa, len
> + *     copy data
> + *     exit
> + */
>  static int complete_mmio(struct kvm_vcpu *vcpu)
>  {
>  	struct kvm_run *run = vcpu->run;
> +	struct kvm_mmio_fragment *frag;
>  	int r;
>  
>  	if (!(vcpu->arch.pio.count || vcpu->mmio_needed))
>  		return 1;
>  
>  	if (vcpu->mmio_needed) {
> -		vcpu->mmio_needed = 0;
> +		/* Complete previous fragment */
> +		frag = &vcpu->mmio_fragments[vcpu->mmio_cur_fragment++];
>  		if (!vcpu->mmio_is_write)
> -			memcpy(vcpu->mmio_data + vcpu->mmio_index,
> -			       run->mmio.data, 8);
> -		vcpu->mmio_index += 8;
> -		if (vcpu->mmio_index < vcpu->mmio_size) {
> -			run->exit_reason = KVM_EXIT_MMIO;
> -			run->mmio.phys_addr = vcpu->mmio_phys_addr + vcpu->mmio_index;
> -			memcpy(run->mmio.data, vcpu->mmio_data + vcpu->mmio_index, 8);
> -			run->mmio.len = min(vcpu->mmio_size - vcpu->mmio_index, 8);
> -			run->mmio.is_write = vcpu->mmio_is_write;
> -			vcpu->mmio_needed = 1;
> -			return 0;
> +			memcpy(frag->data, run->mmio.data, frag->len);
> +		if (vcpu->mmio_cur_fragment == vcpu->mmio_nr_fragments) {
> +			vcpu->mmio_needed = 0;
> +			if (vcpu->mmio_is_write)
> +				return 1;
> +			vcpu->mmio_read_completed = 1;
> +			goto done;
>  		}
> +		/* Initiate next fragment */
> +		++frag;
> +		run->exit_reason = KVM_EXIT_MMIO;
> +		run->mmio.phys_addr = frag->gpa;
>  		if (vcpu->mmio_is_write)
> -			return 1;
> -		vcpu->mmio_read_completed = 1;
> +			memcpy(run->mmio.data, frag->data, frag->len);
> +		run->mmio.len = frag->len;
> +		run->mmio.is_write = vcpu->mmio_is_write;
> +		return 0;
> +
>  	}
> +done:
>  	vcpu->srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
>  	r = emulate_instruction(vcpu, EMULTYPE_NO_DECODE);
>  	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
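
A note on the userspace side of the state machine above: each fragment
is delivered as an ordinary KVM_EXIT_MMIO, so a conventional vcpu run
loop needs no changes; it is simply invoked once per fragment. A rough
sketch (the handle_mmio_read/handle_mmio_write callbacks are
hypothetical VMM functions, assumed to exist elsewhere):

#include <err.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Hypothetical VMM device-dispatch callbacks. */
extern void handle_mmio_read(__u64 gpa, __u8 *data, __u32 len);
extern void handle_mmio_write(__u64 gpa, const __u8 *data, __u32 len);

static void vcpu_loop(int vcpu_fd, struct kvm_run *run)
{
	for (;;) {
		if (ioctl(vcpu_fd, KVM_RUN, 0) < 0)
			err(1, "KVM_RUN");

		switch (run->exit_reason) {
		case KVM_EXIT_MMIO:
			/*
			 * With this patch a page-crossing access shows up
			 * here once per fragment; for reads, userspace must
			 * fill run->mmio.data before the next KVM_RUN.
			 */
			if (run->mmio.is_write)
				handle_mmio_write(run->mmio.phys_addr,
						  run->mmio.data,
						  run->mmio.len);
			else
				handle_mmio_read(run->mmio.phys_addr,
						 run->mmio.data,
						 run->mmio.len);
			break;
		default:
			errx(1, "unhandled exit reason %u", run->exit_reason);
		}
	}
}
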
> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
> index 49c2f2f..738b5ed 100644
> --- a/include/linux/kvm_host.h
> +++ b/include/linux/kvm_host.h
> @@ -35,6 +35,20 @@
>  #endif
>  
>  /*
> + * If we support unaligned MMIO, at most one fragment will be split into two:
> + */
> +#ifdef KVM_UNALIGNED_MMIO
> +# define KVM_EXTRA_MMIO_FRAGMENTS 1
> +#else
> +# define KVM_EXTRA_MMIO_FRAGMENTS 0
> +#endif
> +
> +#define KVM_USER_MMIO_SIZE 8
> +
> +#define KVM_MAX_MMIO_FRAGMENTS \
> +	(KVM_MMIO_SIZE / KVM_USER_MMIO_SIZE + KVM_EXTRA_MMIO_FRAGMENTS)
> +
> +/*
>   * vcpu->requests bit members
>   */
>  #define KVM_REQ_TLB_FLUSH          0
> @@ -117,6 +131,16 @@ enum {
>  	EXITING_GUEST_MODE
>  };
>  
> +/*
> + * Sometimes a large or cross-page mmio needs to be broken up into separate
> + * exits for userspace servicing.
> + */
> +struct kvm_mmio_fragment {
> +	gpa_t gpa;
> +	void *data;
> +	unsigned len;
> +};
> +
>  struct kvm_vcpu {
>  	struct kvm *kvm;
>  #ifdef CONFIG_PREEMPT_NOTIFIERS
> @@ -144,10 +168,9 @@ struct kvm_vcpu {
>  	int mmio_needed;
>  	int mmio_read_completed;
>  	int mmio_is_write;
> -	int mmio_size;
> -	int mmio_index;
> -	unsigned char mmio_data[KVM_MMIO_SIZE];
> -	gpa_t mmio_phys_addr;
> +	int mmio_cur_fragment;
> +	int mmio_nr_fragments;
> +	struct kvm_mmio_fragment mmio_fragments[KVM_MAX_MMIO_FRAGMENTS];
>  #endif
>  
>  #ifdef CONFIG_KVM_ASYNC_PF
> -- 
> 1.7.10

-- 
			Gleb.