On Sun, Jun 28, 2009 at 10:34:25PM +0300, Michael S. Tsirkin wrote: > This changes bus accesses to use high-level kvm_io_bus_read/kvm_io_bus_write > functions, which utilize read/write semaphore intead of mutex. in_range now > becomes unused so it is removed from device ops in favor of read/write > callbacks performing range checks internally. > > This allows aliasing (mostly for in-kernel virtio), as well as better error > handling by making it possible to pass errors up to userspace. And it's enough > to look at the diffstat to see that it's a better API anyway. Can you please expand a little on this? Still don't get why making ->in_range part of ->read / ->write is a good thing. Aliasing? Agree that proliferation of locks is a bad thing. > While we are at it, document locking rules for kvm_io_device_ops. > > Note: since the use of the new bus_lock is localized to a small number of > places, it will be easy to switch to srcu in the future if we so desire. > > Signed-off-by: Michael S. Tsirkin <mst@xxxxxxxxxx> > --- > > OK, here's an updated version of the patch. I punted on rcu and went > with rw semaphore for now, but it will be easy to change now that the > use of the semaphore is fairly isolated. Works for me, but please > review carefully. > > arch/ia64/kvm/kvm-ia64.c | 28 +++-------- > arch/x86/kvm/i8254.c | 53 +++++++++++---------- > arch/x86/kvm/i8259.c | 22 +++++---- > arch/x86/kvm/lapic.c | 44 ++++++++---------- > arch/x86/kvm/x86.c | 112 ++++++++++++++------------------------------- > include/linux/kvm_host.h | 10 +++- > virt/kvm/coalesced_mmio.c | 28 ++++++------ > virt/kvm/ioapic.c | 24 +++++---- > virt/kvm/iodev.h | 42 +++++++---------- > virt/kvm/kvm_main.c | 41 ++++++++++++----- > 10 files changed, 184 insertions(+), 220 deletions(-) > > diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c > index c1c5cb6..5bf5100 100644 > --- a/arch/ia64/kvm/kvm-ia64.c > +++ b/arch/ia64/kvm/kvm-ia64.c > @@ -210,16 +210,6 @@ int kvm_dev_ioctl_check_extension(long ext) > > } > > -static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu, > - gpa_t addr, int len, int is_write) > -{ > - struct kvm_io_device *dev; > - > - dev = kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr, len, is_write); > - > - return dev; > -} > - > static int handle_vm_error(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) > { > kvm_run->exit_reason = KVM_EXIT_UNKNOWN; > @@ -231,6 +221,7 @@ static int handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) > { > struct kvm_mmio_req *p; > struct kvm_io_device *mmio_dev; > + int r; > > p = kvm_get_vcpu_ioreq(vcpu); > > @@ -247,16 +238,13 @@ static int handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) > kvm_run->exit_reason = KVM_EXIT_MMIO; > return 0; > mmio: > - mmio_dev = vcpu_find_mmio_dev(vcpu, p->addr, p->size, !p->dir); > - if (mmio_dev) { > - if (!p->dir) > - kvm_iodevice_write(mmio_dev, p->addr, p->size, > - &p->data); > - else > - kvm_iodevice_read(mmio_dev, p->addr, p->size, > - &p->data); > - > - } else > + if (p->dir) > + r = kvm_io_bus_read(vcpu->kvm, &vcpu->kvm->mmio_bus, p->addr, > + p->size, &p->data); > + else > + r = kvm_io_bus_write(vcpu->kvm, &vcpu->kvm->mmio_bus, p->addr, > + p->size, &p->data); > + if (r) > printk(KERN_ERR"kvm: No iodevice found! addr:%lx\n", p->addr); > p->state = STATE_IORESP_READY; > > diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c > index 331705f..3155ffa 100644 > --- a/arch/x86/kvm/i8254.c > +++ b/arch/x86/kvm/i8254.c > @@ -357,8 +357,14 @@ static inline struct kvm_pit *speaker_to_pit(struct kvm_io_device *dev) > return container_of(dev, struct kvm_pit, speaker_dev); > } > > -static void pit_ioport_write(struct kvm_io_device *this, > - gpa_t addr, int len, const void *data) > +static inline int pit_in_range(gpa_t addr) > +{ > + return ((addr >= KVM_PIT_BASE_ADDRESS) && > + (addr < KVM_PIT_BASE_ADDRESS + KVM_PIT_MEM_LENGTH)); > +} > + > +static int pit_ioport_write(struct kvm_io_device *this, > + gpa_t addr, int len, const void *data) > { > struct kvm_pit *pit = dev_to_pit(this); > struct kvm_kpit_state *pit_state = &pit->pit_state; > @@ -366,6 +372,8 @@ static void pit_ioport_write(struct kvm_io_device *this, > int channel, access; > struct kvm_kpit_channel_state *s; > u32 val = *(u32 *) data; > + if (!pit_in_range(addr)) > + return -EOPNOTSUPP; > > val &= 0xff; > addr &= KVM_PIT_CHANNEL_MASK; > @@ -428,16 +436,19 @@ static void pit_ioport_write(struct kvm_io_device *this, > } > > mutex_unlock(&pit_state->lock); > + return 0; > } > > -static void pit_ioport_read(struct kvm_io_device *this, > - gpa_t addr, int len, void *data) > +static int pit_ioport_read(struct kvm_io_device *this, > + gpa_t addr, int len, void *data) > { > struct kvm_pit *pit = dev_to_pit(this); > struct kvm_kpit_state *pit_state = &pit->pit_state; > struct kvm *kvm = pit->kvm; > int ret, count; > struct kvm_kpit_channel_state *s; > + if (!pit_in_range(addr)) > + return -EOPNOTSUPP; > > addr &= KVM_PIT_CHANNEL_MASK; > s = &pit_state->channels[addr]; > @@ -492,37 +503,36 @@ static void pit_ioport_read(struct kvm_io_device *this, > memcpy(data, (char *)&ret, len); > > mutex_unlock(&pit_state->lock); > + return 0; > } > > -static int pit_in_range(struct kvm_io_device *this, gpa_t addr, > - int len, int is_write) > -{ > - return ((addr >= KVM_PIT_BASE_ADDRESS) && > - (addr < KVM_PIT_BASE_ADDRESS + KVM_PIT_MEM_LENGTH)); > -} > - > -static void speaker_ioport_write(struct kvm_io_device *this, > - gpa_t addr, int len, const void *data) > +static int speaker_ioport_write(struct kvm_io_device *this, > + gpa_t addr, int len, const void *data) > { > struct kvm_pit *pit = speaker_to_pit(this); > struct kvm_kpit_state *pit_state = &pit->pit_state; > struct kvm *kvm = pit->kvm; > u32 val = *(u32 *) data; > + if (addr != KVM_SPEAKER_BASE_ADDRESS) > + return -EOPNOTSUPP; > > mutex_lock(&pit_state->lock); > pit_state->speaker_data_on = (val >> 1) & 1; > pit_set_gate(kvm, 2, val & 1); > mutex_unlock(&pit_state->lock); > + return 0; > } > > -static void speaker_ioport_read(struct kvm_io_device *this, > - gpa_t addr, int len, void *data) > +static int speaker_ioport_read(struct kvm_io_device *this, > + gpa_t addr, int len, void *data) > { > struct kvm_pit *pit = speaker_to_pit(this); > struct kvm_kpit_state *pit_state = &pit->pit_state; > struct kvm *kvm = pit->kvm; > unsigned int refresh_clock; > int ret; > + if (addr != KVM_SPEAKER_BASE_ADDRESS) > + return -EOPNOTSUPP; > > /* Refresh clock toggles at about 15us. We approximate as 2^14ns. */ > refresh_clock = ((unsigned int)ktime_to_ns(ktime_get()) >> 14) & 1; > @@ -534,12 +544,7 @@ static void speaker_ioport_read(struct kvm_io_device *this, > len = sizeof(ret); > memcpy(data, (char *)&ret, len); > mutex_unlock(&pit_state->lock); > -} > - > -static int speaker_in_range(struct kvm_io_device *this, gpa_t addr, > - int len, int is_write) > -{ > - return (addr == KVM_SPEAKER_BASE_ADDRESS); > + return 0; > } > > void kvm_pit_reset(struct kvm_pit *pit) > @@ -573,13 +578,11 @@ static void pit_mask_notifer(struct kvm_irq_mask_notifier *kimn, bool mask) > static const struct kvm_io_device_ops pit_dev_ops = { > .read = pit_ioport_read, > .write = pit_ioport_write, > - .in_range = pit_in_range, > }; > > static const struct kvm_io_device_ops speaker_dev_ops = { > .read = speaker_ioport_read, > .write = speaker_ioport_write, > - .in_range = speaker_in_range, > }; > > struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags) > @@ -620,11 +623,11 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags) > kvm_register_irq_mask_notifier(kvm, 0, &pit->mask_notifier); > > kvm_iodevice_init(&pit->dev, &pit_dev_ops); > - kvm_io_bus_register_dev(&kvm->pio_bus, &pit->dev); > + kvm_io_bus_register_dev(kvm, &kvm->pio_bus, &pit->dev); > > if (flags & KVM_PIT_SPEAKER_DUMMY) { > kvm_iodevice_init(&pit->speaker_dev, &speaker_dev_ops); > - kvm_io_bus_register_dev(&kvm->pio_bus, &pit->speaker_dev); > + kvm_io_bus_register_dev(kvm, &kvm->pio_bus, &pit->speaker_dev); > } > > return pit; > diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c > index 148c52a..1d1bb75 100644 > --- a/arch/x86/kvm/i8259.c > +++ b/arch/x86/kvm/i8259.c > @@ -430,8 +430,7 @@ static u32 elcr_ioport_read(void *opaque, u32 addr1) > return s->elcr; > } > > -static int picdev_in_range(struct kvm_io_device *this, gpa_t addr, > - int len, int is_write) > +static int picdev_in_range(gpa_t addr) > { > switch (addr) { > case 0x20: > @@ -451,16 +450,18 @@ static inline struct kvm_pic *to_pic(struct kvm_io_device *dev) > return container_of(dev, struct kvm_pic, dev); > } > > -static void picdev_write(struct kvm_io_device *this, > +static int picdev_write(struct kvm_io_device *this, > gpa_t addr, int len, const void *val) > { > struct kvm_pic *s = to_pic(this); > unsigned char data = *(unsigned char *)val; > + if (!picdev_in_range(addr)) > + return -EOPNOTSUPP; > > if (len != 1) { > if (printk_ratelimit()) > printk(KERN_ERR "PIC: non byte write\n"); > - return; > + return 0; > } > pic_lock(s); > switch (addr) { > @@ -476,18 +477,21 @@ static void picdev_write(struct kvm_io_device *this, > break; > } > pic_unlock(s); > + return 0; > } > > -static void picdev_read(struct kvm_io_device *this, > - gpa_t addr, int len, void *val) > +static int picdev_read(struct kvm_io_device *this, > + gpa_t addr, int len, void *val) > { > struct kvm_pic *s = to_pic(this); > unsigned char data = 0; > + if (!picdev_in_range(addr)) > + return -EOPNOTSUPP; > > if (len != 1) { > if (printk_ratelimit()) > printk(KERN_ERR "PIC: non byte read\n"); > - return; > + return 0; > } > pic_lock(s); > switch (addr) { > @@ -504,6 +508,7 @@ static void picdev_read(struct kvm_io_device *this, > } > *(unsigned char *)val = data; > pic_unlock(s); > + return 0; > } > > /* > @@ -526,7 +531,6 @@ static void pic_irq_request(void *opaque, int level) > static const struct kvm_io_device_ops picdev_ops = { > .read = picdev_read, > .write = picdev_write, > - .in_range = picdev_in_range, > }; > > struct kvm_pic *kvm_create_pic(struct kvm *kvm) > @@ -548,6 +552,6 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm) > * Initialize PIO device > */ > kvm_iodevice_init(&s->dev, &picdev_ops); > - kvm_io_bus_register_dev(&kvm->pio_bus, &s->dev); > + kvm_io_bus_register_dev(kvm, &kvm->pio_bus, &s->dev); > return s; > } > diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c > index 2e02865..265a765 100644 > --- a/arch/x86/kvm/lapic.c > +++ b/arch/x86/kvm/lapic.c > @@ -546,18 +546,27 @@ static inline struct kvm_lapic *to_lapic(struct kvm_io_device *dev) > return container_of(dev, struct kvm_lapic, dev); > } > > -static void apic_mmio_read(struct kvm_io_device *this, > - gpa_t address, int len, void *data) > +static int apic_mmio_in_range(struct kvm_lapic *apic, gpa_t addr) > +{ > + return apic_hw_enabled(apic) && > + addr >= apic->base_address && > + addr < apic->base_address + LAPIC_MMIO_LENGTH; > +} > + > +static int apic_mmio_read(struct kvm_io_device *this, > + gpa_t address, int len, void *data) > { > struct kvm_lapic *apic = to_lapic(this); > unsigned int offset = address - apic->base_address; > unsigned char alignment = offset & 0xf; > u32 result; > + if (!apic_mmio_in_range(apic, address)) > + return -EOPNOTSUPP; > > if ((alignment + len) > 4) { > printk(KERN_ERR "KVM_APIC_READ: alignment error %lx %d", > (unsigned long)address, len); > - return; > + return 0; > } > result = __apic_read(apic, offset & ~0xf); > > @@ -574,6 +583,7 @@ static void apic_mmio_read(struct kvm_io_device *this, > "should be 1,2, or 4 instead\n", len); > break; > } > + return 0; > } > > static void update_divide_count(struct kvm_lapic *apic) > @@ -629,13 +639,15 @@ static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val) > apic->vcpu->kvm->arch.vapics_in_nmi_mode--; > } > > -static void apic_mmio_write(struct kvm_io_device *this, > - gpa_t address, int len, const void *data) > +static int apic_mmio_write(struct kvm_io_device *this, > + gpa_t address, int len, const void *data) > { > struct kvm_lapic *apic = to_lapic(this); > unsigned int offset = address - apic->base_address; > unsigned char alignment = offset & 0xf; > u32 val; > + if (!apic_mmio_in_range(apic, address)) > + return -EOPNOTSUPP; > > /* > * APIC register must be aligned on 128-bits boundary. > @@ -646,7 +658,7 @@ static void apic_mmio_write(struct kvm_io_device *this, > /* Don't shout loud, $infamous_os would cause only noise. */ > apic_debug("apic write: bad size=%d %lx\n", > len, (long)address); > - return; > + return 0; > } > > val = *(u32 *) data; > @@ -729,7 +741,7 @@ static void apic_mmio_write(struct kvm_io_device *this, > hrtimer_cancel(&apic->lapic_timer.timer); > apic_set_reg(apic, APIC_TMICT, val); > start_apic_timer(apic); > - return; > + return 0; > > case APIC_TDCR: > if (val & 4) > @@ -743,22 +755,7 @@ static void apic_mmio_write(struct kvm_io_device *this, > offset); > break; > } > - > -} > - > -static int apic_mmio_range(struct kvm_io_device *this, gpa_t addr, > - int len, int size) > -{ > - struct kvm_lapic *apic = to_lapic(this); > - int ret = 0; > - > - > - if (apic_hw_enabled(apic) && > - (addr >= apic->base_address) && > - (addr < (apic->base_address + LAPIC_MMIO_LENGTH))) > - ret = 1; > - > - return ret; > + return 0; > } > > void kvm_free_lapic(struct kvm_vcpu *vcpu) > @@ -938,7 +935,6 @@ static struct kvm_timer_ops lapic_timer_ops = { > static const struct kvm_io_device_ops apic_mmio_ops = { > .read = apic_mmio_read, > .write = apic_mmio_write, > - .in_range = apic_mmio_range, > }; > > int kvm_create_lapic(struct kvm_vcpu *vcpu) > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > index 5a66bb9..badc23f 100644 > --- a/arch/x86/kvm/x86.c > +++ b/arch/x86/kvm/x86.c > @@ -2260,35 +2260,23 @@ static void kvm_init_msr_list(void) > num_msrs_to_save = j; > } > > -/* > - * Only apic need an MMIO device hook, so shortcut now.. > - */ > -static struct kvm_io_device *vcpu_find_pervcpu_dev(struct kvm_vcpu *vcpu, > - gpa_t addr, int len, > - int is_write) > +static int vcpu_mmio_write(struct kvm_vcpu *vcpu, gpa_t addr, int len, > + const void *v) > { > - struct kvm_io_device *dev; > + if (vcpu->arch.apic && > + !kvm_iodevice_write(&vcpu->arch.apic->dev, addr, len, v)) > + return 0; > > - if (vcpu->arch.apic) { > - dev = &vcpu->arch.apic->dev; > - if (kvm_iodevice_in_range(dev, addr, len, is_write)) > - return dev; > - } > - return NULL; > + return kvm_io_bus_write(vcpu->kvm, &vcpu->kvm->mmio_bus, addr, len, v); > } > > - > -static struct kvm_io_device *vcpu_find_mmio_dev(struct kvm_vcpu *vcpu, > - gpa_t addr, int len, > - int is_write) > +static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v) > { > - struct kvm_io_device *dev; > + if (vcpu->arch.apic && > + !kvm_iodevice_read(&vcpu->arch.apic->dev, addr, len, v)) > + return 0; > > - dev = vcpu_find_pervcpu_dev(vcpu, addr, len, is_write); > - if (dev == NULL) > - dev = kvm_io_bus_find_dev(&vcpu->kvm->mmio_bus, addr, len, > - is_write); > - return dev; > + return kvm_io_bus_read(vcpu->kvm, &vcpu->kvm->mmio_bus, addr, len, v); > } > > static int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes, > @@ -2357,7 +2345,6 @@ static int emulator_read_emulated(unsigned long addr, > unsigned int bytes, > struct kvm_vcpu *vcpu) > { > - struct kvm_io_device *mmio_dev; > gpa_t gpa; > > if (vcpu->mmio_read_completed) { > @@ -2382,13 +2369,8 @@ mmio: > /* > * Is this MMIO handled locally? > */ > - mutex_lock(&vcpu->kvm->lock); > - mmio_dev = vcpu_find_mmio_dev(vcpu, gpa, bytes, 0); > - mutex_unlock(&vcpu->kvm->lock); > - if (mmio_dev) { > - kvm_iodevice_read(mmio_dev, gpa, bytes, val); > + if (!vcpu_mmio_read(vcpu, gpa, bytes, val)) > return X86EMUL_CONTINUE; > - } > > vcpu->mmio_needed = 1; > vcpu->mmio_phys_addr = gpa; > @@ -2415,7 +2397,6 @@ static int emulator_write_emulated_onepage(unsigned long addr, > unsigned int bytes, > struct kvm_vcpu *vcpu) > { > - struct kvm_io_device *mmio_dev; > gpa_t gpa; > > gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr); > @@ -2436,13 +2417,8 @@ mmio: > /* > * Is this MMIO handled locally? > */ > - mutex_lock(&vcpu->kvm->lock); > - mmio_dev = vcpu_find_mmio_dev(vcpu, gpa, bytes, 1); > - mutex_unlock(&vcpu->kvm->lock); > - if (mmio_dev) { > - kvm_iodevice_write(mmio_dev, gpa, bytes, val); > + if (!vcpu_mmio_write(vcpu, gpa, bytes, val)) > return X86EMUL_CONTINUE; > - } > > vcpu->mmio_needed = 1; > vcpu->mmio_phys_addr = gpa; > @@ -2758,48 +2734,42 @@ int complete_pio(struct kvm_vcpu *vcpu) > return 0; > } > > -static void kernel_pio(struct kvm_io_device *pio_dev, > - struct kvm_vcpu *vcpu, > - void *pd) > +static int kernel_pio(struct kvm_vcpu *vcpu, void *pd) > { > /* TODO: String I/O for in kernel device */ > + int r; > > if (vcpu->arch.pio.in) > - kvm_iodevice_read(pio_dev, vcpu->arch.pio.port, > - vcpu->arch.pio.size, > - pd); > + r = kvm_io_bus_read(vcpu->kvm, &vcpu->kvm->pio_bus, > + vcpu->arch.pio.port, > + vcpu->arch.pio.size, pd); > else > - kvm_iodevice_write(pio_dev, vcpu->arch.pio.port, > - vcpu->arch.pio.size, > - pd); > + r = kvm_io_bus_write(vcpu->kvm, &vcpu->kvm->pio_bus, > + vcpu->arch.pio.port, > + vcpu->arch.pio.size, pd); > + return r; > } > > -static void pio_string_write(struct kvm_io_device *pio_dev, > - struct kvm_vcpu *vcpu) > +static int pio_string_write(struct kvm_vcpu *vcpu) > { > struct kvm_pio_request *io = &vcpu->arch.pio; > void *pd = vcpu->arch.pio_data; > - int i; > + int i, r = 0; > > for (i = 0; i < io->cur_count; i++) { > - kvm_iodevice_write(pio_dev, io->port, > - io->size, > - pd); > + if (kvm_io_bus_write(vcpu->kvm, &vcpu->kvm->pio_bus, > + io->port, io->size, pd)) { > + r = -EOPNOTSUPP; > + break; > + } > pd += io->size; > } > -} > - > -static struct kvm_io_device *vcpu_find_pio_dev(struct kvm_vcpu *vcpu, > - gpa_t addr, int len, > - int is_write) > -{ > - return kvm_io_bus_find_dev(&vcpu->kvm->pio_bus, addr, len, is_write); > + return r; > } > > int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, > int size, unsigned port) > { > - struct kvm_io_device *pio_dev; > unsigned long val; > > vcpu->run->exit_reason = KVM_EXIT_IO; > @@ -2819,11 +2789,7 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, > val = kvm_register_read(vcpu, VCPU_REGS_RAX); > memcpy(vcpu->arch.pio_data, &val, 4); > > - mutex_lock(&vcpu->kvm->lock); > - pio_dev = vcpu_find_pio_dev(vcpu, port, size, !in); > - mutex_unlock(&vcpu->kvm->lock); > - if (pio_dev) { > - kernel_pio(pio_dev, vcpu, vcpu->arch.pio_data); > + if (!kernel_pio(vcpu, vcpu->arch.pio_data)) { > complete_pio(vcpu); > return 1; > } > @@ -2837,7 +2803,6 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, > { > unsigned now, in_page; > int ret = 0; > - struct kvm_io_device *pio_dev; > > vcpu->run->exit_reason = KVM_EXIT_IO; > vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT; > @@ -2881,12 +2846,6 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, > > vcpu->arch.pio.guest_gva = address; > > - mutex_lock(&vcpu->kvm->lock); > - pio_dev = vcpu_find_pio_dev(vcpu, port, > - vcpu->arch.pio.cur_count, > - !vcpu->arch.pio.in); > - mutex_unlock(&vcpu->kvm->lock); > - > if (!vcpu->arch.pio.in) { > /* string PIO write */ > ret = pio_copy_data(vcpu); > @@ -2894,16 +2853,13 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in, > kvm_inject_gp(vcpu, 0); > return 1; > } > - if (ret == 0 && pio_dev) { > - pio_string_write(pio_dev, vcpu); > + if (ret == 0 && !pio_string_write(vcpu)) { > complete_pio(vcpu); > if (vcpu->arch.pio.count == 0) > ret = 1; > } > - } else if (pio_dev) > - pr_unimpl(vcpu, "no string pio read support yet, " > - "port %x size %d count %ld\n", > - port, size, count); > + } > + /* no string PIO read support yet */ > > return ret; > } > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h > index 2451f48..8337f8e 100644 > --- a/include/linux/kvm_host.h > +++ b/include/linux/kvm_host.h > @@ -42,6 +42,7 @@ > > #define KVM_USERSPACE_IRQ_SOURCE_ID 0 > > +struct kvm; > struct kvm_vcpu; > extern struct kmem_cache *kvm_vcpu_cache; > > @@ -59,9 +60,11 @@ struct kvm_io_bus { > > void kvm_io_bus_init(struct kvm_io_bus *bus); > void kvm_io_bus_destroy(struct kvm_io_bus *bus); > -struct kvm_io_device *kvm_io_bus_find_dev(struct kvm_io_bus *bus, > - gpa_t addr, int len, int is_write); > -void kvm_io_bus_register_dev(struct kvm_io_bus *bus, > +int kvm_io_bus_write(struct kvm *, struct kvm_io_bus *bus, gpa_t addr, int len, > + const void *val); > +int kvm_io_bus_read(struct kvm *, struct kvm_io_bus *bus, gpa_t addr, int len, > + void *val); > +void kvm_io_bus_register_dev(struct kvm *, struct kvm_io_bus *bus, > struct kvm_io_device *dev); > > struct kvm_vcpu { > @@ -138,6 +141,7 @@ struct kvm { > atomic_t online_vcpus; > struct list_head vm_list; > struct mutex lock; > + struct rw_semaphore bus_lock; > struct kvm_io_bus mmio_bus; > struct kvm_io_bus pio_bus; > #ifdef CONFIG_HAVE_KVM_EVENTFD > diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c > index 397f419..0140218 100644 > --- a/virt/kvm/coalesced_mmio.c > +++ b/virt/kvm/coalesced_mmio.c > @@ -19,17 +19,15 @@ static inline struct kvm_coalesced_mmio_dev *to_mmio(struct kvm_io_device *dev) > return container_of(dev, struct kvm_coalesced_mmio_dev, dev); > } > > -static int coalesced_mmio_in_range(struct kvm_io_device *this, > - gpa_t addr, int len, int is_write) > +static int coalesced_mmio_in_range(struct kvm_coalesced_mmio_dev *dev, > + gpa_t addr, int len) > { > - struct kvm_coalesced_mmio_dev *dev = to_mmio(this); > struct kvm_coalesced_mmio_zone *zone; > struct kvm_coalesced_mmio_ring *ring; > unsigned avail; > int i; > > - if (!is_write) > - return 0; > + /* kvm->bus_lock is taken by the caller */ > > /* Are we able to batch it ? */ > > @@ -60,11 +58,13 @@ static int coalesced_mmio_in_range(struct kvm_io_device *this, > return 0; > } > > -static void coalesced_mmio_write(struct kvm_io_device *this, > - gpa_t addr, int len, const void *val) > +static int coalesced_mmio_write(struct kvm_io_device *this, > + gpa_t addr, int len, const void *val) > { > struct kvm_coalesced_mmio_dev *dev = to_mmio(this); > struct kvm_coalesced_mmio_ring *ring = dev->kvm->coalesced_mmio_ring; > + if (!coalesced_mmio_in_range(dev, addr, len)) > + return -EOPNOTSUPP; > > spin_lock(&dev->lock); > > @@ -76,6 +76,7 @@ static void coalesced_mmio_write(struct kvm_io_device *this, > smp_wmb(); > ring->last = (ring->last + 1) % KVM_COALESCED_MMIO_MAX; > spin_unlock(&dev->lock); > + return 0; > } > > static void coalesced_mmio_destructor(struct kvm_io_device *this) > @@ -87,7 +88,6 @@ static void coalesced_mmio_destructor(struct kvm_io_device *this) > > static const struct kvm_io_device_ops coalesced_mmio_ops = { > .write = coalesced_mmio_write, > - .in_range = coalesced_mmio_in_range, > .destructor = coalesced_mmio_destructor, > }; > > @@ -102,7 +102,7 @@ int kvm_coalesced_mmio_init(struct kvm *kvm) > kvm_iodevice_init(&dev->dev, &coalesced_mmio_ops); > dev->kvm = kvm; > kvm->coalesced_mmio_dev = dev; > - kvm_io_bus_register_dev(&kvm->mmio_bus, &dev->dev); > + kvm_io_bus_register_dev(kvm, &kvm->mmio_bus, &dev->dev); > > return 0; > } > @@ -115,16 +115,16 @@ int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm, > if (dev == NULL) > return -EINVAL; > > - mutex_lock(&kvm->lock); > + down_write(&kvm->bus_lock); > if (dev->nb_zones >= KVM_COALESCED_MMIO_ZONE_MAX) { > - mutex_unlock(&kvm->lock); > + up_write(&kvm->bus_lock); > return -ENOBUFS; > } > > dev->zone[dev->nb_zones] = *zone; > dev->nb_zones++; > > - mutex_unlock(&kvm->lock); > + up_write(&kvm->bus_lock); > return 0; > } > > @@ -138,7 +138,7 @@ int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm, > if (dev == NULL) > return -EINVAL; > > - mutex_lock(&kvm->lock); > + down_write(&kvm->bus_lock); > > i = dev->nb_zones; > while(i) { > @@ -156,7 +156,7 @@ int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm, > i--; > } > > - mutex_unlock(&kvm->lock); > + up_write(&kvm->bus_lock); > > return 0; > } > diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c > index d8b2eca..2d352f7 100644 > --- a/virt/kvm/ioapic.c > +++ b/virt/kvm/ioapic.c > @@ -227,20 +227,19 @@ static inline struct kvm_ioapic *to_ioapic(struct kvm_io_device *dev) > return container_of(dev, struct kvm_ioapic, dev); > } > > -static int ioapic_in_range(struct kvm_io_device *this, gpa_t addr, > - int len, int is_write) > +static inline int ioapic_in_range(struct kvm_ioapic *ioapic, gpa_t addr) > { > - struct kvm_ioapic *ioapic = to_ioapic(this); > - > return ((addr >= ioapic->base_address && > (addr < ioapic->base_address + IOAPIC_MEM_LENGTH))); > } > > -static void ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len, > - void *val) > +static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len, > + void *val) > { > struct kvm_ioapic *ioapic = to_ioapic(this); > u32 result; > + if (!ioapic_in_range(ioapic, addr)) > + return -ENOTSUPP; > > ioapic_debug("addr %lx\n", (unsigned long)addr); > ASSERT(!(addr & 0xf)); /* check alignment */ > @@ -273,13 +272,16 @@ static void ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len, > printk(KERN_WARNING "ioapic: wrong length %d\n", len); > } > mutex_unlock(&ioapic->kvm->irq_lock); > + return 0; > } > > -static void ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len, > - const void *val) > +static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len, > + const void *val) > { > struct kvm_ioapic *ioapic = to_ioapic(this); > u32 data; > + if (!ioapic_in_range(ioapic, addr)) > + return -ENOTSUPP; > > ioapic_debug("ioapic_mmio_write addr=%p len=%d val=%p\n", > (void*)addr, len, val); > @@ -290,7 +292,7 @@ static void ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len, > data = *(u32 *) val; > else { > printk(KERN_WARNING "ioapic: Unsupported size %d\n", len); > - return; > + return 0; > } > > addr &= 0xff; > @@ -312,6 +314,7 @@ static void ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len, > break; > } > mutex_unlock(&ioapic->kvm->irq_lock); > + return 0; > } > > void kvm_ioapic_reset(struct kvm_ioapic *ioapic) > @@ -329,7 +332,6 @@ void kvm_ioapic_reset(struct kvm_ioapic *ioapic) > static const struct kvm_io_device_ops ioapic_mmio_ops = { > .read = ioapic_mmio_read, > .write = ioapic_mmio_write, > - .in_range = ioapic_in_range, > }; > > int kvm_ioapic_init(struct kvm *kvm) > @@ -343,7 +345,7 @@ int kvm_ioapic_init(struct kvm *kvm) > kvm_ioapic_reset(ioapic); > kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops); > ioapic->kvm = kvm; > - kvm_io_bus_register_dev(&kvm->mmio_bus, &ioapic->dev); > + kvm_io_bus_register_dev(kvm, &kvm->mmio_bus, &ioapic->dev); > return 0; > } > > diff --git a/virt/kvm/iodev.h b/virt/kvm/iodev.h > index 2c67f5a..c4b07eb 100644 > --- a/virt/kvm/iodev.h > +++ b/virt/kvm/iodev.h > @@ -17,20 +17,24 @@ > #define __KVM_IODEV_H__ > > #include <linux/kvm_types.h> > +#include <asm/errno.h> > > struct kvm_io_device; > > +/** > + * read and write handlers are called under kvm bus_lock. > + * They return 0 if the transaction has been handled, > + * or non-zero to have it passed to the next device. > + **/ > struct kvm_io_device_ops { > - void (*read)(struct kvm_io_device *this, > + int (*read)(struct kvm_io_device *this, > + gpa_t addr, > + int len, > + void *val); > + int (*write)(struct kvm_io_device *this, > gpa_t addr, > int len, > - void *val); > - void (*write)(struct kvm_io_device *this, > - gpa_t addr, > - int len, > - const void *val); > - int (*in_range)(struct kvm_io_device *this, gpa_t addr, int len, > - int is_write); > + const void *val); > void (*destructor)(struct kvm_io_device *this); > }; > > @@ -45,26 +49,16 @@ static inline void kvm_iodevice_init(struct kvm_io_device *dev, > dev->ops = ops; > } > > -static inline void kvm_iodevice_read(struct kvm_io_device *dev, > - gpa_t addr, > - int len, > - void *val) > +static inline int kvm_iodevice_read(struct kvm_io_device *dev, > + gpa_t addr, int l, void *v) > { > - dev->ops->read(dev, addr, len, val); > + return dev->ops->read ? dev->ops->read(dev, addr, l, v) : -EOPNOTSUPP; > } > > -static inline void kvm_iodevice_write(struct kvm_io_device *dev, > - gpa_t addr, > - int len, > - const void *val) > +static inline int kvm_iodevice_write(struct kvm_io_device *dev, > + gpa_t addr, int l, const void *v) > { > - dev->ops->write(dev, addr, len, val); > -} > - > -static inline int kvm_iodevice_in_range(struct kvm_io_device *dev, > - gpa_t addr, int len, int is_write) > -{ > - return dev->ops->in_range(dev, addr, len, is_write); > + return dev->ops->write ? dev->ops->write(dev, addr, l, v) : -EOPNOTSUPP; > } > > static inline void kvm_iodevice_destructor(struct kvm_io_device *dev) > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c > index 58d6bc6..3b19839 100644 > --- a/virt/kvm/kvm_main.c > +++ b/virt/kvm/kvm_main.c > @@ -981,6 +981,7 @@ static struct kvm *kvm_create_vm(void) > kvm_io_bus_init(&kvm->pio_bus); > kvm_irqfd_init(kvm); > mutex_init(&kvm->lock); > + init_rwsem(&kvm->bus_lock); > mutex_init(&kvm->irq_lock); > kvm_io_bus_init(&kvm->mmio_bus); > init_rwsem(&kvm->slots_lock); > @@ -2484,26 +2485,42 @@ void kvm_io_bus_destroy(struct kvm_io_bus *bus) > } > } > > -struct kvm_io_device *kvm_io_bus_find_dev(struct kvm_io_bus *bus, > - gpa_t addr, int len, int is_write) > +int kvm_io_bus_write(struct kvm *kvm, struct kvm_io_bus *bus, gpa_t addr, > + int len, const void *val) > { > - int i; > - > - for (i = 0; i < bus->dev_count; i++) { > - struct kvm_io_device *pos = bus->devs[i]; > - > - if (kvm_iodevice_in_range(pos, addr, len, is_write)) > - return pos; > - } > + int i, r = -EOPNOTSUPP; > + down_read(&kvm->bus_lock); > + for (i = 0; i < bus->dev_count; i++) > + if (!kvm_iodevice_write(bus->devs[i], addr, len, val)) { > + r = 0; > + break; > + } > + up_read(&kvm->bus_lock); > + return r; > +} > > - return NULL; > +int kvm_io_bus_read(struct kvm *kvm, struct kvm_io_bus *bus, gpa_t addr, > + int len, void *val) > +{ > + int i, r = -EOPNOTSUPP; > + down_read(&kvm->bus_lock); > + for (i = 0; i < bus->dev_count; i++) > + if (!kvm_iodevice_read(bus->devs[i], addr, len, val)) { > + r = 0; > + break; > + } > + up_read(&kvm->bus_lock); > + return r; > } > > -void kvm_io_bus_register_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev) > +void kvm_io_bus_register_dev(struct kvm *kvm, struct kvm_io_bus *bus, > + struct kvm_io_device *dev) > { > + down_write(&kvm->bus_lock); > BUG_ON(bus->dev_count > (NR_IOBUS_DEVS-1)); > > bus->devs[bus->dev_count++] = dev; > + up_write(&kvm->bus_lock); > } > > static struct notifier_block kvm_cpu_notifier = { > -- > 1.6.2.2 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html