Currently the method of dealing with an IO operation on a bus (PIO/MMIO) is to call the read or write callback for each device registered on the bus until we find a device which handles it. Since the number of devices on a bus can be significant due to ioeventfds and coalesced MMIO zones, this leads to a lot of overhead on each IO operation. Instead of registering devices, we now register ranges which point to a device. Lookup is done using an efficient binary search instead of a linear search. This should speed up all IO operations generated by the guest. Cc: Avi Kivity <avi@xxxxxxxxxx> Cc: Marcelo Tosatti <mtosatti@xxxxxxxxxx> Signed-off-by: Sasha Levin <levinsasha928@xxxxxxxxx> --- This patch depends on '[PATCH v3] MMIO: Make coalesced mmio use a device per zone'. arch/x86/kvm/i8254.c | 4 +- arch/x86/kvm/i8259.c | 4 +- include/linux/kvm_host.h | 18 ++++---- virt/kvm/coalesced_mmio.c | 6 +-- virt/kvm/eventfd.c | 3 +- virt/kvm/ioapic.c | 13 +----- virt/kvm/kvm_main.c | 107 +++++++++++++++++++++++++++++++++++++++----- 7 files changed, 115 insertions(+), 40 deletions(-) diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index efad723..61d193c 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -713,13 +713,15 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags) kvm_register_irq_mask_notifier(kvm, 0, &pit->mask_notifier); kvm_iodevice_init(&pit->dev, &pit_dev_ops); - ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, &pit->dev); + ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, KVM_PIT_BASE_ADDRESS, + KVM_PIT_MEM_LENGTH, &pit->dev); if (ret < 0) goto fail; if (flags & KVM_PIT_SPEAKER_DUMMY) { kvm_iodevice_init(&pit->speaker_dev, &speaker_dev_ops); ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, + KVM_SPEAKER_BASE_ADDRESS, 4, &pit->speaker_dev); if (ret < 0) goto fail_unregister; diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index 19fe855..c2295af 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c @@ -562,7 +562,9 @@ struct 
kvm_pic *kvm_create_pic(struct kvm *kvm) */ kvm_iodevice_init(&s->dev, &picdev_ops); mutex_lock(&kvm->slots_lock); - ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, &s->dev); + ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, 0x20, 2, &s->dev); + ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, 0xa0, 2, &s->dev); + ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, 0x4d0, 2, &s->dev); mutex_unlock(&kvm->slots_lock); if (ret < 0) { kfree(s); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 4766178..512fed4 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -55,16 +55,16 @@ struct kvm; struct kvm_vcpu; extern struct kmem_cache *kvm_vcpu_cache; -/* - * It would be nice to use something smarter than a linear search, TBD... - * Thankfully we dont expect many devices to register (famous last words :), - * so until then it will suffice. At least its abstracted so we can change - * in one place. - */ +struct kvm_io_range { + gpa_t addr; + int len; + struct kvm_io_device *dev; +}; + struct kvm_io_bus { - int dev_count; + int dev_count; #define NR_IOBUS_DEVS 300 - struct kvm_io_device *devs[NR_IOBUS_DEVS]; + struct kvm_io_range range[NR_IOBUS_DEVS]; }; enum kvm_bus { @@ -78,7 +78,7 @@ int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int len, void *val); int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, - struct kvm_io_device *dev); + gpa_t addr, int len, struct kvm_io_device *dev); int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, struct kvm_io_device *dev); diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c index c0e37d7..70f69cf 100644 --- a/virt/kvm/coalesced_mmio.c +++ b/virt/kvm/coalesced_mmio.c @@ -60,9 +60,6 @@ static int coalesced_mmio_write(struct kvm_io_device *this, struct kvm_coalesced_mmio_dev *dev = to_mmio(this); struct kvm_coalesced_mmio_ring *ring = 
dev->kvm->coalesced_mmio_ring; - if (!coalesced_mmio_in_range(dev, addr, len)) - return -EOPNOTSUPP; - spin_lock(&dev->kvm->coalesced_zones.lock); if (!coalesced_mmio_has_room(dev)) { @@ -141,7 +138,8 @@ int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm, dev->zone = *zone; mutex_lock(&kvm->slots_lock); - ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, &dev->dev); + ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, zone->addr, + zone->size, &dev->dev); mutex_unlock(&kvm->slots_lock); if (ret < 0) goto out_free_dev; diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 73358d2..ab02042 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -586,7 +586,8 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) kvm_iodevice_init(&p->dev, &ioeventfd_ops); - ret = kvm_io_bus_register_dev(kvm, bus_idx, &p->dev); + ret = kvm_io_bus_register_dev(kvm, bus_idx, p->addr, + p->length, &p->dev); if (ret < 0) goto unlock_fail; diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c index 8df1ca1..72559a7 100644 --- a/virt/kvm/ioapic.c +++ b/virt/kvm/ioapic.c @@ -271,19 +271,11 @@ static inline struct kvm_ioapic *to_ioapic(struct kvm_io_device *dev) return container_of(dev, struct kvm_ioapic, dev); } -static inline int ioapic_in_range(struct kvm_ioapic *ioapic, gpa_t addr) -{ - return ((addr >= ioapic->base_address && - (addr < ioapic->base_address + IOAPIC_MEM_LENGTH))); -} - static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len, void *val) { struct kvm_ioapic *ioapic = to_ioapic(this); u32 result; - if (!ioapic_in_range(ioapic, addr)) - return -EOPNOTSUPP; ioapic_debug("addr %lx\n", (unsigned long)addr); ASSERT(!(addr & 0xf)); /* check alignment */ @@ -325,8 +317,6 @@ static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len, { struct kvm_ioapic *ioapic = to_ioapic(this); u32 data; - if (!ioapic_in_range(ioapic, addr)) - return -EOPNOTSUPP; ioapic_debug("ioapic_mmio_write addr=%p len=%d val=%p\n", (void*)addr, len, 
val); @@ -394,7 +384,8 @@ int kvm_ioapic_init(struct kvm *kvm) kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops); ioapic->kvm = kvm; mutex_lock(&kvm->slots_lock); - ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, &ioapic->dev); + ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, ioapic->base_address, + IOAPIC_MEM_LENGTH, &ioapic->dev); mutex_unlock(&kvm->slots_lock); if (ret < 0) { kvm->arch.vioapic = NULL; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index aefdda3..28c6f01 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2391,24 +2391,99 @@ static void kvm_io_bus_destroy(struct kvm_io_bus *bus) int i; for (i = 0; i < bus->dev_count; i++) { - struct kvm_io_device *pos = bus->devs[i]; + struct kvm_io_device *pos = bus->range[i].dev; kvm_iodevice_destructor(pos); } kfree(bus); } +int kvm_io_bus_find_closest_dev_idx(struct kvm_io_bus *bus, + gpa_t addr, int len) +{ + int start = 0, end = bus->dev_count - 1; + + if (bus->dev_count == 0) + return -1; + + while (start <= end) { + int mid = (start + end) / 2; + struct kvm_io_range *range = &bus->range[mid]; + + if (addr > range->addr) + start = mid + 1; + else if (addr < range->addr) + end = mid - 1; + else + return mid; + } + + return start - 1; +} + +struct kvm_io_device *kvm_io_bus_find_dev(struct kvm_io_bus *bus, + gpa_t addr, int len) +{ + int idx; + + idx = kvm_io_bus_find_closest_dev_idx(bus, addr, len); + if (idx < 0) + return NULL; + + /* Verify that [addr, addr+len] is contained within the range */ + if ((bus->range[idx].addr > addr) || + ((bus->range[idx].addr + bus->range[idx].len) < (addr + len))) + return NULL; + + return bus->range[idx].dev; +} + +int kvm_io_bus_insert_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev, + gpa_t addr, int len) +{ + int new_idx, i; + + if (bus->dev_count == NR_IOBUS_DEVS) + return -ENOSPC; + + /* This is where the new entry should be located */ + new_idx = kvm_io_bus_find_closest_dev_idx(bus, addr, len); + + /* This should be the first device on the 
bus */ + if (new_idx < 0) + new_idx = 0; + /* Place the device after the existing device */ + else + new_idx++; + + /* Clear it by shifting the array to the right at index */ + memmove(&bus->range[new_idx + 1], &bus->range[new_idx], + sizeof(bus->range[new_idx]) * + (bus->dev_count - new_idx)); + + bus->range[new_idx] = (struct kvm_io_range) { + .addr = addr, + .len = len, + .dev = dev, + }; + + bus->dev_count++; + + return 0; +} + /* kvm_io_bus_write - called under kvm->slots_lock */ int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int len, const void *val) { - int i; struct kvm_io_bus *bus; + struct kvm_io_device *dev; bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); - for (i = 0; i < bus->dev_count; i++) - if (!kvm_iodevice_write(bus->devs[i], addr, len, val)) - return 0; + dev = kvm_io_bus_find_dev(bus, addr, len); + if (dev && !kvm_iodevice_write(dev, addr, len, val)) + return 0; + return -EOPNOTSUPP; } @@ -2416,19 +2491,20 @@ int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int len, void *val) { - int i; struct kvm_io_bus *bus; + struct kvm_io_device *dev; bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); - for (i = 0; i < bus->dev_count; i++) - if (!kvm_iodevice_read(bus->devs[i], addr, len, val)) - return 0; + dev = kvm_io_bus_find_dev(bus, addr, len); + if (dev && !kvm_iodevice_read(dev, addr, len, val)) + return 0; + return -EOPNOTSUPP; } /* Caller must hold slots_lock. 
*/ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, - struct kvm_io_device *dev) + gpa_t addr, int len, struct kvm_io_device *dev) { struct kvm_io_bus *new_bus, *bus; @@ -2440,7 +2516,7 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, if (!new_bus) return -ENOMEM; memcpy(new_bus, bus, sizeof(struct kvm_io_bus)); - new_bus->devs[new_bus->dev_count++] = dev; + kvm_io_bus_insert_dev(new_bus, dev, addr, len); rcu_assign_pointer(kvm->buses[bus_idx], new_bus); synchronize_srcu_expedited(&kvm->srcu); kfree(bus); @@ -2464,9 +2540,14 @@ int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, r = -ENOENT; for (i = 0; i < new_bus->dev_count; i++) - if (new_bus->devs[i] == dev) { + if (new_bus->range[i].dev == dev) { r = 0; - new_bus->devs[i] = new_bus->devs[--new_bus->dev_count]; + memmove(&new_bus->range[i], &new_bus->range[i+1], + sizeof(new_bus->range[i]) * + (new_bus->dev_count - i)); + + new_bus->dev_count--; + i--; break; } -- 1.7.6 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html