[PATCH] IO: Intelligent device lookup on bus

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Currently the method of dealing with an IO operation on a bus (PIO/MMIO)
is to call the read or write callback for each device registered
on the bus until we find a device which handles it.

Since the number of devices on a bus can be significant due to ioeventfds
and coalesced MMIO zones, this leads to a lot of overhead on each IO
operation.

Instead of registering devices, we now register ranges which points to
a device. Lookup is done using an efficient bsearch instead of a linear
search.

This should speed up all IO operations generated by the guest.

Cc: Avi Kivity <avi@xxxxxxxxxx>
Cc: Marcelo Tosatti <mtosatti@xxxxxxxxxx>
Signed-off-by: Sasha Levin <levinsasha928@xxxxxxxxx>
---

This patch depends on '[PATCH v3] MMIO: Make coalesced mmio use a device
per zone'.

 arch/x86/kvm/i8254.c      |    4 +-
 arch/x86/kvm/i8259.c      |    4 +-
 include/linux/kvm_host.h  |   18 ++++----
 virt/kvm/coalesced_mmio.c |    6 +--
 virt/kvm/eventfd.c        |    3 +-
 virt/kvm/ioapic.c         |   13 +-----
 virt/kvm/kvm_main.c       |  107 +++++++++++++++++++++++++++++++++++++++-----
 7 files changed, 115 insertions(+), 40 deletions(-)

diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index efad723..61d193c 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -713,13 +713,15 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
 	kvm_register_irq_mask_notifier(kvm, 0, &pit->mask_notifier);
 
 	kvm_iodevice_init(&pit->dev, &pit_dev_ops);
-	ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, &pit->dev);
+	ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, KVM_PIT_BASE_ADDRESS,
+					KVM_PIT_MEM_LENGTH, &pit->dev);
 	if (ret < 0)
 		goto fail;
 
 	if (flags & KVM_PIT_SPEAKER_DUMMY) {
 		kvm_iodevice_init(&pit->speaker_dev, &speaker_dev_ops);
 		ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS,
+						KVM_SPEAKER_BASE_ADDRESS, 4,
 						&pit->speaker_dev);
 		if (ret < 0)
 			goto fail_unregister;
diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c
index 19fe855..c2295af 100644
--- a/arch/x86/kvm/i8259.c
+++ b/arch/x86/kvm/i8259.c
@@ -562,7 +562,9 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm)
 	 */
 	kvm_iodevice_init(&s->dev, &picdev_ops);
 	mutex_lock(&kvm->slots_lock);
-	ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, &s->dev);
+	ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, 0x20, 2, &s->dev);
+	ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, 0xa0, 2, &s->dev);
+	ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, 0x4d0, 2, &s->dev);
 	mutex_unlock(&kvm->slots_lock);
 	if (ret < 0) {
 		kfree(s);
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 4766178..512fed4 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -55,16 +55,16 @@ struct kvm;
 struct kvm_vcpu;
 extern struct kmem_cache *kvm_vcpu_cache;
 
-/*
- * It would be nice to use something smarter than a linear search, TBD...
- * Thankfully we dont expect many devices to register (famous last words :),
- * so until then it will suffice.  At least its abstracted so we can change
- * in one place.
- */
+struct kvm_io_range {
+	gpa_t			addr;
+	int			len;
+	struct kvm_io_device	*dev;
+};
+
 struct kvm_io_bus {
-	int                   dev_count;
+	int			dev_count;
 #define NR_IOBUS_DEVS 300
-	struct kvm_io_device *devs[NR_IOBUS_DEVS];
+	struct kvm_io_range	range[NR_IOBUS_DEVS];
 };
 
 enum kvm_bus {
@@ -78,7 +78,7 @@ int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
 int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int len,
 		    void *val);
 int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx,
-			    struct kvm_io_device *dev);
+			    gpa_t addr, int len, struct kvm_io_device *dev);
 int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
 			      struct kvm_io_device *dev);
 
diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c
index c0e37d7..70f69cf 100644
--- a/virt/kvm/coalesced_mmio.c
+++ b/virt/kvm/coalesced_mmio.c
@@ -60,9 +60,6 @@ static int coalesced_mmio_write(struct kvm_io_device *this,
 	struct kvm_coalesced_mmio_dev *dev = to_mmio(this);
 	struct kvm_coalesced_mmio_ring *ring = dev->kvm->coalesced_mmio_ring;
 
-	if (!coalesced_mmio_in_range(dev, addr, len))
-		return -EOPNOTSUPP;
-
 	spin_lock(&dev->kvm->coalesced_zones.lock);
 
 	if (!coalesced_mmio_has_room(dev)) {
@@ -141,7 +138,8 @@ int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm,
 	dev->zone = *zone;
 
 	mutex_lock(&kvm->slots_lock);
-	ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, &dev->dev);
+	ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, zone->addr,
+					zone->size, &dev->dev);
 	mutex_unlock(&kvm->slots_lock);
 	if (ret < 0)
 		goto out_free_dev;
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 73358d2..ab02042 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -586,7 +586,8 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 
 	kvm_iodevice_init(&p->dev, &ioeventfd_ops);
 
-	ret = kvm_io_bus_register_dev(kvm, bus_idx, &p->dev);
+	ret = kvm_io_bus_register_dev(kvm, bus_idx, p->addr,
+					p->length, &p->dev);
 	if (ret < 0)
 		goto unlock_fail;
 
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 8df1ca1..72559a7 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -271,19 +271,11 @@ static inline struct kvm_ioapic *to_ioapic(struct kvm_io_device *dev)
 	return container_of(dev, struct kvm_ioapic, dev);
 }
 
-static inline int ioapic_in_range(struct kvm_ioapic *ioapic, gpa_t addr)
-{
-	return ((addr >= ioapic->base_address &&
-		 (addr < ioapic->base_address + IOAPIC_MEM_LENGTH)));
-}
-
 static int ioapic_mmio_read(struct kvm_io_device *this, gpa_t addr, int len,
 			    void *val)
 {
 	struct kvm_ioapic *ioapic = to_ioapic(this);
 	u32 result;
-	if (!ioapic_in_range(ioapic, addr))
-		return -EOPNOTSUPP;
 
 	ioapic_debug("addr %lx\n", (unsigned long)addr);
 	ASSERT(!(addr & 0xf));	/* check alignment */
@@ -325,8 +317,6 @@ static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
 {
 	struct kvm_ioapic *ioapic = to_ioapic(this);
 	u32 data;
-	if (!ioapic_in_range(ioapic, addr))
-		return -EOPNOTSUPP;
 
 	ioapic_debug("ioapic_mmio_write addr=%p len=%d val=%p\n",
 		     (void*)addr, len, val);
@@ -394,7 +384,8 @@ int kvm_ioapic_init(struct kvm *kvm)
 	kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops);
 	ioapic->kvm = kvm;
 	mutex_lock(&kvm->slots_lock);
-	ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, &ioapic->dev);
+	ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, ioapic->base_address,
+					IOAPIC_MEM_LENGTH, &ioapic->dev);
 	mutex_unlock(&kvm->slots_lock);
 	if (ret < 0) {
 		kvm->arch.vioapic = NULL;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index aefdda3..28c6f01 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -2391,24 +2391,99 @@ static void kvm_io_bus_destroy(struct kvm_io_bus *bus)
 	int i;
 
 	for (i = 0; i < bus->dev_count; i++) {
-		struct kvm_io_device *pos = bus->devs[i];
+		struct kvm_io_device *pos = bus->range[i].dev;
 
 		kvm_iodevice_destructor(pos);
 	}
 	kfree(bus);
 }
 
+int kvm_io_bus_find_closest_dev_idx(struct kvm_io_bus *bus,
+					gpa_t addr, int len)
+{
+	int start = 0, end = bus->dev_count - 1;
+
+	if (bus->dev_count == 0)
+		return -1;
+
+	while (start <= end) {
+		int mid = (start + end) / 2;
+		struct kvm_io_range *range = &bus->range[mid];
+
+		if (addr > range->addr)
+			start = mid + 1;
+		else if (addr < range->addr)
+			end = mid - 1;
+		else
+			return mid;
+	}
+
+	return start - 1;
+}
+
+struct kvm_io_device *kvm_io_bus_find_dev(struct kvm_io_bus *bus,
+						gpa_t addr, int len)
+{
+	int idx;
+
+	idx = kvm_io_bus_find_closest_dev_idx(bus, addr, len);
+	if (idx < 0)
+		return NULL;
+
+	/* Verify that [addr, addr+len] is contained within the range */
+	if ((bus->range[idx].addr > addr) ||
+		((bus->range[idx].addr + bus->range[idx].len) < (addr + len)))
+		return NULL;
+
+	return bus->range[idx].dev;
+}
+
+int kvm_io_bus_insert_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev,
+				gpa_t addr, int len)
+{
+	int new_idx, i;
+
+	if (bus->dev_count == NR_IOBUS_DEVS)
+		return -ENOSPC;
+
+	/* This is where the new entry should be located */
+	new_idx = kvm_io_bus_find_closest_dev_idx(bus, addr, len);
+
+	/* This should be the first device on the bus */
+	if (new_idx < 0)
+		new_idx = 0;
+	/* Place the device after the existing device */
+	else
+		new_idx++;
+
+	/* Clear it by shifting the array to the right at index */
+	memmove(&bus->range[new_idx + 1], &bus->range[new_idx],
+			sizeof(bus->range[new_idx]) * 
+			(bus->dev_count - new_idx));
+
+	bus->range[new_idx] = (struct kvm_io_range) {
+		.addr = addr,
+		.len = len,
+		.dev = dev,
+	};
+
+	bus->dev_count++;
+
+	return 0;
+}
+
 /* kvm_io_bus_write - called under kvm->slots_lock */
 int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
 		     int len, const void *val)
 {
-	int i;
 	struct kvm_io_bus *bus;
+	struct kvm_io_device *dev;
 
 	bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu);
-	for (i = 0; i < bus->dev_count; i++)
-		if (!kvm_iodevice_write(bus->devs[i], addr, len, val))
-			return 0;
+	dev = kvm_io_bus_find_dev(bus, addr, len);
+	if (dev && !kvm_iodevice_write(dev, addr, len, val))
+		return 0;
+
 	return -EOPNOTSUPP;
 }
 
@@ -2416,19 +2491,20 @@ int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
 int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
 		    int len, void *val)
 {
-	int i;
 	struct kvm_io_bus *bus;
+	struct kvm_io_device *dev;
 
 	bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu);
-	for (i = 0; i < bus->dev_count; i++)
-		if (!kvm_iodevice_read(bus->devs[i], addr, len, val))
-			return 0;
+	dev = kvm_io_bus_find_dev(bus, addr, len);
+	if (dev && !kvm_iodevice_read(dev, addr, len, val))
+		return 0;
+
 	return -EOPNOTSUPP;
 }
 
 /* Caller must hold slots_lock. */
 int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx,
-			    struct kvm_io_device *dev)
+			    gpa_t addr, int len, struct kvm_io_device *dev)
 {
 	struct kvm_io_bus *new_bus, *bus;
 
@@ -2440,7 +2516,7 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx,
 	if (!new_bus)
 		return -ENOMEM;
 	memcpy(new_bus, bus, sizeof(struct kvm_io_bus));
-	new_bus->devs[new_bus->dev_count++] = dev;
+	kvm_io_bus_insert_dev(new_bus, dev, addr, len);
 	rcu_assign_pointer(kvm->buses[bus_idx], new_bus);
 	synchronize_srcu_expedited(&kvm->srcu);
 	kfree(bus);
@@ -2464,9 +2540,14 @@ int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
 
 	r = -ENOENT;
 	for (i = 0; i < new_bus->dev_count; i++)
-		if (new_bus->devs[i] == dev) {
+		if (new_bus->range[i].dev == dev) {
 			r = 0;
-			new_bus->devs[i] = new_bus->devs[--new_bus->dev_count];
+			memmove(&new_bus->range[i], &new_bus->range[i+1],
+				sizeof(new_bus->range[i]) *
+				(new_bus->dev_count - i));
+			
+			new_bus->dev_count--;
+			i--;
 			break;
 		}
 
-- 
1.7.6

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux