On 07/24/2011 09:15 AM, Sasha Levin wrote:
Currently the method of dealing with an IO operation on a bus (PIO/MMIO) is to call the read or write callback for each device registered on the bus until we find a device which handles it. Since the number of devices on a bus can be significant due to ioeventfds and coalesced MMIO zones, this leads to a lot of overhead on each IO operation.

Instead of registering devices, we now register ranges which point to a device. Lookup is done using an efficient bsearch instead of a linear search.

The performance test was conducted by comparing the exit count per second with 200 ioeventfds created on one byte while the guest continuously accesses a different byte (triggering usermode exits). Before the patch the guest achieved 259k exits per second; after the patch it does 274k exits per second.

diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index efad723..094e057 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -713,14 +713,15 @@ struct kvm_pit *kvm_create_pit(struct kvm *kvm, u32 flags)
 	kvm_register_irq_mask_notifier(kvm, 0, &pit->mask_notifier);
 
 	kvm_iodevice_init(&pit->dev, &pit_dev_ops);
-	ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, &pit->dev);
+	ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, KVM_PIT_BASE_ADDRESS, KVM_PIT_MEM_LENGTH, &pit->dev);
Long line.
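For reference, the range registration and bsearch() lookup the changelog describes could look roughly like the sketch below; the struct layout and helper names are illustrative assumptions, not necessarily what the patch itself does:

#include <linux/bsearch.h>

/* Illustrative sketch only; assumes kvm_io_bus gains a sorted,
 * non-overlapping range[] array as the diff below suggests. */
struct kvm_io_range {
	gpa_t addr;			/* first address covered by the device */
	int len;			/* length of the range in bytes */
	struct kvm_io_device *dev;
};

/* A key compares equal to an entry when it falls inside that entry's range. */
static int kvm_io_bus_range_cmp(const void *key, const void *elt)
{
	const struct kvm_io_range *k = key;
	const struct kvm_io_range *r = elt;

	if (k->addr < r->addr)
		return -1;
	if (k->addr >= r->addr + r->len)
		return 1;
	return 0;
}

static struct kvm_io_device *kvm_io_bus_find_dev(struct kvm_io_bus *bus,
						 gpa_t addr)
{
	struct kvm_io_range key = { .addr = addr, .len = 1 };
	struct kvm_io_range *r;

	r = bsearch(&key, bus->range, bus->dev_count,
		    sizeof(struct kvm_io_range), kvm_io_bus_range_cmp);

	return r ? r->dev : NULL;	/* NULL: nothing claims this address */
}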
-static inline struct kvm_pic *to_pic(struct kvm_io_device *dev)
+static inline struct kvm_pic *to_pic(struct kvm_io_device *dev, gpa_t addr)
 {
-	return container_of(dev, struct kvm_pic, dev);
+	switch (addr) {
+	case 0x20:
+	case 0x21:
+		return container_of(dev, struct kvm_pic, dev_master);
+	case 0xa0:
+	case 0xa1:
+		return container_of(dev, struct kvm_pic, dev_slave);
+	case 0x4d0:
+	case 0x4d1:
+		return container_of(dev, struct kvm_pic, dev_eclr);
+	}
+
+	return NULL;
 }
Somewhat ugly. I think int picdev_write_master(...) { return picdev_write(container_of(...), ...); } is nicer, no?
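That is, one thin wrapper per registered kvm_io_device, so the address never has to be decoded back into a sub-device. A rough sketch, assuming picdev_write() is changed to take the struct kvm_pic directly (signatures are illustrative, not the actual patch):

static int picdev_write(struct kvm_pic *s, gpa_t addr, int len, const void *val)
{
	/* existing write logic, now operating on the PIC state directly */
	return 0;
}

static int picdev_write_master(struct kvm_io_device *dev, gpa_t addr,
			       int len, const void *val)
{
	return picdev_write(container_of(dev, struct kvm_pic, dev_master),
			    addr, len, val);
}

static int picdev_write_slave(struct kvm_io_device *dev, gpa_t addr,
			      int len, const void *val)
{
	return picdev_write(container_of(dev, struct kvm_pic, dev_slave),
			    addr, len, val);
}

static int picdev_write_eclr(struct kvm_io_device *dev, gpa_t addr,
			     int len, const void *val)
{
	return picdev_write(container_of(dev, struct kvm_pic, dev_eclr),
			    addr, len, val);
}

Each range then registers its own ops table, and to_pic() could go away entirely.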
@@ -560,16 +572,36 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm)
 	/*
 	 * Initialize PIO device
 	 */
-	kvm_iodevice_init(&s->dev, &picdev_ops);
+	kvm_iodevice_init(&s->dev_master, &picdev_ops);
+	kvm_iodevice_init(&s->dev_slave, &picdev_ops);
+	kvm_iodevice_init(&s->dev_eclr, &picdev_ops);
 	mutex_lock(&kvm->slots_lock);
-	ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, &s->dev);
+	ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, 0x20, 2, &s->dev_master);
+	if (ret < 0)
+		goto fail_unlock;
+
+	ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, 0xa0, 2, &s->dev_slave);
+	if (ret < 0)
+		goto fail_unlock;
+
+	ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, 0x4d0, 2, &s->dev_eclr);
+	if (ret < 0)
+		goto fail_unlock;
+
 	mutex_unlock(&kvm->slots_lock);
-	if (ret < 0) {
-		kfree(s);
-		return NULL;
-	}
 
 	return s;
+
+fail_unlock:
+
+	mutex_unlock(&kvm->slots_lock);
+	kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &s->dev_master);
+	kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &s->dev_slave);
+	kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &s->dev_eclr);
+
+	kfree(s);
+
+	return NULL;
 }
You're unregistering devices that were never registered. It may work now, but it's fragile.
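A more conventional unwind only undoes the registrations that actually succeeded, along the lines of the sketch below (label names are hypothetical, and unregistration is assumed to happen while slots_lock is still held):

	/* sketch: unwind only what was registered; labels are hypothetical */
	ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, 0x20, 2, &s->dev_master);
	if (ret < 0)
		goto fail_unlock;

	ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, 0xa0, 2, &s->dev_slave);
	if (ret < 0)
		goto fail_unreg_master;

	ret = kvm_io_bus_register_dev(kvm, KVM_PIO_BUS, 0x4d0, 2, &s->dev_eclr);
	if (ret < 0)
		goto fail_unreg_slave;

	mutex_unlock(&kvm->slots_lock);

	return s;

fail_unreg_slave:
	kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &s->dev_slave);

fail_unreg_master:
	kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &s->dev_master);

fail_unlock:
	mutex_unlock(&kvm->slots_lock);
	kfree(s);
	return NULL;

That way each failure path backs out exactly the devices registered so far and nothing else.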
 	if (ret < 0)
 		goto out_free_dev;
 	list_add_tail(&dev->list, &kvm->coalesced_zones);
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 73358d2..f59c1e8 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -586,7 +586,8 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
 	kvm_iodevice_init(&p->dev, &ioeventfd_ops);
 
-	ret = kvm_io_bus_register_dev(kvm, bus_idx, &p->dev);
+	ret = kvm_io_bus_register_dev(kvm, bus_idx, p->addr, p->length,
+				      &p->dev);
Should this be p->length or 1?
 #include <asm/processor.h>
 #include <asm/io.h>

@@ -2391,24 +2393,94 @@ static void kvm_io_bus_destroy(struct kvm_io_bus *bus)
 	int i;
 
 	for (i = 0; i < bus->dev_count; i++) {
-		struct kvm_io_device *pos = bus->devs[i];
+		struct kvm_io_device *pos = bus->range[i].dev;
This will call the destructor three times for the PIC. Is this safe?

--
error compiling committee.c: too many arguments to function