From: Liu Ping Fan <pingfank@xxxxxxxxxxxxxxxxxx>

The device will demand the collection of vcpus' numa info, and
trigger the guest to rebuild the sched domain.

Signed-off-by: Liu Ping Fan <pingfank@xxxxxxxxxxxxxxxxxx>
---
 Makefile.target           |    1 +
 hmp-commands.hx           |   16 ++++
 hw/qdev.h                 |    1 +
 hw/virt_sd.c              |  214 +++++++++++++++++++++++++++++++++++++++++++++
 linux-headers/linux/kvm.h |    6 ++
 5 files changed, 238 insertions(+), 0 deletions(-)
 create mode 100644 hw/virt_sd.c

diff --git a/Makefile.target b/Makefile.target
index 4fbbabf..fded330 100644
--- a/Makefile.target
+++ b/Makefile.target
@@ -265,6 +265,7 @@ obj-i386-y += pci-hotplug.o smbios.o wdt_ib700.o
 obj-i386-y += debugcon.o multiboot.o
 obj-i386-y += pc_piix.o
 obj-i386-y += pc_sysfw.o
+obj-i386-y += virt_sd.o
 obj-i386-$(CONFIG_KVM) += kvm/clock.o kvm/apic.o kvm/i8259.o kvm/ioapic.o kvm/i8254.o
 obj-i386-$(CONFIG_SPICE) += qxl.o qxl-logger.o qxl-render.o
 
diff --git a/hmp-commands.hx b/hmp-commands.hx
index 461fa59..47b826c 100644
--- a/hmp-commands.hx
+++ b/hmp-commands.hx
@@ -1254,6 +1254,22 @@ Change I/O throttle limits for a block drive to @var{bps} @var{bps_rd} @var{bps_
 ETEXI
 
     {
+        .name       = "guest_numa_notify",
+        .args_type  = "",
+        .params     = "",
+        .help       = "force guest to update numa info based on host",
+        .user_print = monitor_user_noop,
+        .mhandler.cmd_new = do_guest_numa_notify,
+    },
+
+STEXI
+@item guest_numa_notify
+@findex guest_numa_notify
+
+Force the guest to update its numa info based on the host.
+ETEXI
+
+    {
         .name       = "block_set_io_throttle",
         .args_type  = "device:B,bps:l,bps_rd:l,bps_wr:l,iops:l,iops_rd:l,iops_wr:l",
         .params     = "device bps bps_rd bps_wr iops iops_rd iops_wr",
diff --git a/hw/qdev.h b/hw/qdev.h
index 4e90119..6902474 100644
--- a/hw/qdev.h
+++ b/hw/qdev.h
@@ -203,6 +203,7 @@ void do_info_qtree(Monitor *mon);
 void do_info_qdm(Monitor *mon);
 int do_device_add(Monitor *mon, const QDict *qdict, QObject **ret_data);
 int do_device_del(Monitor *mon, const QDict *qdict, QObject **ret_data);
+int do_guest_numa_notify(Monitor *mon, const QDict *qdict, QObject **ret_data);
 
 /*** qdev-properties.c ***/
 
diff --git a/hw/virt_sd.c b/hw/virt_sd.c
new file mode 100644
index 0000000..c3aece4
--- /dev/null
+++ b/hw/virt_sd.c
@@ -0,0 +1,214 @@
+/*
+ * Virt sched domain Support
+ *
+ * Copyright IBM, Corp. 2012
+ *
+ * Authors:
+ *  Liu Ping Fan <pingfanl@xxxxxxxxxxxxxxxxxx>
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.  See
+ * the COPYING file in the top-level directory.
+ *
+*/
+#include "hw.h"
+#include "pci.h"
+#include "kvm.h"
+#include <linux/kvm.h>
+
+/* #define DEBUG_VSD */
+#ifdef DEBUG_VSD
+#define VSD_DPRINTF(fmt, ...) \
+    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
+#else
+#define VSD_DPRINTF(fmt, ...) \
+    do { } while (0)
+#endif
+
+#define PCI_DEVICE_ID_CPUSTATE 0x1010
+
+typedef struct VirtSdState VirtSdState;
+typedef struct Regs Regs;
+
+/* Size of the MMIO BAR that exposes the registers below. */
+#define VSD_REGS_SIZE 0x1000
+
+/* Byte offsets of the guest-writable registers inside BAR 0. */
+#define VSD_REG_GPA_APIC_NODE 0
+#define VSD_REG_SIZE 4
+
+/* Guest-programmed registers; both are 32 bits wide. */
+struct Regs {
+    uint32_t gpa_apic_node; /* guest-physical address of the buffer */
+    uint32_t size;          /* length of that buffer in bytes */
+};
+
+struct VirtSdState {
+    PCIDevice dev;
+    MemoryRegion mmio;
+    Regs regs;
+};
+
+static const VMStateDescription vmstate_vsd = {
+    .name = "vsd",
+    .version_id = 1,
+    .minimum_version_id = 1,
+    .fields = (VMStateField[]) {
+        VMSTATE_PCI_DEVICE(dev, VirtSdState),
+        VMSTATE_UINT32(regs.gpa_apic_node, VirtSdState),
+        VMSTATE_UINT32(regs.size, VirtSdState),
+        VMSTATE_END_OF_LIST()
+    },
+};
+
+/* The single vsd instance, or NULL while no such device exists. */
+static VirtSdState *vsd_dev;
+
+/*
+ * Map the buffer the guest programmed into the registers, pass it to KVM
+ * through KVM_SET_GUEST_NUMA and, on success, pulse the device interrupt so
+ * that the guest picks up the new numa info.
+ *
+ * Returns 0 on success, -1 if KVM or the vsd device is missing, the guest
+ * has not programmed a buffer, the buffer cannot be mapped in one piece, or
+ * the ioctl fails.
+ *
+ * NOTE(review): unmapping right after the ioctl assumes KVM does not keep
+ * using vapic_map once the ioctl has returned -- confirm on the kernel side.
+ */
+static int update_guest_numa(void)
+{
+    struct kvm_virt_sd vsd;
+    target_phys_addr_t len;
+    void *map;
+    int ret;
+
+    if (!kvm_enabled() || !vsd_dev || !vsd_dev->regs.size) {
+        return -1;
+    }
+
+    len = vsd_dev->regs.size;
+    map = cpu_physical_memory_map(vsd_dev->regs.gpa_apic_node, &len, 1);
+    if (!map) {
+        return -1;
+    }
+    if (len != vsd_dev->regs.size) {
+        /* Only a prefix could be mapped; release it untouched. */
+        cpu_physical_memory_unmap(map, len, 1, 0);
+        return -1;
+    }
+
+    vsd.vapic_map = map;
+    vsd.sz = len;
+    ret = kvm_ioctl(kvm_state, KVM_SET_GUEST_NUMA, &vsd);
+    /* Report the buffer as written only if the ioctl succeeded. */
+    cpu_physical_memory_unmap(map, len, 1, ret < 0 ? 0 : len);
+    if (ret < 0) {
+        return -1;
+    }
+
+    qemu_set_irq(vsd_dev->dev.irq[0], 1);
+    qemu_set_irq(vsd_dev->dev.irq[0], 0);
+    return 0;
+}
+
+/* Monitor handler for "guest_numa_notify"; takes no arguments. */
+int do_guest_numa_notify(Monitor *mon, const QDict *qdict, QObject **ret_data)
+{
+    return update_guest_numa();
+}
+
+/* Store guest writes to the two registers; other offsets are only logged. */
+static void
+vsd_mmio_write(void *opaque, target_phys_addr_t addr, uint64_t val,
+               unsigned size)
+{
+    VirtSdState *vsd = opaque;
+
+    VSD_DPRINTF("vsd_mmio_write,addr=0x" TARGET_FMT_plx
+                ", val=0x%" PRIx64 "\n", addr, val);
+    switch (addr) {
+    case VSD_REG_GPA_APIC_NODE:
+        vsd->regs.gpa_apic_node = val;
+        break;
+    case VSD_REG_SIZE:
+        vsd->regs.size = val;
+        break;
+    default:
+        fprintf(stderr, "reg unimplemented\n");
+        break;
+    }
+}
+
+/* The registers are write-only from the guest's side: reads return 0. */
+static uint64_t
+vsd_mmio_read(void *opaque, target_phys_addr_t addr, unsigned size)
+{
+    return 0;
+}
+
+static const MemoryRegionOps vsd_ops = {
+    .read = vsd_mmio_read,
+    .write = vsd_mmio_write,
+    .endianness = DEVICE_LITTLE_ENDIAN,
+};
+
+static int pci_vsd_init(PCIDevice *dev)
+{
+    uint8_t *pci_cfg = dev->config;
+    VirtSdState *s = DO_UPCAST(VirtSdState, dev, dev);
+
+    if (vsd_dev) {
+        /* update_guest_numa() can only address a single instance. */
+        return -1;
+    }
+
+    memory_region_init_io(&s->mmio, &vsd_ops, s, "vsd", VSD_REGS_SIZE);
+    pci_cfg[PCI_INTERRUPT_PIN] = 1;
+    pci_register_bar(&s->dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &s->mmio);
+    vsd_dev = s;
+    return 0;
+}
+
+static int pci_vsd_exit(PCIDevice *dev)
+{
+    VirtSdState *s = DO_UPCAST(VirtSdState, dev, dev);
+
+    /* Stop the monitor command from reaching a device that is going away. */
+    if (vsd_dev == s) {
+        vsd_dev = NULL;
+    }
+    memory_region_destroy(&s->mmio);
+    return 0;
+}
+
+static Property vsd_properties[] = {
+    DEFINE_PROP_END_OF_LIST(),
+};
+
+static void vsd_class_init(ObjectClass *klass, void *data)
+{
+    DeviceClass *dc = DEVICE_CLASS(klass);
+    PCIDeviceClass *k = PCI_DEVICE_CLASS(klass);
+
+    k->init = pci_vsd_init;
+    k->exit = pci_vsd_exit;
+    k->vendor_id = PCI_VENDOR_ID_IBM;
+    k->device_id = PCI_DEVICE_ID_CPUSTATE;
+    k->revision = 0x10;
+    k->class_id = PCI_CLASS_MEMORY_RAM;
+    dc->vmsd = &vmstate_vsd;
+    dc->props = vsd_properties;
+}
+
+static TypeInfo vsd_info = {
+    .name = "vsd",
+    .parent = TYPE_PCI_DEVICE,
+    .instance_size = sizeof(VirtSdState),
+    .class_init = vsd_class_init,
+};
+
+static void vsd_register_types(void)
+{
+    type_register_static(&vsd_info);
+}
+type_init(vsd_register_types)
diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h
index ee7bd9c..aa5aec3 100644
--- a/linux-headers/linux/kvm.h
+++ b/linux-headers/linux/kvm.h
@@ -733,6 +733,7 @@ struct kvm_one_reg {
 					struct kvm_userspace_memory_region)
 #define KVM_SET_TSS_ADDR          _IO(KVMIO,   0x47)
 #define KVM_SET_IDENTITY_MAP_ADDR _IOW(KVMIO,  0x48, __u64)
+#define KVM_SET_GUEST_NUMA        _IOW(KVMIO,  0x49, struct kvm_virt_sd)
 
 /* enable ucontrol for s390 */
 struct kvm_s390_ucas_mapping {
@@ -913,4 +914,9 @@ struct kvm_assigned_msix_entry {
 	__u16 padding[3];
 };
 
+struct kvm_virt_sd {
+	__u64 *vapic_map;
+	__u64 sz;
+};
+
 #endif /* __LINUX_KVM_H */
-- 
1.7.4.4

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html