On Mon, Mar 01, 2021 at 04:16:28PM +0300, Elena Afanasova wrote: Thanks for posting this piece of the ioregionfd testing infrastructure! Please include a commit description even if it's just an RFC patch. People on qemu-devel may not be aware of ioregionfd or the purpose of this patch. Something like: Add ioregionfd support to pc-testdev for use with kvm-unit-tests that exercise the new KVM ioregionfd API. ioregionfd is a new MMIO/PIO dispatch mechanism for KVM. The kernel patches implementing ioregionfd are available here: https://patchwork.kernel.org/project/kvm/list/?series=436083 The kvm-unit-test will create one read FIFO and one write FIFO and then invoke QEMU like this: --device pc-testdev,addr=0x100000000,size=4096,rfifo=path/to/rfifo,wfifo=path/to/wfifo QEMU does not actually process the read FIFO or write FIFO. It simply registers an ioregionfd with KVM. From then on KVM will communicate directly with the kvm-unit-test via the read FIFO and write FIFO that were provided. This allows test cases to handle MMIO/PIO accesses. Elena and I discussed the long-term QEMU API for ioregionfd on IRC. Eventually this test device could use it instead of directly calling kvm_vm_ioctl(). The new QEMU API would be memory_region_set_aio_context(AioContext *aio_context). When ioregionfd is available an ioregion would be registered with KVM and AioContext will have a read fd to handle MMIO/PIO accesses for any of its ioregions. This way device emulation can run outside the vcpu thread. When ioregionfd is not available QEMU can emulate it by writing a struct ioregionfd_cmd to the write fd from the vcpu thread. The relevant AioContext will handle the read fd as normal and dispatch the MMIO/PIO access to the MemoryRegion. > Signed-off-by: Elena Afanasova <eafanasova@xxxxxxxxx> > --- > hw/misc/pc-testdev.c | 74 +++++++++++++++++++++++++++++++++++++++ > include/sysemu/kvm.h | 4 +-- > linux-headers/linux/kvm.h | 24 +++++++++++++ > 3 files changed, 100 insertions(+), 2 deletions(-) > > diff --git a/hw/misc/pc-testdev.c b/hw/misc/pc-testdev.c > index e389651869..38355923ca 100644 > --- a/hw/misc/pc-testdev.c > +++ b/hw/misc/pc-testdev.c > @@ -40,6 +40,9 @@ > #include "hw/irq.h" > #include "hw/isa/isa.h" > #include "qom/object.h" > +#include "sysemu/kvm.h" > +#include <linux/kvm.h> > +#include "hw/qdev-properties.h" > > #define IOMEM_LEN 0x10000 > > @@ -53,6 +56,15 @@ struct PCTestdev { > MemoryRegion iomem; > uint32_t ioport_data; > char iomem_buf[IOMEM_LEN]; > + > + uint64_t guest_paddr; > + uint64_t memory_size; > + char *read_fifo; > + char *write_fifo; > + bool posted_writes; > + bool pio; > + int rfd; > + int wfd; > }; > > #define TYPE_TESTDEV "pc-testdev" > @@ -169,6 +181,9 @@ static const MemoryRegionOps test_iomem_ops = { > > static void testdev_realizefn(DeviceState *d, Error **errp) > { > + struct kvm_ioregion ioreg; Zero this so that user_data is always 0 instead of undefined data from the stack? > + int flags = 0; > + > ISADevice *isa = ISA_DEVICE(d); > PCTestdev *dev = TESTDEV(d); > MemoryRegion *mem = isa_address_space(isa); > @@ -191,14 +206,73 @@ static void testdev_realizefn(DeviceState *d, Error **errp) > memory_region_add_subregion(io, 0xe8, &dev->ioport_byte); > memory_region_add_subregion(io, 0x2000, &dev->irq); > memory_region_add_subregion(mem, 0xff000000, &dev->iomem); > + > + if (!dev->guest_paddr || !dev->write_fifo) { > + return; > + } > + > + dev->wfd = open(dev->write_fifo, O_WRONLY); > + if (dev->wfd < 0) { > + error_report("failed to open write fifo %s", dev->write_fifo); > + return; > + } > + > + if (dev->read_fifo) { > + dev->rfd = open(dev->read_fifo, O_RDONLY); > + if (dev->rfd < 0) { > + error_report("failed to open read fifo %s", dev->read_fifo); > + close(dev->wfd); > + return; > + } > + } > + > + flags |= dev->pio ? KVM_IOREGION_PIO : 0; > + flags |= dev->posted_writes ? KVM_IOREGION_POSTED_WRITES : 0; > + ioreg.guest_paddr = dev->guest_paddr; > + ioreg.memory_size = dev->memory_size; > + ioreg.write_fd = dev->wfd; > + ioreg.read_fd = dev->rfd; > + ioreg.flags = flags; > + kvm_vm_ioctl(kvm_state, KVM_SET_IOREGION, &ioreg); Printing an error message would be useful when this fails (e.g. when the region overlaps with an existing ioeventfd/ioregionfd). > +} > + > +static void testdev_unrealizefn(DeviceState *d) > +{ > + struct kvm_ioregion ioreg; > + PCTestdev *dev = TESTDEV(d); > + > + if (!dev->guest_paddr || !dev->write_fifo) { > + return; > + } > + > + ioreg.guest_paddr = dev->guest_paddr; > + ioreg.memory_size = dev->memory_size; > + ioreg.flags = KVM_IOREGION_DEASSIGN; > + kvm_vm_ioctl(kvm_state, KVM_SET_IOREGION, &ioreg); > + close(dev->wfd); > + if (dev->rfd > 0) { > + close(dev->rfd); > + } > } > > +static Property ioregionfd_properties[] = { > + DEFINE_PROP_UINT64("addr", PCTestdev, guest_paddr, 0), > + DEFINE_PROP_UINT64("size", PCTestdev, memory_size, 0), > + DEFINE_PROP_STRING("rfifo", PCTestdev, read_fifo), > + DEFINE_PROP_STRING("wfifo", PCTestdev, write_fifo), > + DEFINE_PROP_BOOL("pio", PCTestdev, pio, false), > + DEFINE_PROP_BOOL("pw", PCTestdev, posted_writes, false), > + DEFINE_PROP_END_OF_LIST(), > +}; > + > static void testdev_class_init(ObjectClass *klass, void *data) > { > DeviceClass *dc = DEVICE_CLASS(klass); > > set_bit(DEVICE_CATEGORY_MISC, dc->categories); > dc->realize = testdev_realizefn; > + dc->unrealize = testdev_unrealizefn; > + device_class_set_props(dc, ioregionfd_properties); > } > > static const TypeInfo testdev_info = { > diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h > index 687c598be9..d68728764a 100644 > --- a/include/sysemu/kvm.h > +++ b/include/sysemu/kvm.h > @@ -234,6 +234,8 @@ int kvm_has_intx_set_mask(void); > bool kvm_arm_supports_user_irq(void); > > > +int kvm_vm_ioctl(KVMState *s, int type, ...); > + > #ifdef NEED_CPU_H > #include "cpu.h" > > @@ -257,8 +259,6 @@ void phys_mem_set_alloc(void *(*alloc)(size_t, uint64_t *align, bool shared)); > > int kvm_ioctl(KVMState *s, int type, ...); > > -int kvm_vm_ioctl(KVMState *s, int type, ...); > - > int kvm_vcpu_ioctl(CPUState *cpu, int type, ...); > > /** > diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h > index 020b62a619..c426fa1e56 100644 > --- a/linux-headers/linux/kvm.h > +++ b/linux-headers/linux/kvm.h > @@ -733,6 +733,29 @@ struct kvm_ioeventfd { > __u8 pad[36]; > }; > > +enum { > + kvm_ioregion_flag_nr_pio, > + kvm_ioregion_flag_nr_posted_writes, > + kvm_ioregion_flag_nr_deassign, > + kvm_ioregion_flag_nr_max, > +}; > + > +#define KVM_IOREGION_PIO (1 << kvm_ioregion_flag_nr_pio) > +#define KVM_IOREGION_POSTED_WRITES (1 << kvm_ioregion_flag_nr_posted_writes) > +#define KVM_IOREGION_DEASSIGN (1 << kvm_ioregion_flag_nr_deassign) > + > +#define KVM_IOREGION_VALID_FLAG_MASK ((1 << kvm_ioregion_flag_nr_max) - 1) > + > +struct kvm_ioregion { > + __u64 guest_paddr; /* guest physical address */ > + __u64 memory_size; /* bytes */ > + __u64 user_data; > + __s32 read_fd; > + __s32 write_fd; > + __u32 flags; > + __u8 pad[28]; > +}; > + > #define KVM_X86_DISABLE_EXITS_MWAIT (1 << 0) > #define KVM_X86_DISABLE_EXITS_HLT (1 << 1) > #define KVM_X86_DISABLE_EXITS_PAUSE (1 << 2) > @@ -1311,6 +1334,7 @@ struct kvm_vfio_spapr_tce { > struct kvm_userspace_memory_region) > #define KVM_SET_TSS_ADDR _IO(KVMIO, 0x47) > #define KVM_SET_IDENTITY_MAP_ADDR _IOW(KVMIO, 0x48, __u64) > +#define KVM_SET_IOREGION _IOW(KVMIO, 0x49, struct kvm_ioregion) > > /* enable ucontrol for s390 */ > struct kvm_s390_ucas_mapping { > -- > 2.25.1 >
Attachment:
signature.asc
Description: PGP signature