On Wed, Oct 20, 2010 at 04:26:31PM +0800, Sheng Yang wrote: > It would be work with KVM_CAP_DEVICE_MSIX_MASK, which we would enable in the > last patch. > > Signed-off-by: Sheng Yang <sheng@xxxxxxxxxxxxxxx> Merge this with patch 8 - it does not make sense to add a bunch of users of the field msix_mmio_base but init it in the next patch. > --- > include/linux/kvm.h | 7 +++ > include/linux/kvm_host.h | 2 + > virt/kvm/assigned-dev.c | 131 ++++++++++++++++++++++++++++++++++++++++++++++ > 3 files changed, 140 insertions(+), 0 deletions(-) > > diff --git a/include/linux/kvm.h b/include/linux/kvm.h > index a699ec9..0a7bd34 100644 > --- a/include/linux/kvm.h > +++ b/include/linux/kvm.h > @@ -798,4 +798,11 @@ struct kvm_assigned_msix_entry { > __u16 padding[2]; > }; > > +struct kvm_assigned_msix_mmio { > + __u32 assigned_dev_id; I think Avi already commented on this - there's implicit padding here (before the 64-bit base_addr). > + __u64 base_addr; > + __u32 flags; > + __u32 reserved[2]; > +}; > + > #endif /* __LINUX_KVM_H */ > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h > index 81a6284..b67082f 100644 > --- a/include/linux/kvm_host.h > +++ b/include/linux/kvm_host.h > @@ -465,6 +465,8 @@ struct kvm_assigned_dev_kernel { > struct pci_dev *dev; > struct kvm *kvm; > spinlock_t assigned_dev_lock; > + u64 msix_mmio_base; > + struct kvm_io_device msix_mmio_dev; > }; > > struct kvm_irq_mask_notifier { > diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c > index bf96ea7..5d2adc4 100644 > --- a/virt/kvm/assigned-dev.c > +++ b/virt/kvm/assigned-dev.c > @@ -739,6 +739,137 @@ msix_entry_out: > > return r; > } > + > +static bool msix_mmio_in_range(struct kvm_assigned_dev_kernel *adev, > + gpa_t addr, int len, int *idx) > +{ > + int i; > + > + if (!(adev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX)) > + return false; > + BUG_ON(adev->msix_mmio_base == 0); > + for (i = 0; i < adev->entries_nr; i++) { > + u64 start, end; > + start = adev->msix_mmio_base + > + adev->guest_msix_entries[i].entry * 
PCI_MSIX_ENTRY_SIZE; > + end = start + PCI_MSIX_ENTRY_SIZE; > + if (addr >= start && addr + len <= end) { > + *idx = i; > + return true; > + } > + } We really should not need guest_msix_entries at all: if we are emulating MSI-X in the kernel anyway, let us just emulate all of it there. Doing half of the setup from qemu and half from kvm will just create problems. If you do it all in the kernel, you will simply need a single range check to see whether this is a mask write. > + return false; > +} > + > +static int msix_mmio_read(struct kvm_io_device *this, gpa_t addr, int len, > + void *val) > +{ > + struct kvm_assigned_dev_kernel *adev = > + container_of(this, struct kvm_assigned_dev_kernel, > + msix_mmio_dev); > + int idx, r = 0; > + u32 entry[4]; > + struct kvm_kernel_irq_routing_entry *e; > + > + mutex_lock(&adev->kvm->lock); > + if (!msix_mmio_in_range(adev, addr, len, &idx)) { > + r = -EOPNOTSUPP; > + goto out; > + } > + if ((addr & 0x3) || len != 4) { > + printk(KERN_WARNING > + "KVM: Unaligned reading for device MSI-X MMIO! " > + "addr 0x%llx, len %d\n", addr, len); > + r = -EOPNOTSUPP; > + goto out; > + } > + > + e = kvm_get_irq_routing_entry(adev->kvm, > + adev->guest_msix_entries[idx].vector); > + if (!e || e->type != KVM_IRQ_ROUTING_MSI) { > + printk(KERN_WARNING "KVM: Wrong MSI-X routing entry! 
" > + "addr 0x%llx, len %d\n", addr, len); > + r = -EOPNOTSUPP; > + goto out; > + } > + entry[0] = e->msi.address_lo; > + entry[1] = e->msi.address_hi; > + entry[2] = e->msi.data; > + entry[3] = !!(adev->guest_msix_entries[idx].flags & > + KVM_ASSIGNED_MSIX_MASK); > + memcpy(val, &entry[addr % PCI_MSIX_ENTRY_SIZE / 4], len); > + > +out: > + mutex_unlock(&adev->kvm->lock); > + return r; > +} > + > +static int msix_mmio_write(struct kvm_io_device *this, gpa_t addr, int len, > + const void *val) > +{ > + struct kvm_assigned_dev_kernel *adev = > + container_of(this, struct kvm_assigned_dev_kernel, > + msix_mmio_dev); > + int idx, r = 0; > + unsigned long new_val = *(unsigned long *)val; > + bool entry_masked; > + > + mutex_lock(&adev->kvm->lock); > + if (!msix_mmio_in_range(adev, addr, len, &idx)) { > + r = -EOPNOTSUPP; > + goto out; > + } > + if ((addr & 0x3) || len != 4) { > + printk(KERN_WARNING > + "KVM: Unaligned writing for device MSI-X MMIO! " > + "addr 0x%llx, len %d, val 0x%lx\n", > + addr, len, new_val); > + r = -EOPNOTSUPP; > + goto out; > + } > + entry_masked = adev->guest_msix_entries[idx].flags & > + KVM_ASSIGNED_MSIX_MASK; > + if (addr % PCI_MSIX_ENTRY_SIZE != PCI_MSIX_ENTRY_VECTOR_CTRL) { > + /* Only allow entry modification when entry was masked */ > + if (!entry_masked) { > + printk(KERN_WARNING > + "KVM: guest try to write unmasked MSI-X entry. " > + "addr 0x%llx, len %d, val 0x%lx\n", > + addr, len, new_val); > + r = 0; > + } else > + /* Leave it to QEmu */ > + r = -EOPNOTSUPP; So half the emulation is here half is there... Let's just put it all in kernel and be done with it? > + goto out; > + } > + if (new_val & ~1ul) { > + printk(KERN_WARNING > + "KVM: Bad writing for device MSI-X MMIO! 
" > + "addr 0x%llx, len %d, val 0x%lx\n", > + addr, len, new_val); > + r = -EOPNOTSUPP; > + goto out; > + } > + if (new_val == 1 && !entry_masked) { > + adev->guest_msix_entries[idx].flags |= > + KVM_ASSIGNED_MSIX_MASK; > + update_msix_mask(adev, idx); > + } else if (new_val == 0 && entry_masked) { > + adev->guest_msix_entries[idx].flags &= > + ~KVM_ASSIGNED_MSIX_MASK; > + update_msix_mask(adev, idx); > + } > +out: > + mutex_unlock(&adev->kvm->lock); > + > + return r; > +} > + > +static const struct kvm_io_device_ops msix_mmio_ops = { > + .read = msix_mmio_read, > + .write = msix_mmio_write, > +}; > + > #endif > > long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, > -- > 1.7.0.1 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html