On 10/20/2010 10:26 AM, Sheng Yang wrote:
It would be work with KVM_CAP_DEVICE_MSIX_MASK, which we would enable in the last patch. +struct kvm_assigned_msix_mmio { + __u32 assigned_dev_id; + __u64 base_addr;
Different alignment and size on 32 and 64 bits. Is base_addr a guest physical address? Do we need a size or it it fixed?
+ __u32 flags; + __u32 reserved[2]; +}; + @@ -465,6 +465,8 @@ struct kvm_assigned_dev_kernel { struct pci_dev *dev; struct kvm *kvm; spinlock_t assigned_dev_lock; + u64 msix_mmio_base;
gpa_t.
+ struct kvm_io_device msix_mmio_dev; }; struct kvm_irq_mask_notifier { diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c index bf96ea7..5d2adc4 100644 --- a/virt/kvm/assigned-dev.c +++ b/virt/kvm/assigned-dev.c @@ -739,6 +739,137 @@ msix_entry_out: return r; } + +static bool msix_mmio_in_range(struct kvm_assigned_dev_kernel *adev, + gpa_t addr, int len, int *idx) +{ + int i; + + if (!(adev->irq_requested_type& KVM_DEV_IRQ_HOST_MSIX)) + return false;
Just don't install the io_device in that case.
+ BUG_ON(adev->msix_mmio_base == 0); + for (i = 0; i< adev->entries_nr; i++) { + u64 start, end; + start = adev->msix_mmio_base + + adev->guest_msix_entries[i].entry * PCI_MSIX_ENTRY_SIZE; + end = start + PCI_MSIX_ENTRY_SIZE; + if (addr>= start&& addr + len<= end) { + *idx = i; + return true; + }
What if it's a partial hit? write part of an entry and part of another entry?
+ } + return false; +} + +static int msix_mmio_read(struct kvm_io_device *this, gpa_t addr, int len, + void *val) +{ + struct kvm_assigned_dev_kernel *adev = + container_of(this, struct kvm_assigned_dev_kernel, + msix_mmio_dev); + int idx, r = 0; + u32 entry[4]; + struct kvm_kernel_irq_routing_entry *e; + + mutex_lock(&adev->kvm->lock); + if (!msix_mmio_in_range(adev, addr, len,&idx)) { + r = -EOPNOTSUPP; + goto out; + } + if ((addr& 0x3) || len != 4) { + printk(KERN_WARNING + "KVM: Unaligned reading for device MSI-X MMIO! " + "addr 0x%llx, len %d\n", addr, len);
Guest exploitable printk()
+ r = -EOPNOTSUPP;
If the guest assigned the device to another guest, it allows the nested guest to kill the non-nested guest. Need to exit in a graceful fashion.
+ goto out; + } + + e = kvm_get_irq_routing_entry(adev->kvm, + adev->guest_msix_entries[idx].vector); + if (!e || e->type != KVM_IRQ_ROUTING_MSI) { + printk(KERN_WARNING "KVM: Wrong MSI-X routing entry! " + "addr 0x%llx, len %d\n", addr, len); + r = -EOPNOTSUPP; + goto out; + } + entry[0] = e->msi.address_lo; + entry[1] = e->msi.address_hi; + entry[2] = e->msi.data; + entry[3] = !!(adev->guest_msix_entries[idx].flags& + KVM_ASSIGNED_MSIX_MASK); + memcpy(val,&entry[addr % PCI_MSIX_ENTRY_SIZE / 4], len); + +out: + mutex_unlock(&adev->kvm->lock); + return r; +} + +static int msix_mmio_write(struct kvm_io_device *this, gpa_t addr, int len, + const void *val) +{ + struct kvm_assigned_dev_kernel *adev = + container_of(this, struct kvm_assigned_dev_kernel, + msix_mmio_dev); + int idx, r = 0; + unsigned long new_val = *(unsigned long *)val; + bool entry_masked; + + mutex_lock(&adev->kvm->lock); + if (!msix_mmio_in_range(adev, addr, len,&idx)) { + r = -EOPNOTSUPP; + goto out; + } + if ((addr& 0x3) || len != 4) { + printk(KERN_WARNING + "KVM: Unaligned writing for device MSI-X MMIO! " + "addr 0x%llx, len %d, val 0x%lx\n", + addr, len, new_val); + r = -EOPNOTSUPP; + goto out; + } + entry_masked = adev->guest_msix_entries[idx].flags& + KVM_ASSIGNED_MSIX_MASK; + if (addr % PCI_MSIX_ENTRY_SIZE != PCI_MSIX_ENTRY_VECTOR_CTRL) { + /* Only allow entry modification when entry was masked */ + if (!entry_masked) { + printk(KERN_WARNING + "KVM: guest try to write unmasked MSI-X entry. " + "addr 0x%llx, len %d, val 0x%lx\n", + addr, len, new_val); + r = 0;
What does the spec says about this situation?
+ } else + /* Leave it to QEmu */
s/qemu/userspace/
+ r = -EOPNOTSUPP;
What would userspace do in this situation? I hope you documented precisely what the kernel handles and what it doesn't?
I prefer more kernel code in the kernel to having an interface which is hard to use correctly.
+ goto out; + } + if (new_val& ~1ul) {
Is there a #define for this bit?
+ printk(KERN_WARNING + "KVM: Bad writing for device MSI-X MMIO! " + "addr 0x%llx, len %d, val 0x%lx\n", + addr, len, new_val); + r = -EOPNOTSUPP; + goto out; + } + if (new_val == 1&& !entry_masked) { + adev->guest_msix_entries[idx].flags |= + KVM_ASSIGNED_MSIX_MASK; + update_msix_mask(adev, idx); + } else if (new_val == 0&& entry_masked) { + adev->guest_msix_entries[idx].flags&= + ~KVM_ASSIGNED_MSIX_MASK; + update_msix_mask(adev, idx); + }
Ah, I see you do reuse update_msix_mask().
+out: + mutex_unlock(&adev->kvm->lock); + + return r; +} + +static const struct kvm_io_device_ops msix_mmio_ops = { + .read = msix_mmio_read, + .write = msix_mmio_write, +}; + #endif long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl,
-- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html