Status update

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi everybody,


Here's a status update (a public one this time) on my work:

---

Accomplishments:
- [DONE] Generic IOMMU API for devices
- [DONE] PIIX IDE uses the new API
- [DONE] Permissions handling

Bonus:
- [DONE] RTL8139 converted

Objectives:
- [WIP] Reporting IO page faults in the event log
	- will require MSI support
- [WIP] Inject some errors for testing
- Relay IOMMU configuration data to SeaBIOS (last devfn etc.)
- Handle IOMMU updates correctly during AIO (see below)
	- will require maintaining an internal cache

Absence:
- A couple of days. My last exam is on the 2nd of July, then school's
  over.

---

A few details...

I need to thank Paul Brook for helping me, during both e-mail and IRC
conversations, figure how to fit the IOMMU API with qdev.

Then there's the AIO issue. The DMA layer currently pretranslates
addresses, maps them into the host, then schedules AIO (which happens on
a separate thread). It's technically possible, though unlikely, that the
guest OS may change the IOMMU mappings before AIO completes, which can
result in incorrect behavior wrt real hardware.

I think we should implement some sort of IOTLB/caching in the IOMMU, to
prevent this issue from biting people. Then the IOMMU will signal to
the guest OS that page tables have been invalidated only when AIO
finishes. Caching is a good idea anyway. Here's how it goes:

IOMMU/hw emulation	| Guest			| AIO
-----------------------------------------------------------------
			  start DMA transfer
start_transaction()
translate addresses
map addresses
start AIO ---------------------------------------->
						  do I/O
			  change mappings	  .
			  invalidate command      .
process command		  wait cmd completion     .
wait AIO completion				  .
			  			  do I/O
	  <----------------------------------------
unmap addresses
end_transaction()
signal cmd completion
			  use new mappings
			  start other transfers

On the other hand, we could just leave it alone for now. Changing
mappings during DMA is stupid anyway: I don't think the guest can
recover the results of DMA safely, even though it might be used on
transfers in progress you simply don't care about anymore. Paul Brook
suggested we could update the cpu_physical_memory_map() mappings
somehow, but I think that's kinda difficult to accomplish.

I've included a draft of the generic IOMMU API, in case you think
something needs to be changed. Ideally, this should work for any bus and
IOMMU, and also handle bus bridges. No provisions have been made for
interrupt translation _yet_.

Will post patches soon. Feel free to suggest devices to convert, if you
consider any of them a priority. We currently have the PIIX IDE and
RTL8139 working, and Linux boots and works well with the IOMMU enabled.


	Cheers,
	Eduard

diff --git a/hw/iommu.c b/hw/iommu.c
new file mode 100644
index 0000000..410c88c
--- /dev/null
+++ b/hw/iommu.c
@@ -0,0 +1,83 @@
+/*
+ * IOMMU layer
+ *
+ * Copyright (c) 2010 Eduard - Gabriel Munteanu
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <errno.h>
+
+#include "iommu.h"
+
+struct iommu *iommu_get(DeviceState *dev, DeviceState **real_dev)
+{
+    BusState *bus;
+
+    while (dev) {
+        bus = dev->parent_bus;
+        if (!bus)
+            goto out;
+
+        if (bus->iommu) {
+            *real_dev = dev;
+            return bus->iommu;
+        }
+
+        dev = bus->parent;
+    }
+
+out:
+    *real_dev = NULL;
+    return NULL;
+}
+
+int __iommu_rw(struct iommu *iommu,
+               DeviceState *dev,
+               target_phys_addr_t addr,
+               uint8_t *buf,
+               int len,
+               int is_write)
+{
+    int plen, err;
+    target_phys_addr_t paddr;
+    unsigned perms;
+
+    if (!is_write)
+        perms = IOMMU_PERM_READ;
+    else
+        perms = IOMMU_PERM_WRITE;
+
+    do {
+        err = iommu->translate(iommu, dev, addr, &paddr, &plen, perms);
+        if (err)
+            return err;
+        if (plen > len)
+            plen = len;
+
+        cpu_physical_memory_rw(paddr, buf, plen, is_write);
+
+        len -= plen;
+        addr += plen;
+        buf += plen;
+    } while (len);
+
+    return 0;
+}
+
diff --git a/hw/iommu.h b/hw/iommu.h
index cb51a6e..01978ab 100644
--- a/hw/iommu.h
+++ b/hw/iommu.h
@@ -1,8 +1,204 @@
 #ifndef QEMU_IOMMU_H
 #define QEMU_IOMMU_H
 
-#include <targphys.h>
+#include "pci.h"
+#include "targphys.h"
+#include "qdev.h"
 
-extern target_phys_addr_t iommu_translate(int bdf, target_phys_addr_t addr);
+/* Don't use directly. */
+struct iommu {
+    void *opaque;
+
+    void (*register_device)(struct iommu *iommu,
+                            DeviceState *dev);
+    int (*translate)(struct iommu *iommu,
+                     DeviceState *dev,
+                     target_phys_addr_t addr,
+                     target_phys_addr_t *paddr,
+                     int *len,
+                     unsigned perms);
+    int (*start_transaction)(struct iommu *iommu,
+                             DeviceState *dev);
+    void (*end_transaction)(struct iommu *iommu,
+                            DeviceState *dev);
+};
+
+#define IOMMU_PERM_READ   (1 << 0)
+#define IOMMU_PERM_WRITE  (1 << 1)
+
+#define IOMMU_PERM_RW     (IOMMU_PERM_READ | IOMMU_PERM_WRITE)
+
+static inline int iommu_nop_translate(struct iommu *iommu,
+                                      DeviceState *dev,
+                                      target_phys_addr_t addr,
+                                      target_phys_addr_t *paddr,
+                                      int *len,
+                                      unsigned perms)
+{
+    *paddr = addr;
+    *len = INT_MAX;
+
+    return 0;
+}
+
+static inline int iommu_nop_rw(struct iommu *iommu,
+                               DeviceState *dev,
+                               target_phys_addr_t addr,
+                               uint8_t *buf,
+                               int len,
+                               int is_write)
+{
+    cpu_physical_memory_rw(addr, buf, len, is_write);
+
+    return 0;
+}
+
+static inline int iommu_register_device(struct iommu *iommu,
+                                        DeviceState *dev)
+{
+    if (iommu && iommu->register_device)
+        iommu->register_device(iommu, dev);
+
+    return 0;
+}
+
+#ifdef CONFIG_IOMMU
+
+extern struct iommu *iommu_get(DeviceState *dev, DeviceState **real_dev);
+
+/**
+ * Translates an address for the given device and performs access checking.
+ *
+ * Defined in implementation-specific IOMMU code.
+ *
+ * @iommu   IOMMU
+ * @dev     qdev device
+ * @addr    address to be translated
+ * @paddr   translated address
+ * @len     number of bytes for which the translation is valid
+ * @rw      read or write?
+ *
+ * Returns 0 iff translation and access checking succeeded.
+ */
+static inline int iommu_translate(struct iommu *iommu,
+                                  DeviceState *dev,
+                                  target_phys_addr_t addr,
+                                  target_phys_addr_t *paddr,
+                                  int *len,
+                                  unsigned perms)
+{
+    if (iommu && iommu->translate)
+        return iommu->translate(iommu, dev, addr, paddr, len, perms);
+
+    return iommu_nop_translate(iommu, dev, addr, paddr, len, perms);
+}
+
+extern int __iommu_rw(struct iommu *iommu,
+                      DeviceState *dev,
+                      target_phys_addr_t addr,
+                      uint8_t *buf,
+                      int len,
+                      int is_write);
+
+/**
+ * Performs I/O with address translation and access checking.
+ *
+ * Defined in generic IOMMU code.
+ *
+ * @iommu   IOMMU
+ * @dev     qdev device
+ * @addr    address where to perform I/O
+ * @buf     buffer to read from or write to
+ * @len     length of the operation
+ * @rw      read or write?
+ *
+ * Returns 0 iff the I/O operation succeeded.
+ */
+static inline int iommu_rw(struct iommu *iommu,
+                           DeviceState *dev,
+                           target_phys_addr_t addr,
+                           uint8_t *buf,
+                           int len,
+                           int is_write)
+{
+    if (iommu && iommu->translate)
+        return __iommu_rw(iommu, dev, addr, buf, len, is_write);
+
+    return iommu_nop_rw(iommu, dev, addr, buf, len, is_write);
+}
+
+static inline int iommu_start_transaction(struct iommu *iommu,
+                                          DeviceState *dev)
+{
+    if (iommu && iommu->start_transaction)
+        return iommu->start_transaction(iommu, dev);
+
+    return 0;
+}
+
+static inline void iommu_end_transaction(struct iommu *iommu,
+                                         DeviceState *dev)
+{
+    if (iommu && iommu->end_transaction)
+        iommu->end_transaction(iommu, dev);
+}
+
+#else /* CONFIG_IOMMU */
+
+static inline struct iommu *iommu_get(DeviceState *dev, DeviceState **real_dev)
+{
+    return NULL;
+}
+
+static inline int iommu_translate(struct iommu *iommu,
+                                  DeviceState *dev,
+                                  target_phys_addr_t addr,
+                                  target_phys_addr_t *paddr,
+                                  int *len,
+                                  unsigned perms)
+{
+    return iommu_nop_translate(iommu, dev, addr, paddr, len, perms);
+}
+
+static inline int iommu_rw(struct iommu *iommu,
+                           DeviceState *dev,
+                           target_phys_addr_t addr,
+                           uint8_t *buf,
+                           int len,
+                           int is_write)
+{
+    return iommu_nop_rw(iommu, dev, addr, buf, len, is_write);
+}
+
+static inline int iommu_start_transaction(struct iommu *iommu,
+                                          DeviceState *dev)
+{
+    return 0;
+}
+
+static inline void iommu_end_transaction(struct iommu *iommu,
+                                         DeviceState *dev)
+{
+}
+
+#endif /* CONFIG_IOMMU */
+
+static inline int iommu_read(struct iommu *iommu,
+                             DeviceState *dev,
+                             target_phys_addr_t addr,
+                             uint8_t *buf,
+                             int len)
+{
+    return iommu_rw(iommu, dev, addr, buf, len, 0);
+}
+
+static inline int iommu_write(struct iommu *iommu,
+                              DeviceState *dev,
+                              target_phys_addr_t addr,
+                              const uint8_t *buf,
+                              int len)
+{
+    return iommu_rw(iommu, dev, addr, (uint8_t *) buf, len, 1);
+}
 
 #endif
diff --git a/hw/qdev.h b/hw/qdev.h
index a44060e..6e1e633 100644
--- a/hw/qdev.h
+++ b/hw/qdev.h
@@ -56,6 +56,8 @@ struct BusInfo {
     Property *props;
 };
 
+struct iommu;
+
 struct BusState {
     DeviceState *parent;
     BusInfo *info;
@@ -64,6 +66,10 @@ struct BusState {
     int qdev_allocated;
     QLIST_HEAD(, DeviceState) children;
     QLIST_ENTRY(BusState) sibling;
+
+#ifdef CONFIG_IOMMU
+    struct iommu *iommu;
+#endif
 };
 
 struct Property {

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux