Connect the IOAS to its IOCTL interface. This exposes most of the
functionality in the io_pagetable to userspace.
This is intended to be the core of the generic interface that IOMMUFD will
provide. Every IOMMU driver should be able to implement an iommu_domain
that is compatible with this generic mechanism.
It is also designed to be easy to use for simple users that are not
virtual machine monitors, like DPDK:
- Universal simple support for all IOMMUs (no PPC special path)
- An IOVA allocator that considers the aperture and the reserved ranges
- io_pagetable allows any number of iommu_domains to be connected to the
IOAS
Along with room in the design to add non-generic features to cater to
specific HW functionality.
Signed-off-by: Jason Gunthorpe <jgg@xxxxxxxxxx>
---
drivers/iommu/iommufd/Makefile | 1 +
drivers/iommu/iommufd/ioas.c | 248 ++++++++++++++++++++++++
drivers/iommu/iommufd/iommufd_private.h | 27 +++
drivers/iommu/iommufd/main.c | 17 ++
include/uapi/linux/iommufd.h | 132 +++++++++++++
5 files changed, 425 insertions(+)
create mode 100644 drivers/iommu/iommufd/ioas.c
diff --git a/drivers/iommu/iommufd/Makefile b/drivers/iommu/iommufd/Makefile
index b66a8c47ff55ec..2b4f36f1b72f9d 100644
--- a/drivers/iommu/iommufd/Makefile
+++ b/drivers/iommu/iommufd/Makefile
@@ -1,6 +1,7 @@
# SPDX-License-Identifier: GPL-2.0-only
iommufd-y := \
io_pagetable.o \
+ ioas.o \
main.o \
pages.o
diff --git a/drivers/iommu/iommufd/ioas.c b/drivers/iommu/iommufd/ioas.c
new file mode 100644
index 00000000000000..c530b2ba74b06b
--- /dev/null
+++ b/drivers/iommu/iommufd/ioas.c
@@ -0,0 +1,248 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES
+ */
+#include <linux/interval_tree.h>
+#include <linux/iommufd.h>
+#include <linux/iommu.h>
+#include <uapi/linux/iommufd.h>
+
+#include "io_pagetable.h"
+
+/*
+ * Object destructor for an IOAS: unmap everything remaining in the
+ * io_pagetable and then free the table itself. The WARN_ON documents that
+ * iopt_unmap_all() is expected to succeed by the time the object is
+ * destroyed.
+ */
+void iommufd_ioas_destroy(struct iommufd_object *obj)
+{
+ struct iommufd_ioas *ioas = container_of(obj, struct iommufd_ioas, obj);
+ int rc;
+
+ rc = iopt_unmap_all(&ioas->iopt);
+ WARN_ON(rc);
+ iopt_destroy_table(&ioas->iopt);
+}
+
+/*
+ * Allocate an IOAS object and initialize its io_pagetable. On success the
+ * object is allocated but not yet finalized: the caller must either call
+ * iommufd_object_finalize() to publish it or destroy it on a later failure.
+ * Returns the new ioas or an ERR_PTR.
+ */
+struct iommufd_ioas *iommufd_ioas_alloc(struct iommufd_ctx *ictx)
+{
+ struct iommufd_ioas *ioas;
+ int rc;
+
+ ioas = iommufd_object_alloc(ictx, ioas, IOMMUFD_OBJ_IOAS);
+ if (IS_ERR(ioas))
+ return ioas;
+
+ rc = iopt_init_table(&ioas->iopt);
+ if (rc)
+ goto out_abort;
+ return ioas;
+
+out_abort:
+ /* Undo the object allocation since it was never made visible */
+ iommufd_object_abort(ictx, &ioas->obj);
+ return ERR_PTR(rc);
+}
+
+/*
+ * Handler for IOMMU_IOAS_ALLOC: allocate a new IOAS and return its ID to
+ * userspace. The object only becomes visible (finalized) after the reply has
+ * been successfully copied out, so a failed respond cannot leak a live ID to
+ * userspace.
+ */
+int iommufd_ioas_alloc_ioctl(struct iommufd_ucmd *ucmd)
+{
+ struct iommu_ioas_alloc *cmd = ucmd->cmd;
+ struct iommufd_ioas *ioas;
+ int rc;
+
+ /* No flags are defined yet */
+ if (cmd->flags)
+ return -EOPNOTSUPP;
+
+ ioas = iommufd_ioas_alloc(ucmd->ictx);
+ if (IS_ERR(ioas))
+ return PTR_ERR(ioas);
+
+ cmd->out_ioas_id = ioas->obj.id;
+ rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
+ if (rc)
+ goto out_table;
+ iommufd_object_finalize(ucmd->ictx, &ioas->obj);
+ return 0;
+
+out_table:
+ /*
+ * NOTE(review): this destroys an object that was never finalized;
+ * confirm that calling the destructor directly (rather than an
+ * abort-and-destroy helper) correctly releases the allocated ID.
+ */
+ iommufd_ioas_destroy(&ioas->obj);
+ return rc;
+}
+
+/*
+ * Handler for IOMMU_IOAS_IOVA_RANGES: report the IOVA ranges usable for
+ * mapping. The allowed ranges are the "holes" between entries in the
+ * reserved_iova_itree. out_num_iovas always reflects the total number of
+ * ranges; only as many as fit in the user-supplied trailing array are
+ * written out.
+ */
+int iommufd_ioas_iova_ranges(struct iommufd_ucmd *ucmd)
+{
+ struct iommu_ioas_iova_ranges __user *uptr = ucmd->ubuffer;
+ struct iommu_ioas_iova_ranges *cmd = ucmd->cmd;
+ struct iommufd_ioas *ioas;
+ struct interval_tree_span_iter span;
+ u32 max_iovas;
+ int rc;
+
+ if (cmd->__reserved)
+ return -EOPNOTSUPP;
+
+ /*
+ * The array capacity is implied by the ioctl size beyond the fixed
+ * header; it must be a whole number of array elements.
+ */
+ max_iovas = cmd->size - sizeof(*cmd);
+ if (max_iovas % sizeof(cmd->out_valid_iovas[0]))
+ return -EINVAL;
+ max_iovas /= sizeof(cmd->out_valid_iovas[0]);
+
+ ioas = iommufd_get_ioas(ucmd, cmd->ioas_id);
+ if (IS_ERR(ioas))
+ return PTR_ERR(ioas);
+
+ /* Hold iova_rwsem so the reserved ranges are stable while we walk */
+ down_read(&ioas->iopt.iova_rwsem);
+ cmd->out_num_iovas = 0;
+ for (interval_tree_span_iter_first(
+ &span, &ioas->iopt.reserved_iova_itree, 0, ULONG_MAX);
+ !interval_tree_span_iter_done(&span);
+ interval_tree_span_iter_next(&span)) {
+ if (!span.is_hole)
+ continue;
+ if (cmd->out_num_iovas < max_iovas) {
+ rc = put_user((u64)span.start_hole,
+ &uptr->out_valid_iovas[cmd->out_num_iovas]
+ .start);
+ if (rc)
+ goto out_put;
+ rc = put_user(
+ (u64)span.last_hole,
+ &uptr->out_valid_iovas[cmd->out_num_iovas].last);
+ if (rc)
+ goto out_put;
+ }
+ /* Count every range even when the array is already full */
+ cmd->out_num_iovas++;
+ }
+ rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
+ if (rc)
+ goto out_put;
+ /*
+ * Truncated output: respond first so userspace still sees the total
+ * count, then report -EMSGSIZE so it knows to retry with more space.
+ */
+ if (cmd->out_num_iovas > max_iovas)
+ rc = -EMSGSIZE;
+out_put:
+ up_read(&ioas->iopt.iova_rwsem);
+ iommufd_put_object(&ioas->obj);
+ return rc;
+}
+
+/*
+ * Translate IOMMU_IOAS_MAP_* uapi flags into IOMMU_READ/IOMMU_WRITE prot
+ * bits for the iommu layer. IOMMU_CACHE is unconditionally added, see below.
+ */
+static int conv_iommu_prot(u32 map_flags)
+{
+ int iommu_prot;
+
+ /*
+ * We provide no manual cache coherency ioctls to userspace and most
+ * architectures make the CPU ops for cache flushing privileged.
+ * Therefore we require the underlying IOMMU to support CPU coherent
+ * operation.
+ */
+ iommu_prot = IOMMU_CACHE;
+ if (map_flags & IOMMU_IOAS_MAP_WRITEABLE)
+ iommu_prot |= IOMMU_WRITE;
+ if (map_flags & IOMMU_IOAS_MAP_READABLE)
+ iommu_prot |= IOMMU_READ;
+ return iommu_prot;
+}
+
+/*
+ * Handler for IOMMU_IOAS_MAP: establish an IOVA mapping over a userspace VA
+ * range. If IOMMU_IOAS_MAP_FIXED_IOVA is not set an IOVA is allocated
+ * automatically and returned to userspace in cmd->iova.
+ */
+int iommufd_ioas_map(struct iommufd_ucmd *ucmd)
+{
+ struct iommu_ioas_map *cmd = ucmd->cmd;
+ struct iommufd_ioas *ioas;
+ unsigned int flags = 0;
+ unsigned long iova;
+ int rc;
+
+ if ((cmd->flags &
+ ~(IOMMU_IOAS_MAP_FIXED_IOVA | IOMMU_IOAS_MAP_WRITEABLE |
+ IOMMU_IOAS_MAP_READABLE)) ||
+ cmd->__reserved)
+ return -EOPNOTSUPP;
+ /*
+ * The uapi fields are u64 but the io_pagetable works in unsigned
+ * long; reject values that would truncate on 32 bit.
+ */
+ if (cmd->iova >= ULONG_MAX || cmd->length >= ULONG_MAX)
+ return -EOVERFLOW;
+
+ ioas = iommufd_get_ioas(ucmd, cmd->ioas_id);
+ if (IS_ERR(ioas))
+ return PTR_ERR(ioas);
+
+ if (!(cmd->flags & IOMMU_IOAS_MAP_FIXED_IOVA))
+ flags = IOPT_ALLOC_IOVA;
+ iova = cmd->iova;
+ rc = iopt_map_user_pages(&ioas->iopt, &iova,
+ u64_to_user_ptr(cmd->user_va), cmd->length,
+ conv_iommu_prot(cmd->flags), flags);
+ if (rc)
+ goto out_put;
+
+ /* Tell userspace where the mapping ended up */
+ cmd->iova = iova;
+ rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
+out_put:
+ iommufd_put_object(&ioas->obj);
+ return rc;
+}
+
+/*
+ * Handler for IOMMU_IOAS_COPY: replicate a mapping from one IOAS into
+ * another without re-pinning the memory. A reference on the backing
+ * iopt_pages is taken from the source, the source object is released, and
+ * the pages are then mapped into the destination. On a successful
+ * iopt_map_pages() the pages reference is owned by the new mapping;
+ * on failure it is dropped here.
+ */
+int iommufd_ioas_copy(struct iommufd_ucmd *ucmd)
+{
+ struct iommu_ioas_copy *cmd = ucmd->cmd;
+ struct iommufd_ioas *src_ioas;
+ struct iommufd_ioas *dst_ioas;
+ struct iopt_pages *pages;
+ unsigned int flags = 0;
+ unsigned long iova;
+ unsigned long start_byte;
+ int rc;
+
+ if ((cmd->flags &
+ ~(IOMMU_IOAS_MAP_FIXED_IOVA | IOMMU_IOAS_MAP_WRITEABLE |
+ IOMMU_IOAS_MAP_READABLE)))
+ return -EOPNOTSUPP;
+ if (cmd->length >= ULONG_MAX)
+ return -EOVERFLOW;
+
+ src_ioas = iommufd_get_ioas(ucmd, cmd->src_ioas_id);
+ if (IS_ERR(src_ioas))
+ return PTR_ERR(src_ioas);
+ /* FIXME: copy is not limited to an exact match anymore */
+ pages = iopt_get_pages(&src_ioas->iopt, cmd->src_iova, &start_byte,
+ cmd->length);
+ iommufd_put_object(&src_ioas->obj);
+ if (IS_ERR(pages))
+ return PTR_ERR(pages);
+
+ dst_ioas = iommufd_get_ioas(ucmd, cmd->dst_ioas_id);
+ if (IS_ERR(dst_ioas)) {
+ iopt_put_pages(pages);
+ return PTR_ERR(dst_ioas);
+ }
+
+ if (!(cmd->flags & IOMMU_IOAS_MAP_FIXED_IOVA))
+ flags = IOPT_ALLOC_IOVA;
+ iova = cmd->dst_iova;
+ rc = iopt_map_pages(&dst_ioas->iopt, pages, &iova, start_byte,
+ cmd->length, conv_iommu_prot(cmd->flags), flags);
+ if (rc) {
+ iopt_put_pages(pages);
+ goto out_put_dst;
+ }
+
+ /* Report the (possibly allocated) destination IOVA */
+ cmd->dst_iova = iova;
+ rc = iommufd_ucmd_respond(ucmd, sizeof(*cmd));
+out_put_dst:
+ iommufd_put_object(&dst_ioas->obj);
+ return rc;
+}
+
+/*
+ * Handler for IOMMU_IOAS_UNMAP: remove mappings from an IOAS. The special
+ * input of iova == 0 && length == U64_MAX means "unmap everything".
+ */
+int iommufd_ioas_unmap(struct iommufd_ucmd *ucmd)
+{
+ struct iommu_ioas_unmap *cmd = ucmd->cmd;
+ struct iommufd_ioas *ioas;
+ int rc;
+
+ ioas = iommufd_get_ioas(ucmd, cmd->ioas_id);
+ if (IS_ERR(ioas))
+ return PTR_ERR(ioas);
+
+ if (cmd->iova == 0 && cmd->length == U64_MAX) {
+ rc = iopt_unmap_all(&ioas->iopt);
+ } else {
+ /* Values must fit in unsigned long for the io_pagetable */
+ if (cmd->iova >= ULONG_MAX || cmd->length >= ULONG_MAX) {
+ rc = -EOVERFLOW;
+ goto out_put;
+ }
+ /*
+ * NOTE(review): length == 0 and iova + length wrap-around are
+ * not rejected here — confirm iopt_unmap_iova() handles both.
+ */
+ rc = iopt_unmap_iova(&ioas->iopt, cmd->iova, cmd->length);
+ }
+
+out_put:
+ iommufd_put_object(&ioas->obj);
+ return rc;
+}
diff --git a/drivers/iommu/iommufd/iommufd_private.h b/drivers/iommu/iommufd/iommufd_private.h
index bcf08e61bc87e9..d24c9dac5a82a9 100644
--- a/drivers/iommu/iommufd/iommufd_private.h
+++ b/drivers/iommu/iommufd/iommufd_private.h
@@ -96,6 +96,7 @@ static inline int iommufd_ucmd_respond(struct iommufd_ucmd *ucmd,
enum iommufd_object_type {
IOMMUFD_OBJ_NONE,
IOMMUFD_OBJ_ANY = IOMMUFD_OBJ_NONE,
+ IOMMUFD_OBJ_IOAS,
IOMMUFD_OBJ_MAX,
};
@@ -147,4 +148,30 @@ struct iommufd_object *_iommufd_object_alloc(struct iommufd_ctx *ictx,
type), \
typeof(*(ptr)), obj)
+/*
+ * The IO Address Space (IOAS) pagetable is a virtual page table backed by the
+ * io_pagetable object. It is a user controlled mapping of IOVA -> PFNs. The
+ * mapping is copied into all of the associated domains and made available to
+ * in-kernel users.
+ */
+struct iommufd_ioas {
+ struct iommufd_object obj; /* generic refcounted object header */
+ struct io_pagetable iopt; /* the IOVA -> PFN mapping itself */
+};
+
+/*
+ * Look up an IOAS by user-provided ID and take a reference on it. Callers
+ * check the result with IS_ERR() and must balance with
+ * iommufd_put_object(&ioas->obj).
+ */
+static inline struct iommufd_ioas *iommufd_get_ioas(struct iommufd_ucmd *ucmd,
+ u32 id)
+{
+ return container_of(iommufd_get_object(ucmd->ictx, id,
+ IOMMUFD_OBJ_IOAS),
+ struct iommufd_ioas, obj);
+}
+
+struct iommufd_ioas *iommufd_ioas_alloc(struct iommufd_ctx *ictx);
+int iommufd_ioas_alloc_ioctl(struct iommufd_ucmd *ucmd);
+void iommufd_ioas_destroy(struct iommufd_object *obj);
+int iommufd_ioas_iova_ranges(struct iommufd_ucmd *ucmd);
+int iommufd_ioas_map(struct iommufd_ucmd *ucmd);
+int iommufd_ioas_copy(struct iommufd_ucmd *ucmd);
+int iommufd_ioas_unmap(struct iommufd_ucmd *ucmd);
#endif
diff --git a/drivers/iommu/iommufd/main.c b/drivers/iommu/iommufd/main.c
index ae8db2f663004f..e506f493b54cfe 100644
--- a/drivers/iommu/iommufd/main.c
+++ b/drivers/iommu/iommufd/main.c
@@ -184,6 +184,10 @@ static int iommufd_fops_release(struct inode *inode, struct file *filp)
}
union ucmd_buffer {
+ struct iommu_ioas_alloc alloc;
+ struct iommu_ioas_iova_ranges iova_ranges;
+ struct iommu_ioas_map map;
+ struct iommu_ioas_unmap unmap;
struct iommu_destroy destroy;
};
@@ -205,6 +209,16 @@ struct iommufd_ioctl_op {
}
static struct iommufd_ioctl_op iommufd_ioctl_ops[] = {
IOCTL_OP(IOMMU_DESTROY, iommufd_destroy, struct iommu_destroy, id),
+ IOCTL_OP(IOMMU_IOAS_ALLOC, iommufd_ioas_alloc_ioctl,
+ struct iommu_ioas_alloc, out_ioas_id),
+ IOCTL_OP(IOMMU_IOAS_COPY, iommufd_ioas_copy, struct iommu_ioas_copy,
+ src_iova),
+ IOCTL_OP(IOMMU_IOAS_IOVA_RANGES, iommufd_ioas_iova_ranges,
+ struct iommu_ioas_iova_ranges, __reserved),
+ IOCTL_OP(IOMMU_IOAS_MAP, iommufd_ioas_map, struct iommu_ioas_map,
+ __reserved),
+ IOCTL_OP(IOMMU_IOAS_UNMAP, iommufd_ioas_unmap, struct iommu_ioas_unmap,
+ length),
};
static long iommufd_fops_ioctl(struct file *filp, unsigned int cmd,
@@ -270,6 +284,9 @@ struct iommufd_ctx *iommufd_fget(int fd)
}
static struct iommufd_object_ops iommufd_object_ops[] = {
+ [IOMMUFD_OBJ_IOAS] = {
+ .destroy = iommufd_ioas_destroy,
+ },
};
static struct miscdevice iommu_misc_dev = {
diff --git a/include/uapi/linux/iommufd.h b/include/uapi/linux/iommufd.h
index 2f7f76ec6db4cb..ba7b17ec3002e3 100644
--- a/include/uapi/linux/iommufd.h
+++ b/include/uapi/linux/iommufd.h
@@ -37,6 +37,11 @@
enum {
IOMMUFD_CMD_BASE = 0x80,
IOMMUFD_CMD_DESTROY = IOMMUFD_CMD_BASE,
+ IOMMUFD_CMD_IOAS_ALLOC,
+ IOMMUFD_CMD_IOAS_IOVA_RANGES,
+ IOMMUFD_CMD_IOAS_MAP,
+ IOMMUFD_CMD_IOAS_COPY,
+ IOMMUFD_CMD_IOAS_UNMAP,
};
/**
@@ -52,4 +57,131 @@ struct iommu_destroy {
};
#define IOMMU_DESTROY _IO(IOMMUFD_TYPE, IOMMUFD_CMD_DESTROY)
+/**
+ * struct iommu_ioas_alloc - ioctl(IOMMU_IOAS_ALLOC)
+ * @size: sizeof(struct iommu_ioas_alloc)
+ * @flags: Must be 0
+ * @out_ioas_id: Output IOAS ID for the allocated object
+ *
+ * Allocate an IO Address Space (IOAS) which holds an IO Virtual Address (IOVA)
+ * to memory mapping. The returned @out_ioas_id identifies the new object in
+ * the other IOAS ioctls.
+ */
+struct iommu_ioas_alloc {
+ __u32 size;
+ __u32 flags;
+ __u32 out_ioas_id;
+};
+#define IOMMU_IOAS_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_ALLOC)
+
+/**
+ * struct iommu_ioas_iova_ranges - ioctl(IOMMU_IOAS_IOVA_RANGES)
+ * @size: sizeof(struct iommu_ioas_iova_ranges)
+ * @ioas_id: IOAS ID to read ranges from
+ * @out_num_iovas: Output total number of ranges in the IOAS
+ * @__reserved: Must be 0
+ * @out_valid_iovas: Array of valid IOVA ranges. The array length is the smaller
+ * of out_num_iovas or the length implied by size.
+ * @out_valid_iovas.start: First IOVA in the allowed range
+ * @out_valid_iovas.last: Inclusive last IOVA in the allowed range
+ *
+ * Query an IOAS for ranges of allowed IOVAs. Operation outside these ranges is
+ * not allowed. out_num_iovas will be set to the total number of iovas
+ * and the out_valid_iovas[] will be filled in as space permits.
+ * size should include the allocated flex array. If the provided array is too
+ * small to hold all the ranges the ioctl fails with -EMSGSIZE after setting
+ * out_num_iovas, so the caller can retry with enough space.
+ */
+struct iommu_ioas_iova_ranges {
+ __u32 size;
+ __u32 ioas_id;
+ __u32 out_num_iovas;
+ __u32 __reserved;
+ struct iommu_valid_iovas {
+ __aligned_u64 start;
+ __aligned_u64 last;
+ } out_valid_iovas[];
+};
+#define IOMMU_IOAS_IOVA_RANGES _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_IOVA_RANGES)
+
+/**
+ * enum iommufd_ioas_map_flags - Flags for map and copy
+ * @IOMMU_IOAS_MAP_FIXED_IOVA: If clear the kernel will compute an appropriate
+ * IOVA to place the mapping at
+ * @IOMMU_IOAS_MAP_WRITEABLE: DMA is allowed to write to this mapping
+ * @IOMMU_IOAS_MAP_READABLE: DMA is allowed to read from this mapping
+ *
+ * These flags are accepted by both IOMMU_IOAS_MAP and IOMMU_IOAS_COPY.
+ */
+enum iommufd_ioas_map_flags {
+ IOMMU_IOAS_MAP_FIXED_IOVA = 1 << 0,
+ IOMMU_IOAS_MAP_WRITEABLE = 1 << 1,
+ IOMMU_IOAS_MAP_READABLE = 1 << 2,
+};
+
+/**
+ * struct iommu_ioas_map - ioctl(IOMMU_IOAS_MAP)
+ * @size: sizeof(struct iommu_ioas_map)
+ * @flags: Combination of enum iommufd_ioas_map_flags
+ * @ioas_id: IOAS ID to change the mapping of
+ * @__reserved: Must be 0
+ * @user_va: Userspace pointer to start mapping from
+ * @length: Number of bytes to map
+ * @iova: IOVA the mapping was placed at. If IOMMU_IOAS_MAP_FIXED_IOVA is set
+ * then this must be provided as input.
+ *
+ * Set an IOVA mapping from a user pointer. If FIXED_IOVA is specified then the
+ * mapping will be established at iova, otherwise a suitable location will be
+ * automatically selected and returned in iova. iova and length that do not
+ * fit in an unsigned long fail with -EOVERFLOW.
+ */
+struct iommu_ioas_map {
+ __u32 size;
+ __u32 flags;
+ __u32 ioas_id;
+ __u32 __reserved;
+ __aligned_u64 user_va;
+ __aligned_u64 length;
+ __aligned_u64 iova;
+};
+#define IOMMU_IOAS_MAP _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_MAP)
+
+/**
+ * struct iommu_ioas_copy - ioctl(IOMMU_IOAS_COPY)
+ * @size: sizeof(struct iommu_ioas_copy)
+ * @flags: Combination of enum iommufd_ioas_map_flags
+ * @dst_ioas_id: IOAS ID to change the mapping of
+ * @src_ioas_id: IOAS ID to copy from