[PATCH 16/16] iommupt: Add the Intel VT-D second stage page table format

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



The VT-D second stage format is almost the same as the x86 PAE format,
except the bit encodings in the PTE are different and a few new PTE
features, like force coherency are present.

Among all the formats it is unique in not having a designated present bit.

Cc: Tina Zhang <tina.zhang@xxxxxxxxx>
Cc: Kevin Tian <kevin.tian@xxxxxxxxx>
Cc: Lu Baolu <baolu.lu@xxxxxxxxxxxxxxx>
Signed-off-by: Jason Gunthorpe <jgg@xxxxxxxxxx>
---
 drivers/iommu/generic_pt/Kconfig           |   6 +
 drivers/iommu/generic_pt/fmt/Makefile      |   2 +
 drivers/iommu/generic_pt/fmt/defs_vtdss.h  |  21 ++
 drivers/iommu/generic_pt/fmt/iommu_vtdss.c |   8 +
 drivers/iommu/generic_pt/fmt/vtdss.h       | 276 +++++++++++++++++++++
 include/linux/generic_pt/common.h          |   4 +
 include/linux/generic_pt/iommu.h           |  12 +
 7 files changed, 329 insertions(+)
 create mode 100644 drivers/iommu/generic_pt/fmt/defs_vtdss.h
 create mode 100644 drivers/iommu/generic_pt/fmt/iommu_vtdss.c
 create mode 100644 drivers/iommu/generic_pt/fmt/vtdss.h

diff --git a/drivers/iommu/generic_pt/Kconfig b/drivers/iommu/generic_pt/Kconfig
index 2d08b58e953e4d..c17e09e2d03025 100644
--- a/drivers/iommu/generic_pt/Kconfig
+++ b/drivers/iommu/generic_pt/Kconfig
@@ -90,6 +90,11 @@ config IOMMU_PT_DART
 
 	  If unsure, say N here.
 
+config IOMMU_PT_VTDSS
+       tristate "IOMMU page table for Intel VT-D IOMMU Second Stage"
+	depends on !GENERIC_ATOMIC64 # for cmpxchg64
+	default n
+
 config IOMMU_PT_X86PAE
        tristate "IOMMU page table for x86 PAE"
 	depends on !GENERIC_ATOMIC64 # for cmpxchg64
@@ -105,6 +110,7 @@ config IOMMUT_PT_KUNIT_TEST
 	depends on IOMMU_PT_ARMV8_16K || !IOMMU_PT_ARMV8_16K
 	depends on IOMMU_PT_ARMV8_64K || !IOMMU_PT_ARMV8_64K
 	depends on IOMMU_PT_DART || !IOMMU_PT_DART
+	depends on IOMMU_PT_VTDSS || !IOMMU_PT_VTDSS
 	depends on IOMMU_PT_X86PAE || !IOMMU_PT_X86PAE
 	default KUNIT_ALL_TESTS
 endif
diff --git a/drivers/iommu/generic_pt/fmt/Makefile b/drivers/iommu/generic_pt/fmt/Makefile
index 1e10be24758fef..5a77c64d432534 100644
--- a/drivers/iommu/generic_pt/fmt/Makefile
+++ b/drivers/iommu/generic_pt/fmt/Makefile
@@ -10,6 +10,8 @@ iommu_pt_fmt-$(CONFIG_IOMMU_PT_ARMV8_64K) += armv8_64k
 
 iommu_pt_fmt-$(CONFIG_IOMMU_PT_DART) += dart
 
+iommu_pt_fmt-$(CONFIG_IOMMU_PT_VTDSS) += vtdss
+
 iommu_pt_fmt-$(CONFIG_IOMMU_PT_X86PAE) += x86pae
 
 IOMMU_PT_KUNIT_TEST :=
diff --git a/drivers/iommu/generic_pt/fmt/defs_vtdss.h b/drivers/iommu/generic_pt/fmt/defs_vtdss.h
new file mode 100644
index 00000000000000..4a239bcaae2a90
--- /dev/null
+++ b/drivers/iommu/generic_pt/fmt/defs_vtdss.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES
+ *
+ */
+#ifndef __GENERIC_PT_FMT_DEFS_VTDSS_H
+#define __GENERIC_PT_FMT_DEFS_VTDSS_H
+
+#include <linux/generic_pt/common.h>
+#include <linux/types.h>
+
+typedef u64 pt_vaddr_t;
+typedef u64 pt_oaddr_t;
+
+struct vtdss_pt_write_attrs {
+	u64 descriptor_bits;
+	gfp_t gfp;
+};
+#define pt_write_attrs vtdss_pt_write_attrs
+
+#endif
diff --git a/drivers/iommu/generic_pt/fmt/iommu_vtdss.c b/drivers/iommu/generic_pt/fmt/iommu_vtdss.c
new file mode 100644
index 00000000000000..12e7829815047b
--- /dev/null
+++ b/drivers/iommu/generic_pt/fmt/iommu_vtdss.c
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES
+ */
+#define PT_FMT vtdss
+#define PT_SUPPORTED_FEATURES 0
+
+#include "iommu_template.h"
diff --git a/drivers/iommu/generic_pt/fmt/vtdss.h b/drivers/iommu/generic_pt/fmt/vtdss.h
new file mode 100644
index 00000000000000..233731365ac62d
--- /dev/null
+++ b/drivers/iommu/generic_pt/fmt/vtdss.h
@@ -0,0 +1,276 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES
+ *
+ * Intel VT-D Second Stange 5/4 level page table
+ *
+ * This is described in
+ *   Section "3.7 Second-Stage Translation"
+ *   Section "9.8 Second-Stage Paging Entries"
+ *
+ * Of the "Intel Virtualization Technology for Directed I/O Architecture
+ * Specification".
+ *
+ * The named levels in the spec map to the pts->level as:
+ *   Table/SS-PTE - 0
+ *   Directory/SS-PDE - 1
+ *   Directory Ptr/SS-PDPTE - 2
+ *   PML4/SS-PML4E - 3
+ *   PML5/SS-PML5E - 4
+ * FIXME:
+ *  force_snooping
+ *  1g optional
+ *  forbid read-only
+ *  Use of direct clflush instead of DMA API
+ */
+#ifndef __GENERIC_PT_FMT_VTDSS_H
+#define __GENERIC_PT_FMT_VTDSS_H
+
+#include "defs_vtdss.h"
+#include "../pt_defs.h"
+
+#include <linux/bitfield.h>
+#include <linux/container_of.h>
+#include <linux/log2.h>
+
+enum {
+	PT_MAX_OUTPUT_ADDRESS_LG2 = 52,
+	PT_MAX_VA_ADDRESS_LG2 = 57,
+	PT_ENTRY_WORD_SIZE = sizeof(u64),
+	PT_MAX_TOP_LEVEL = 4,
+	PT_GRANUAL_LG2SZ = 12,
+	PT_TABLEMEM_LG2SZ = 12,
+};
+
+/* Shared descriptor bits */
+enum {
+	VTDSS_FMT_R = BIT(0),
+	VTDSS_FMT_W = BIT(1),
+	VTDSS_FMT_X = BIT(2),
+	VTDSS_FMT_A = BIT(8),
+	VTDSS_FMT_D = BIT(9),
+	VTDSS_FMT_SNP = BIT(11),
+	VTDSS_FMT_OA = GENMASK_ULL(51, 12),
+};
+
+/* PDPTE/PDE */
+enum {
+	VTDSS_FMT_PS = BIT(7),
+};
+
+#define common_to_vtdss_pt(common_ptr) \
+	container_of_const(common_ptr, struct pt_vtdss, common)
+#define to_vtdss_pt(pts) common_to_vtdss_pt((pts)->range->common)
+
+static inline pt_oaddr_t vtdss_pt_table_pa(const struct pt_state *pts)
+{
+	return log2_mul(FIELD_GET(VTDSS_FMT_OA, pts->entry), PT_TABLEMEM_LG2SZ);
+}
+#define pt_table_pa vtdss_pt_table_pa
+
+static inline pt_oaddr_t vtdss_pt_entry_oa(const struct pt_state *pts)
+{
+	return log2_mul(FIELD_GET(VTDSS_FMT_OA, pts->entry), PT_GRANUAL_LG2SZ);
+}
+#define pt_entry_oa vtdss_pt_entry_oa
+
+static inline bool vtdss_pt_can_have_leaf(const struct pt_state *pts)
+{
+	return pts->level <= 2;
+}
+#define pt_can_have_leaf vtdss_pt_can_have_leaf
+
+static inline unsigned int vtdss_pt_table_item_lg2sz(const struct pt_state *pts)
+{
+	return PT_GRANUAL_LG2SZ +
+	       (PT_TABLEMEM_LG2SZ - ilog2(sizeof(u64))) * pts->level;
+}
+#define pt_table_item_lg2sz vtdss_pt_table_item_lg2sz
+
+static inline unsigned int vtdss_pt_num_items_lg2(const struct pt_state *pts)
+{
+	return PT_TABLEMEM_LG2SZ - ilog2(sizeof(u64));
+}
+#define pt_num_items_lg2 vtdss_pt_num_items_lg2
+
+static inline enum pt_entry_type vtdss_pt_load_entry_raw(struct pt_state *pts)
+{
+	const u64 *tablep = pt_cur_table(pts, u64);
+	u64 entry;
+
+	pts->entry = entry = READ_ONCE(tablep[pts->index]);
+	if (!entry)
+		return PT_ENTRY_EMPTY;
+	if (pts->level == 0 ||
+	    (vtdss_pt_can_have_leaf(pts) && (pts->entry & VTDSS_FMT_PS)))
+		return PT_ENTRY_OA;
+	return PT_ENTRY_TABLE;
+}
+#define pt_load_entry_raw vtdss_pt_load_entry_raw
+
+static inline void
+vtdss_pt_install_leaf_entry(struct pt_state *pts, pt_oaddr_t oa,
+			    unsigned int oasz_lg2,
+			    const struct pt_write_attrs *attrs)
+{
+	u64 *tablep = pt_cur_table(pts, u64);
+	u64 entry;
+
+	entry = FIELD_PREP(VTDSS_FMT_OA, log2_div(oa, PT_GRANUAL_LG2SZ)) |
+		attrs->descriptor_bits;
+	if (pts->level != 0)
+		entry |= VTDSS_FMT_PS;
+
+	WRITE_ONCE(tablep[pts->index], entry);
+	pts->entry = entry;
+}
+#define pt_install_leaf_entry vtdss_pt_install_leaf_entry
+
+static inline bool vtdss_pt_install_table(struct pt_state *pts,
+					  pt_oaddr_t table_pa,
+					  const struct pt_write_attrs *attrs)
+{
+	u64 *tablep = pt_cur_table(pts, u64);
+	u64 entry;
+
+	/*
+	 * FIXME according to the SDM D is ignored by HW on table pointers?
+	 * io_pgtable_v2 sets it
+	 */
+	entry = VTDSS_FMT_R | VTDSS_FMT_W |
+		FIELD_PREP(VTDSS_FMT_OA, log2_div(table_pa, PT_GRANUAL_LG2SZ));
+	return pt_table_install64(&tablep[pts->index], entry, pts->entry);
+}
+#define pt_install_table vtdss_pt_install_table
+
+static inline void vtdss_pt_attr_from_entry(const struct pt_state *pts,
+					    struct pt_write_attrs *attrs)
+{
+	attrs->descriptor_bits = pts->entry & (VTDSS_FMT_R | VTDSS_FMT_W |
+					       VTDSS_FMT_X | VTDSS_FMT_SNP);
+}
+#define pt_attr_from_entry vtdss_pt_attr_from_entry
+
+static inline void vtdss_pt_clear_entry(struct pt_state *pts,
+					unsigned int num_contig_lg2)
+{
+	u64 *tablep = pt_cur_table(pts, u64);
+
+	WRITE_ONCE(tablep[pts->index], 0);
+}
+#define pt_clear_entry vtdss_pt_clear_entry
+
+/* --- iommu */
+#include <linux/generic_pt/iommu.h>
+#include <linux/iommu.h>
+
+#define pt_iommu_table pt_iommu_vtdss
+
+/* The common struct is in the per-format common struct */
+static inline struct pt_common *common_from_iommu(struct pt_iommu *iommu_table)
+{
+	return &container_of(iommu_table, struct pt_iommu_table, iommu)
+			->vtdss_pt.common;
+}
+
+static inline struct pt_iommu *iommu_from_common(struct pt_common *common)
+{
+	return &container_of(common, struct pt_iommu_table, vtdss_pt.common)
+			->iommu;
+}
+
+static inline int vtdss_pt_iommu_set_prot(struct pt_common *common,
+					  struct pt_write_attrs *attrs,
+					  unsigned int iommu_prot)
+{
+	u64 pte = 0;
+
+	/*
+	 * VTDSS does not have a present bit, so we tell if any entry is present
+	 * by checking for R or W.
+	 */
+	if (!(iommu_prot & (IOMMU_READ | IOMMU_WRITE)))
+		return -EINVAL;
+
+	/*
+	 * FIXME: The VTD driver has a bug setting DMA_FL_PTE_PRESENT on the SS
+	 * table, which forces R on always.
+	 */
+	pte |= VTDSS_FMT_R;
+
+	if (iommu_prot & IOMMU_READ)
+		pte |= VTDSS_FMT_R;
+	if (iommu_prot & IOMMU_WRITE)
+		pte |= VTDSS_FMT_W;
+/* FIXME	if (dmar_domain->set_pte_snp)
+		pte |= VTDSS_FMT_SNP; */
+
+	attrs->descriptor_bits = pte;
+	return 0;
+}
+#define pt_iommu_set_prot vtdss_pt_iommu_set_prot
+
+static inline int vtdss_pt_iommu_fmt_init(struct pt_iommu_vtdss *iommu_table,
+					  struct pt_iommu_vtdss_cfg *cfg)
+{
+	struct pt_vtdss *table = &iommu_table->vtdss_pt;
+
+	/* FIXME configurable */
+	pt_top_set_level(&table->common, 3);
+	return 0;
+}
+#define pt_iommu_fmt_init vtdss_pt_iommu_fmt_init
+
+#if defined(GENERIC_PT_KUNIT)
+static void vtdss_pt_kunit_setup_cfg(struct pt_iommu_vtdss_cfg *cfg)
+{
+}
+#define pt_kunit_setup_cfg vtdss_pt_kunit_setup_cfg
+#endif
+
+/*
+ * Requires Tina's series:
+ *  https://patch.msgid.link/r/20231106071226.9656-3-tina.zhang@xxxxxxxxx
+ * See my github for an integrated version
+ */
+#if defined(GENERIC_PT_KUNIT) && IS_ENABLED(CONFIG_CONFIG_IOMMU_IO_PGTABLE_VTD)
+#include <linux/io-pgtable.h>
+
+static struct io_pgtable_ops *
+vtdss_pt_iommu_alloc_io_pgtable(struct pt_iommu_vtdss_cfg *cfg,
+				struct device *iommu_dev,
+				struct io_pgtable_cfg **unused_pgtbl_cfg)
+{
+	struct io_pgtable_cfg pgtbl_cfg = {};
+
+	pgtbl_cfg.ias = 48;
+	pgtbl_cfg.oas = 52;
+	pgtbl_cfg.vtd_cfg.cap_reg = 4 << 8;
+	pgtbl_cfg.vtd_cfg.ecap_reg = BIT(26) | BIT(60) | BIT_ULL(48) | BIT_ULL(56);
+	pgtbl_cfg.pgsize_bitmap = SZ_4K;
+	pgtbl_cfg.coherent_walk = true;
+	return alloc_io_pgtable_ops(INTEL_IOMMU, &pgtbl_cfg, NULL);
+}
+#define pt_iommu_alloc_io_pgtable vtdss_pt_iommu_alloc_io_pgtable
+
+static void vtdss_pt_iommu_setup_ref_table(struct pt_iommu_vtdss *iommu_table,
+					   struct io_pgtable_ops *pgtbl_ops)
+{
+	struct io_pgtable_cfg *pgtbl_cfg =
+		&io_pgtable_ops_to_pgtable(pgtbl_ops)->cfg;
+	struct pt_common *common = &iommu_table->vtdss_pt.common;
+
+	pt_top_set(common, __va(pgtbl_cfg->vtd_cfg.pgd), 3);
+}
+#define pt_iommu_setup_ref_table vtdss_pt_iommu_setup_ref_table
+
+static u64 vtdss_pt_kunit_cmp_mask_entry(struct pt_state *pts)
+{
+	if (pts->type == PT_ENTRY_TABLE)
+		return pts->entry & (~(u64)(VTDSS_FMT_OA));
+	return pts->entry;
+}
+#define pt_kunit_cmp_mask_entry vtdss_pt_kunit_cmp_mask_entry
+#endif
+
+#endif
diff --git a/include/linux/generic_pt/common.h b/include/linux/generic_pt/common.h
index 558302fe1e0324..a3469132db7dda 100644
--- a/include/linux/generic_pt/common.h
+++ b/include/linux/generic_pt/common.h
@@ -145,6 +145,10 @@ enum {
 	PT_FEAT_DART_V2 = PT_FEAT_FMT_START,
 };
 
+struct pt_vtdss {
+	struct pt_common common;
+};
+
 struct pt_x86pae {
 	struct pt_common common;
 };
diff --git a/include/linux/generic_pt/iommu.h b/include/linux/generic_pt/iommu.h
index 351a69fe62dd1d..b9ecab07b0223d 100644
--- a/include/linux/generic_pt/iommu.h
+++ b/include/linux/generic_pt/iommu.h
@@ -317,6 +317,18 @@ struct pt_iommu_dart_cfg {
 int pt_iommu_dart_init(struct pt_iommu_dart *table,
 		       struct pt_iommu_dart_cfg *cfg, gfp_t gfp);
 
+struct pt_iommu_vtdss {
+	struct pt_iommu iommu;
+	struct pt_vtdss vtdss_pt;
+};
+
+struct pt_iommu_vtdss_cfg {
+	struct device *iommu_device;
+	unsigned int features;
+};
+int pt_iommu_vtdss_init(struct pt_iommu_vtdss *table,
+			struct pt_iommu_vtdss_cfg *cfg, gfp_t gfp);
+
 struct pt_iommu_x86pae {
 	struct pt_iommu iommu;
 	struct pt_x86pae x86pae_pt;
-- 
2.46.0





[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux