[PATCH 13/16] iommupt: Add the x86 PAE page table format

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This format is used by x86 CPUs and can be used by both x86 IOMMUs (AMD and
VT-d). When an x86 IOMMU is running SVA it uses this page table format.

This implementation follows the AMD v2 io-pgtable version.

There is nothing remarkable here: the format has a variable top level,
limited support for different page sizes, and no contiguous page support.

In principle this can support the 32 bit configuration with fewer table
levels.

FIXME: Compare the bits against the VT-d version too.

Signed-off-by: Jason Gunthorpe <jgg@xxxxxxxxxx>
---
 drivers/iommu/generic_pt/Kconfig            |   6 +
 drivers/iommu/generic_pt/fmt/Makefile       |   2 +
 drivers/iommu/generic_pt/fmt/defs_x86pae.h  |  21 ++
 drivers/iommu/generic_pt/fmt/iommu_x86pae.c |   8 +
 drivers/iommu/generic_pt/fmt/x86pae.h       | 283 ++++++++++++++++++++
 include/linux/generic_pt/common.h           |   4 +
 include/linux/generic_pt/iommu.h            |  12 +
 7 files changed, 336 insertions(+)
 create mode 100644 drivers/iommu/generic_pt/fmt/defs_x86pae.h
 create mode 100644 drivers/iommu/generic_pt/fmt/iommu_x86pae.c
 create mode 100644 drivers/iommu/generic_pt/fmt/x86pae.h

diff --git a/drivers/iommu/generic_pt/Kconfig b/drivers/iommu/generic_pt/Kconfig
index e34be10cf8bac2..a7c006234fc218 100644
--- a/drivers/iommu/generic_pt/Kconfig
+++ b/drivers/iommu/generic_pt/Kconfig
@@ -70,6 +70,11 @@ config IOMMU_PT_ARMV8_64K
 
 	  If unsure, say N here.
 
+config IOMMU_PT_X86PAE
+	tristate "IOMMU page table for x86 PAE"
+	depends on !GENERIC_ATOMIC64 # for cmpxchg64
+	default n
+
 config IOMMUT_PT_KUNIT_TEST
 	tristate "IOMMU Page Table KUnit Test" if !KUNIT_ALL_TESTS
 	select IOMMU_IO_PGTABLE
@@ -78,6 +83,7 @@ config IOMMUT_PT_KUNIT_TEST
 	depends on IOMMU_PT_ARMV8_4K || !IOMMU_PT_ARMV8_4K
 	depends on IOMMU_PT_ARMV8_16K || !IOMMU_PT_ARMV8_16K
 	depends on IOMMU_PT_ARMV8_64K || !IOMMU_PT_ARMV8_64K
+	depends on IOMMU_PT_X86PAE || !IOMMU_PT_X86PAE
 	default KUNIT_ALL_TESTS
 endif
 endif
diff --git a/drivers/iommu/generic_pt/fmt/Makefile b/drivers/iommu/generic_pt/fmt/Makefile
index 16031fc1270178..fe3d7ae3685468 100644
--- a/drivers/iommu/generic_pt/fmt/Makefile
+++ b/drivers/iommu/generic_pt/fmt/Makefile
@@ -6,6 +6,8 @@ iommu_pt_fmt-$(CONFIG_IOMMU_PT_ARMV8_4K) += armv8_4k
 iommu_pt_fmt-$(CONFIG_IOMMU_PT_ARMV8_16K) += armv8_16k
 iommu_pt_fmt-$(CONFIG_IOMMU_PT_ARMV8_64K) += armv8_64k
 
+iommu_pt_fmt-$(CONFIG_IOMMU_PT_X86PAE) += x86pae
+
 IOMMU_PT_KUNIT_TEST :=
 define create_format
 obj-$(2) += iommu_$(1).o
diff --git a/drivers/iommu/generic_pt/fmt/defs_x86pae.h b/drivers/iommu/generic_pt/fmt/defs_x86pae.h
new file mode 100644
index 00000000000000..0d93454264b5da
--- /dev/null
+++ b/drivers/iommu/generic_pt/fmt/defs_x86pae.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES
+ *
+ */
+#ifndef __GENERIC_PT_FMT_DEFS_X86PAE_H
+#define __GENERIC_PT_FMT_DEFS_X86PAE_H
+
+#include <linux/generic_pt/common.h>
+#include <linux/types.h>
+
+typedef u64 pt_vaddr_t;
+typedef u64 pt_oaddr_t;
+
+struct x86pae_pt_write_attrs {
+	u64 descriptor_bits;
+	gfp_t gfp;
+};
+#define pt_write_attrs x86pae_pt_write_attrs
+
+#endif
diff --git a/drivers/iommu/generic_pt/fmt/iommu_x86pae.c b/drivers/iommu/generic_pt/fmt/iommu_x86pae.c
new file mode 100644
index 00000000000000..f7ec71c61729e3
--- /dev/null
+++ b/drivers/iommu/generic_pt/fmt/iommu_x86pae.c
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES
+ */
+#define PT_FMT x86pae
+#define PT_SUPPORTED_FEATURES 0
+
+#include "iommu_template.h"
diff --git a/drivers/iommu/generic_pt/fmt/x86pae.h b/drivers/iommu/generic_pt/fmt/x86pae.h
new file mode 100644
index 00000000000000..9e0ee74275fcb3
--- /dev/null
+++ b/drivers/iommu/generic_pt/fmt/x86pae.h
@@ -0,0 +1,283 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES
+ *
+ * x86 PAE page table
+ *
+ * This is described in
+ *   Section "4.4 PAE Paging" of the Intel Software Developer's Manual Volume 3
+ *   Section "2.2.6 I/O Page Tables for Guest Translations" of the "AMD I/O
+ *   Virtualization Technology (IOMMU) Specification"
+ *
+ * It is used by x86 CPUs and the AMD and VT-d IOMMU HW.
+ *
+ * The named levels in the spec map to the pts->level as:
+ *   Table/PTE - 0
+ *   Directory/PDE - 1
+ *   Directory Ptr/PDPTE - 2
+ *   PML4/PML4E - 3
+ *   PML5/PML5E - 4
+ * FIXME: __sme_set
+ */
+#ifndef __GENERIC_PT_FMT_X86PAE_H
+#define __GENERIC_PT_FMT_X86PAE_H
+
+#include "defs_x86pae.h"
+#include "../pt_defs.h"
+
+#include <linux/bitfield.h>
+#include <linux/container_of.h>
+#include <linux/log2.h>
+
+enum {
+	PT_MAX_OUTPUT_ADDRESS_LG2 = 52,
+	PT_MAX_VA_ADDRESS_LG2 = 57,
+	PT_ENTRY_WORD_SIZE = sizeof(u64),
+	PT_MAX_TOP_LEVEL = 4,
+	PT_GRANUAL_LG2SZ = 12,
+	PT_TABLEMEM_LG2SZ = 12,
+};
+
+/* Shared descriptor bits */
+enum {
+	X86PAE_FMT_P = BIT(0),
+	X86PAE_FMT_RW = BIT(1),
+	X86PAE_FMT_U = BIT(2),
+	X86PAE_FMT_A = BIT(5),
+	X86PAE_FMT_D = BIT(6),
+	X86PAE_FMT_OA = GENMASK_ULL(51, 12),
+	X86PAE_FMT_XD = BIT_ULL(63),
+};
+
+/* PDPTE/PDE */
+enum {
+	X86PAE_FMT_PS = BIT(7),
+};
+
+#define common_to_x86pae_pt(common_ptr) \
+	container_of_const(common_ptr, struct pt_x86pae, common)
+#define to_x86pae_pt(pts) common_to_x86pae_pt((pts)->range->common)
+
+static inline pt_oaddr_t x86pae_pt_table_pa(const struct pt_state *pts)
+{
+	return log2_mul(FIELD_GET(X86PAE_FMT_OA, pts->entry),
+			PT_TABLEMEM_LG2SZ);
+}
+#define pt_table_pa x86pae_pt_table_pa
+
+static inline pt_oaddr_t x86pae_pt_entry_oa(const struct pt_state *pts)
+{
+	return log2_mul(FIELD_GET(X86PAE_FMT_OA, pts->entry), PT_GRANUAL_LG2SZ);
+}
+#define pt_entry_oa x86pae_pt_entry_oa
+
+static inline bool x86pae_pt_can_have_leaf(const struct pt_state *pts)
+{
+	return pts->level <= 2;
+}
+#define pt_can_have_leaf x86pae_pt_can_have_leaf
+
+static inline unsigned int
+x86pae_pt_table_item_lg2sz(const struct pt_state *pts)
+{
+	return PT_GRANUAL_LG2SZ +
+	       (PT_TABLEMEM_LG2SZ - ilog2(sizeof(u64))) * pts->level;
+}
+#define pt_table_item_lg2sz x86pae_pt_table_item_lg2sz
+
+static inline unsigned int x86pae_pt_num_items_lg2(const struct pt_state *pts)
+{
+	return PT_TABLEMEM_LG2SZ - ilog2(sizeof(u64));
+}
+#define pt_num_items_lg2 x86pae_pt_num_items_lg2
+
+static inline enum pt_entry_type x86pae_pt_load_entry_raw(struct pt_state *pts)
+{
+	const u64 *tablep = pt_cur_table(pts, u64);
+	u64 entry = READ_ONCE(tablep[pts->index]);
+
+	pts->entry = entry; /* keep the snapshot that is classified below */
+	if (!(entry & X86PAE_FMT_P))
+		return PT_ENTRY_EMPTY; /* P clear */
+	if (pts->level == 0 ||
+	    (x86pae_pt_can_have_leaf(pts) && (entry & X86PAE_FMT_PS)))
+		return PT_ENTRY_OA; /* level 0, or PS set where leaves are allowed */
+	return PT_ENTRY_TABLE;
+}
+#define pt_load_entry_raw x86pae_pt_load_entry_raw
+
+static inline void
+x86pae_pt_install_leaf_entry(struct pt_state *pts, pt_oaddr_t oa,
+			     unsigned int oasz_lg2,
+			     const struct pt_write_attrs *attrs)
+{
+	u64 *tablep = pt_cur_table(pts, u64);
+	u64 entry = attrs->descriptor_bits | X86PAE_FMT_P;
+
+	/* oasz_lg2 is not consulted; PS is derived from the level alone */
+	entry |= FIELD_PREP(X86PAE_FMT_OA, log2_div(oa, PT_GRANUAL_LG2SZ));
+	if (pts->level != 0)
+		entry |= X86PAE_FMT_PS;
+
+	/* Store into the table slot, then mirror the value into pts */
+	WRITE_ONCE(tablep[pts->index], entry);
+	pts->entry = entry;
+}
+#define pt_install_leaf_entry x86pae_pt_install_leaf_entry
+
+static inline bool x86pae_pt_install_table(struct pt_state *pts,
+					   pt_oaddr_t table_pa,
+					   const struct pt_write_attrs *attrs)
+{
+	u64 *tablep = pt_cur_table(pts, u64);
+	u64 entry;
+
+	/*
+	 * Encode with PT_TABLEMEM_LG2SZ to mirror x86pae_pt_table_pa().
+	 * FIXME: SDM says HW ignores D on table pointers? io_pgtable_v2 sets it
+	 */
+	entry = X86PAE_FMT_P | X86PAE_FMT_RW | X86PAE_FMT_U | X86PAE_FMT_A |
+		X86PAE_FMT_D |
+		FIELD_PREP(X86PAE_FMT_OA, log2_div(table_pa, PT_TABLEMEM_LG2SZ));
+	return pt_table_install64(&tablep[pts->index], entry, pts->entry);
+}
+#define pt_install_table x86pae_pt_install_table
+
+static inline void x86pae_pt_attr_from_entry(const struct pt_state *pts,
+					     struct pt_write_attrs *attrs)
+{
+	attrs->descriptor_bits = pts->entry &
+				 (X86PAE_FMT_RW | X86PAE_FMT_U | X86PAE_FMT_A |
+				  X86PAE_FMT_D | X86PAE_FMT_XD);
+}
+#define pt_attr_from_entry x86pae_pt_attr_from_entry
+
+static inline void x86pae_pt_clear_entry(struct pt_state *pts,
+					 unsigned int num_contig_lg2)
+{
+	u64 *tablep = pt_cur_table(pts, u64);
+
+	WRITE_ONCE(tablep[pts->index], 0);
+}
+#define pt_clear_entry x86pae_pt_clear_entry
+
+/* --- iommu */
+#include <linux/generic_pt/iommu.h>
+#include <linux/iommu.h>
+
+#define pt_iommu_table pt_iommu_x86pae
+
+/* The common struct is in the per-format common struct */
+static inline struct pt_common *common_from_iommu(struct pt_iommu *iommu_table)
+{
+	return &container_of(iommu_table, struct pt_iommu_table, iommu)
+			->x86pae_pt.common;
+}
+
+static inline struct pt_iommu *iommu_from_common(struct pt_common *common)
+{
+	return &container_of(common, struct pt_iommu_table, x86pae_pt.common)
+			->iommu;
+}
+
+static inline int x86pae_pt_iommu_set_prot(struct pt_common *common,
+					   struct pt_write_attrs *attrs,
+					   unsigned int iommu_prot)
+{
+	/* U, A and D are always requested; RW only for IOMMU_WRITE */
+	u64 bits = X86PAE_FMT_U | X86PAE_FMT_A | X86PAE_FMT_D;
+
+	if (iommu_prot & IOMMU_WRITE)
+		bits |= X86PAE_FMT_RW;
+
+	attrs->descriptor_bits = bits;
+	return 0;
+}
+#define pt_iommu_set_prot x86pae_pt_iommu_set_prot
+
+static inline int x86pae_pt_iommu_fmt_init(struct pt_iommu_x86pae *iommu_table,
+					   struct pt_iommu_x86pae_cfg *cfg)
+{
+	struct pt_x86pae *table = &iommu_table->x86pae_pt;
+
+	pt_top_set_level(&table->common, 3); /* FIXME: make settable */
+	return 0;
+}
+#define pt_iommu_fmt_init x86pae_pt_iommu_fmt_init
+
+#if defined(GENERIC_PT_KUNIT)
+static void x86pae_pt_kunit_setup_cfg(struct pt_iommu_x86pae_cfg *cfg)
+{
+}
+#define pt_kunit_setup_cfg x86pae_pt_kunit_setup_cfg
+#endif
+
+#if defined(GENERIC_PT_KUNIT) && IS_ENABLED(CONFIG_AMD_IOMMU)
+#include <linux/io-pgtable.h>
+#include "../../amd/amd_iommu_types.h"
+
+static struct io_pgtable_ops *
+x86pae_pt_iommu_alloc_io_pgtable(struct pt_iommu_x86pae_cfg *cfg,
+				 struct device *iommu_dev,
+				 struct io_pgtable_cfg **pgtbl_cfg)
+{
+	struct amd_io_pgtable *pgtable;
+	struct io_pgtable_ops *pgtbl_ops;
+
+	/*
+	 * AMD expects that io_pgtable_cfg is allocated to its type by the
+	 * caller.
+	 */
+	pgtable = kzalloc(sizeof(*pgtable), GFP_KERNEL);
+	if (!pgtable)
+		return NULL;
+
+	pgtable->iop.cfg.iommu_dev = iommu_dev;
+	pgtable->iop.cfg.amd.nid = NUMA_NO_NODE;
+	pgtbl_ops = alloc_io_pgtable_ops(AMD_IOMMU_V2, &pgtable->iop.cfg, NULL);
+	if (!pgtbl_ops) {
+		kfree(pgtable);
+		return NULL;
+	}
+	*pgtbl_cfg = &pgtable->iop.cfg;
+	return pgtbl_ops;
+}
+#define pt_iommu_alloc_io_pgtable x86pae_pt_iommu_alloc_io_pgtable
+
+static void x86pae_pt_iommu_free_pgtbl_cfg(struct io_pgtable_cfg *pgtbl_cfg)
+{
+	struct amd_io_pgtable *pgtable =
+		container_of(pgtbl_cfg, struct amd_io_pgtable, iop.cfg);
+
+	kfree(pgtable);
+}
+#define pt_iommu_free_pgtbl_cfg x86pae_pt_iommu_free_pgtbl_cfg
+
+static void x86pae_pt_iommu_setup_ref_table(struct pt_iommu_x86pae *iommu_table,
+					    struct io_pgtable_ops *pgtbl_ops)
+{
+	struct io_pgtable_cfg *pgtbl_cfg =
+		&io_pgtable_ops_to_pgtable(pgtbl_ops)->cfg;
+	struct amd_io_pgtable *pgtable =
+		container_of(pgtbl_cfg, struct amd_io_pgtable, iop.cfg);
+	struct pt_common *common = &iommu_table->x86pae_pt.common;
+
+	if (pgtbl_cfg->ias == 52 && PT_MAX_TOP_LEVEL >= 3)
+		pt_top_set(common, (struct pt_table_p *)pgtable->pgd, 3);
+	else if (pgtbl_cfg->ias == 57 && PT_MAX_TOP_LEVEL >= 4)
+		pt_top_set(common, (struct pt_table_p *)pgtable->pgd, 4);
+	else
+		WARN_ON(true);
+}
+#define pt_iommu_setup_ref_table x86pae_pt_iommu_setup_ref_table
+
+static u64 x86pae_pt_kunit_cmp_mask_entry(struct pt_state *pts)
+{
+	if (pts->type != PT_ENTRY_TABLE)
+		return pts->entry; /* non-table entries are returned whole */
+	return pts->entry & ~(u64)X86PAE_FMT_OA; /* table: drop the OA field */
+}
+#define pt_kunit_cmp_mask_entry x86pae_pt_kunit_cmp_mask_entry
+#endif
+
+#endif
diff --git a/include/linux/generic_pt/common.h b/include/linux/generic_pt/common.h
index e8d489dff756a8..e35fb83657f73b 100644
--- a/include/linux/generic_pt/common.h
+++ b/include/linux/generic_pt/common.h
@@ -126,4 +126,8 @@ enum {
 	PT_FEAT_ARMV8_NS,
 };
 
+struct pt_x86pae {
+	struct pt_common common;
+};
+
 #endif
diff --git a/include/linux/generic_pt/iommu.h b/include/linux/generic_pt/iommu.h
index bf139c5657fc06..ca69bb6192d1a7 100644
--- a/include/linux/generic_pt/iommu.h
+++ b/include/linux/generic_pt/iommu.h
@@ -289,4 +289,16 @@ static inline int pt_iommu_armv8_init(struct pt_iommu_armv8 *table,
 	}
 }
 
+struct pt_iommu_x86pae {
+	struct pt_iommu iommu;
+	struct pt_x86pae x86pae_pt;
+};
+
+struct pt_iommu_x86pae_cfg {
+	struct device *iommu_device;
+	unsigned int features;
+};
+int pt_iommu_x86pae_init(struct pt_iommu_x86pae *table,
+			 struct pt_iommu_x86pae_cfg *cfg, gfp_t gfp);
+
 #endif
-- 
2.46.0





[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux