The VT-D second stage format is almost the same as the x86 PAE format, except the bit encodings in the PTE are different and a few new PTE features, like force coherency, are present. Among all the formats it is unique in not having a designated present bit. Cc: Tina Zhang <tina.zhang@xxxxxxxxx> Cc: Kevin Tian <kevin.tian@xxxxxxxxx> Cc: Lu Baolu <baolu.lu@xxxxxxxxxxxxxxx> Signed-off-by: Jason Gunthorpe <jgg@xxxxxxxxxx> --- drivers/iommu/generic_pt/Kconfig | 6 + drivers/iommu/generic_pt/fmt/Makefile | 2 + drivers/iommu/generic_pt/fmt/defs_vtdss.h | 21 ++ drivers/iommu/generic_pt/fmt/iommu_vtdss.c | 8 + drivers/iommu/generic_pt/fmt/vtdss.h | 276 +++++++++++++++++++++ include/linux/generic_pt/common.h | 4 + include/linux/generic_pt/iommu.h | 12 + 7 files changed, 329 insertions(+) create mode 100644 drivers/iommu/generic_pt/fmt/defs_vtdss.h create mode 100644 drivers/iommu/generic_pt/fmt/iommu_vtdss.c create mode 100644 drivers/iommu/generic_pt/fmt/vtdss.h diff --git a/drivers/iommu/generic_pt/Kconfig b/drivers/iommu/generic_pt/Kconfig index 2d08b58e953e4d..c17e09e2d03025 100644 --- a/drivers/iommu/generic_pt/Kconfig +++ b/drivers/iommu/generic_pt/Kconfig @@ -90,6 +90,11 @@ config IOMMU_PT_DART If unsure, say N here. 
+config IOMMU_PT_VTDSS + tristate "IOMMU page table for Intel VT-D IOMMU Second Stage" + depends on !GENERIC_ATOMIC64 # for cmpxchg64 + default n + config IOMMU_PT_X86PAE tristate "IOMMU page table for x86 PAE" depends on !GENERIC_ATOMIC64 # for cmpxchg64 @@ -105,6 +110,7 @@ config IOMMUT_PT_KUNIT_TEST depends on IOMMU_PT_ARMV8_16K || !IOMMU_PT_ARMV8_16K depends on IOMMU_PT_ARMV8_64K || !IOMMU_PT_ARMV8_64K depends on IOMMU_PT_DART || !IOMMU_PT_DART + depends on IOMMU_PT_VTDSS || !IOMMU_PT_VTDSS depends on IOMMU_PT_X86PAE || !IOMMU_PT_X86PAE default KUNIT_ALL_TESTS endif diff --git a/drivers/iommu/generic_pt/fmt/Makefile b/drivers/iommu/generic_pt/fmt/Makefile index 1e10be24758fef..5a77c64d432534 100644 --- a/drivers/iommu/generic_pt/fmt/Makefile +++ b/drivers/iommu/generic_pt/fmt/Makefile @@ -10,6 +10,8 @@ iommu_pt_fmt-$(CONFIG_IOMMU_PT_ARMV8_64K) += armv8_64k iommu_pt_fmt-$(CONFIG_IOMMU_PT_DART) += dart +iommu_pt_fmt-$(CONFIG_IOMMU_PT_VTDSS) += vtdss + iommu_pt_fmt-$(CONFIG_IOMMU_PT_X86PAE) += x86pae IOMMU_PT_KUNIT_TEST := diff --git a/drivers/iommu/generic_pt/fmt/defs_vtdss.h b/drivers/iommu/generic_pt/fmt/defs_vtdss.h new file mode 100644 index 00000000000000..4a239bcaae2a90 --- /dev/null +++ b/drivers/iommu/generic_pt/fmt/defs_vtdss.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES + * + */ +#ifndef __GENERIC_PT_FMT_DEFS_VTDSS_H +#define __GENERIC_PT_FMT_DEFS_VTDSS_H + +#include <linux/generic_pt/common.h> +#include <linux/types.h> + +typedef u64 pt_vaddr_t; +typedef u64 pt_oaddr_t; + +struct vtdss_pt_write_attrs { + u64 descriptor_bits; + gfp_t gfp; +}; +#define pt_write_attrs vtdss_pt_write_attrs + +#endif diff --git a/drivers/iommu/generic_pt/fmt/iommu_vtdss.c b/drivers/iommu/generic_pt/fmt/iommu_vtdss.c new file mode 100644 index 00000000000000..12e7829815047b --- /dev/null +++ b/drivers/iommu/generic_pt/fmt/iommu_vtdss.c @@ -0,0 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + 
* Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES + */ +#define PT_FMT vtdss +#define PT_SUPPORTED_FEATURES 0 + +#include "iommu_template.h" diff --git a/drivers/iommu/generic_pt/fmt/vtdss.h b/drivers/iommu/generic_pt/fmt/vtdss.h new file mode 100644 index 00000000000000..233731365ac62d --- /dev/null +++ b/drivers/iommu/generic_pt/fmt/vtdss.h @@ -0,0 +1,276 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES + * + * Intel VT-D Second Stage 5/4 level page table + * + * This is described in + * Section "3.7 Second-Stage Translation" + * Section "9.8 Second-Stage Paging Entries" + * + * Of the "Intel Virtualization Technology for Directed I/O Architecture + * Specification". + * + * The named levels in the spec map to the pts->level as: + * Table/SS-PTE - 0 + * Directory/SS-PDE - 1 + * Directory Ptr/SS-PDPTE - 2 + * PML4/SS-PML4E - 3 + * PML5/SS-PML5E - 4 + * FIXME: + * force_snooping + * 1g optional + * forbid read-only + * Use of direct clflush instead of DMA API + */ +#ifndef __GENERIC_PT_FMT_VTDSS_H +#define __GENERIC_PT_FMT_VTDSS_H + +#include "defs_vtdss.h" +#include "../pt_defs.h" + +#include <linux/bitfield.h> +#include <linux/container_of.h> +#include <linux/log2.h> + +enum { + PT_MAX_OUTPUT_ADDRESS_LG2 = 52, + PT_MAX_VA_ADDRESS_LG2 = 57, + PT_ENTRY_WORD_SIZE = sizeof(u64), + PT_MAX_TOP_LEVEL = 4, + PT_GRANUAL_LG2SZ = 12, + PT_TABLEMEM_LG2SZ = 12, +}; + +/* Shared descriptor bits */ +enum { + VTDSS_FMT_R = BIT(0), + VTDSS_FMT_W = BIT(1), + VTDSS_FMT_X = BIT(2), + VTDSS_FMT_A = BIT(8), + VTDSS_FMT_D = BIT(9), + VTDSS_FMT_SNP = BIT(11), + VTDSS_FMT_OA = GENMASK_ULL(51, 12), +}; + +/* PDPTE/PDE */ +enum { + VTDSS_FMT_PS = BIT(7), +}; + +#define common_to_vtdss_pt(common_ptr) \ + container_of_const(common_ptr, struct pt_vtdss, common) +#define to_vtdss_pt(pts) common_to_vtdss_pt((pts)->range->common) + +static inline pt_oaddr_t vtdss_pt_table_pa(const struct pt_state *pts) +{ + return 
log2_mul(FIELD_GET(VTDSS_FMT_OA, pts->entry), PT_TABLEMEM_LG2SZ); +} +#define pt_table_pa vtdss_pt_table_pa + +static inline pt_oaddr_t vtdss_pt_entry_oa(const struct pt_state *pts) +{ + return log2_mul(FIELD_GET(VTDSS_FMT_OA, pts->entry), PT_GRANUAL_LG2SZ); +} +#define pt_entry_oa vtdss_pt_entry_oa + +static inline bool vtdss_pt_can_have_leaf(const struct pt_state *pts) +{ + return pts->level <= 2; +} +#define pt_can_have_leaf vtdss_pt_can_have_leaf + +static inline unsigned int vtdss_pt_table_item_lg2sz(const struct pt_state *pts) +{ + return PT_GRANUAL_LG2SZ + + (PT_TABLEMEM_LG2SZ - ilog2(sizeof(u64))) * pts->level; +} +#define pt_table_item_lg2sz vtdss_pt_table_item_lg2sz + +static inline unsigned int vtdss_pt_num_items_lg2(const struct pt_state *pts) +{ + return PT_TABLEMEM_LG2SZ - ilog2(sizeof(u64)); +} +#define pt_num_items_lg2 vtdss_pt_num_items_lg2 + +static inline enum pt_entry_type vtdss_pt_load_entry_raw(struct pt_state *pts) +{ + const u64 *tablep = pt_cur_table(pts, u64); + u64 entry; + + pts->entry = entry = READ_ONCE(tablep[pts->index]); + if (!entry) + return PT_ENTRY_EMPTY; + if (pts->level == 0 || + (vtdss_pt_can_have_leaf(pts) && (pts->entry & VTDSS_FMT_PS))) + return PT_ENTRY_OA; + return PT_ENTRY_TABLE; +} +#define pt_load_entry_raw vtdss_pt_load_entry_raw + +static inline void +vtdss_pt_install_leaf_entry(struct pt_state *pts, pt_oaddr_t oa, + unsigned int oasz_lg2, + const struct pt_write_attrs *attrs) +{ + u64 *tablep = pt_cur_table(pts, u64); + u64 entry; + + entry = FIELD_PREP(VTDSS_FMT_OA, log2_div(oa, PT_GRANUAL_LG2SZ)) | + attrs->descriptor_bits; + if (pts->level != 0) + entry |= VTDSS_FMT_PS; + + WRITE_ONCE(tablep[pts->index], entry); + pts->entry = entry; +} +#define pt_install_leaf_entry vtdss_pt_install_leaf_entry + +static inline bool vtdss_pt_install_table(struct pt_state *pts, + pt_oaddr_t table_pa, + const struct pt_write_attrs *attrs) +{ + u64 *tablep = pt_cur_table(pts, u64); + u64 entry; + + /* + * FIXME according to the SDM 
D is ignored by HW on table pointers? + * io_pgtable_v2 sets it + */ + entry = VTDSS_FMT_R | VTDSS_FMT_W | + FIELD_PREP(VTDSS_FMT_OA, log2_div(table_pa, PT_GRANUAL_LG2SZ)); + return pt_table_install64(&tablep[pts->index], entry, pts->entry); +} +#define pt_install_table vtdss_pt_install_table + +static inline void vtdss_pt_attr_from_entry(const struct pt_state *pts, + struct pt_write_attrs *attrs) +{ + attrs->descriptor_bits = pts->entry & (VTDSS_FMT_R | VTDSS_FMT_W | + VTDSS_FMT_X | VTDSS_FMT_SNP); +} +#define pt_attr_from_entry vtdss_pt_attr_from_entry + +static inline void vtdss_pt_clear_entry(struct pt_state *pts, + unsigned int num_contig_lg2) +{ + u64 *tablep = pt_cur_table(pts, u64); + + WRITE_ONCE(tablep[pts->index], 0); +} +#define pt_clear_entry vtdss_pt_clear_entry + +/* --- iommu */ +#include <linux/generic_pt/iommu.h> +#include <linux/iommu.h> + +#define pt_iommu_table pt_iommu_vtdss + +/* The common struct is in the per-format common struct */ +static inline struct pt_common *common_from_iommu(struct pt_iommu *iommu_table) +{ + return &container_of(iommu_table, struct pt_iommu_table, iommu) + ->vtdss_pt.common; +} + +static inline struct pt_iommu *iommu_from_common(struct pt_common *common) +{ + return &container_of(common, struct pt_iommu_table, vtdss_pt.common) + ->iommu; +} + +static inline int vtdss_pt_iommu_set_prot(struct pt_common *common, + struct pt_write_attrs *attrs, + unsigned int iommu_prot) +{ + u64 pte = 0; + + /* + * VTDSS does not have a present bit, so we tell if any entry is present + * by checking for R or W. + */ + if (!(iommu_prot & (IOMMU_READ | IOMMU_WRITE))) + return -EINVAL; + + /* + * FIXME: The VTD driver has a bug setting DMA_FL_PTE_PRESENT on the SS + * table, which forces R on always. 
+ */ + pte |= VTDSS_FMT_R; + + if (iommu_prot & IOMMU_READ) + pte |= VTDSS_FMT_R; + if (iommu_prot & IOMMU_WRITE) + pte |= VTDSS_FMT_W; +/* FIXME if (dmar_domain->set_pte_snp) + pte |= VTDSS_FMT_SNP; */ + + attrs->descriptor_bits = pte; + return 0; +} +#define pt_iommu_set_prot vtdss_pt_iommu_set_prot + +static inline int vtdss_pt_iommu_fmt_init(struct pt_iommu_vtdss *iommu_table, + struct pt_iommu_vtdss_cfg *cfg) +{ + struct pt_vtdss *table = &iommu_table->vtdss_pt; + + /* FIXME configurable */ + pt_top_set_level(&table->common, 3); + return 0; +} +#define pt_iommu_fmt_init vtdss_pt_iommu_fmt_init + +#if defined(GENERIC_PT_KUNIT) +static void vtdss_pt_kunit_setup_cfg(struct pt_iommu_vtdss_cfg *cfg) +{ +} +#define pt_kunit_setup_cfg vtdss_pt_kunit_setup_cfg +#endif + +/* + * Requires Tina's series: + * https://patch.msgid.link/r/20231106071226.9656-3-tina.zhang@xxxxxxxxx + * See my github for an integrated version + */ +#if defined(GENERIC_PT_KUNIT) && IS_ENABLED(CONFIG_IOMMU_IO_PGTABLE_VTD) +#include <linux/io-pgtable.h> + +static struct io_pgtable_ops * +vtdss_pt_iommu_alloc_io_pgtable(struct pt_iommu_vtdss_cfg *cfg, + struct device *iommu_dev, + struct io_pgtable_cfg **unused_pgtbl_cfg) +{ + struct io_pgtable_cfg pgtbl_cfg = {}; + + pgtbl_cfg.ias = 48; + pgtbl_cfg.oas = 52; + pgtbl_cfg.vtd_cfg.cap_reg = 4 << 8; + pgtbl_cfg.vtd_cfg.ecap_reg = BIT_ULL(26) | BIT_ULL(60) | BIT_ULL(48) | BIT_ULL(56); + pgtbl_cfg.pgsize_bitmap = SZ_4K; + pgtbl_cfg.coherent_walk = true; + return alloc_io_pgtable_ops(INTEL_IOMMU, &pgtbl_cfg, NULL); +} +#define pt_iommu_alloc_io_pgtable vtdss_pt_iommu_alloc_io_pgtable + +static void vtdss_pt_iommu_setup_ref_table(struct pt_iommu_vtdss *iommu_table, + struct io_pgtable_ops *pgtbl_ops) +{ + struct io_pgtable_cfg *pgtbl_cfg = + &io_pgtable_ops_to_pgtable(pgtbl_ops)->cfg; + struct pt_common *common = &iommu_table->vtdss_pt.common; + + pt_top_set(common, __va(pgtbl_cfg->vtd_cfg.pgd), 3); +} +#define pt_iommu_setup_ref_table
vtdss_pt_iommu_setup_ref_table + +static u64 vtdss_pt_kunit_cmp_mask_entry(struct pt_state *pts) +{ + if (pts->type == PT_ENTRY_TABLE) + return pts->entry & (~(u64)(VTDSS_FMT_OA)); + return pts->entry; +} +#define pt_kunit_cmp_mask_entry vtdss_pt_kunit_cmp_mask_entry +#endif + +#endif diff --git a/include/linux/generic_pt/common.h b/include/linux/generic_pt/common.h index 558302fe1e0324..a3469132db7dda 100644 --- a/include/linux/generic_pt/common.h +++ b/include/linux/generic_pt/common.h @@ -145,6 +145,10 @@ enum { PT_FEAT_DART_V2 = PT_FEAT_FMT_START, }; +struct pt_vtdss { + struct pt_common common; +}; + struct pt_x86pae { struct pt_common common; }; diff --git a/include/linux/generic_pt/iommu.h b/include/linux/generic_pt/iommu.h index 351a69fe62dd1d..b9ecab07b0223d 100644 --- a/include/linux/generic_pt/iommu.h +++ b/include/linux/generic_pt/iommu.h @@ -317,6 +317,18 @@ struct pt_iommu_dart_cfg { int pt_iommu_dart_init(struct pt_iommu_dart *table, struct pt_iommu_dart_cfg *cfg, gfp_t gfp); +struct pt_iommu_vtdss { + struct pt_iommu iommu; + struct pt_vtdss vtdss_pt; +}; + +struct pt_iommu_vtdss_cfg { + struct device *iommu_device; + unsigned int features; +}; +int pt_iommu_vtdss_init(struct pt_iommu_vtdss *table, + struct pt_iommu_vtdss_cfg *cfg, gfp_t gfp); + struct pt_iommu_x86pae { struct pt_iommu iommu; struct pt_x86pae x86pae_pt; -- 2.46.0