The features, format and naming are taken from the ARMv8 VMSAv8-64 chapter. ARMv8 uses almost all the features of the common implementation: - Contiguous pages - Leaf pages at many levels - Variable top level - Variable size top level, including super-sized (concatenated tables) - Dirty tracking - Low or high starting VA Compared to the io-pgtable version this also implements the contiguous page hint, and supports dirty readback from the S2. The common algorithms use a bit in the folio to keep track of the cache invalidation race, while the io-pgtable version uses a SW bit in the table PTE. In part as a demonstration, to be evaluated with performance data, ARMv8 is multi-compiled for each of the 4k/16k/64k granule sizes. This gives 3x the .text usage with an unmeasured performance improvement. It shows how Generic PT can be used to optimize code gen. FIXME: Not every detail around the variable VA width is fully completed and tested yet. Signed-off-by: Jason Gunthorpe <jgg@xxxxxxxxxx> --- drivers/iommu/generic_pt/Kconfig | 39 ++ drivers/iommu/generic_pt/fmt/Makefile | 4 + drivers/iommu/generic_pt/fmt/armv8.h | 621 ++++++++++++++++++ drivers/iommu/generic_pt/fmt/defs_armv8.h | 28 + .../iommu/generic_pt/fmt/iommu_armv8_16k.c | 13 + drivers/iommu/generic_pt/fmt/iommu_armv8_4k.c | 13 + .../iommu/generic_pt/fmt/iommu_armv8_64k.c | 13 + include/linux/generic_pt/common.h | 22 + include/linux/generic_pt/iommu.h | 73 ++ 9 files changed, 826 insertions(+) create mode 100644 drivers/iommu/generic_pt/fmt/armv8.h create mode 100644 drivers/iommu/generic_pt/fmt/defs_armv8.h create mode 100644 drivers/iommu/generic_pt/fmt/iommu_armv8_16k.c create mode 100644 drivers/iommu/generic_pt/fmt/iommu_armv8_4k.c create mode 100644 drivers/iommu/generic_pt/fmt/iommu_armv8_64k.c diff --git a/drivers/iommu/generic_pt/Kconfig b/drivers/iommu/generic_pt/Kconfig index 3ac9b2324ebd98..260fff5daa6e57 100644 --- a/drivers/iommu/generic_pt/Kconfig +++ b/drivers/iommu/generic_pt/Kconfig @@ -29,10 
+29,49 @@ config IOMMU_PT Generic library for building IOMMU page tables if IOMMU_PT +config IOMMU_PT_ARMV8_4K + tristate "IOMMU page table for 64 bit ARMv8 4k page size" + depends on !GENERIC_ATOMIC64 # for cmpxchg64 + default n + help + Enable support for the ARMv8 VMSAv8-64 and the VMSAv8-32 long + descriptor pagetable format. This format supports both stage-1 and + stage-2, as well as address spaces up to 48-bits in size. 4K + granule size version. + + If unsure, say N here. + +config IOMMU_PT_ARMV8_16K + tristate "IOMMU page table for 64 bit ARMv8 16k page size" + depends on !GENERIC_ATOMIC64 # for cmpxchg64 + default n + help + Enable support for the ARMv8 VMSAv8-64 and the VMSAv8-32 long + descriptor pagetable format. This format supports both stage-1 and + stage-2, as well as address spaces up to 48-bits in size. 16K + granule size version. + + If unsure, say N here. + +config IOMMU_PT_ARMV8_64K + tristate "IOMMU page table for 64 bit ARMv8 64k page size" + depends on !GENERIC_ATOMIC64 # for cmpxchg64 + default n + help + Enable support for the ARMv8 VMSAv8-64 and the VMSAv8-32 long + descriptor pagetable format. This format supports both stage-1 and + stage-2, as well as address spaces up to 48-bits in size. 64K + granule size version. + + If unsure, say N here. 
+ config IOMMUT_PT_KUNIT_TEST tristate "IOMMU Page Table KUnit Test" if !KUNIT_ALL_TESTS select IOMMU_IO_PGTABLE depends on KUNIT + depends on IOMMU_PT_ARMV8_4K || !IOMMU_PT_ARMV8_4K + depends on IOMMU_PT_ARMV8_16K || !IOMMU_PT_ARMV8_16K + depends on IOMMU_PT_ARMV8_64K || !IOMMU_PT_ARMV8_64K default KUNIT_ALL_TESTS endif endif diff --git a/drivers/iommu/generic_pt/fmt/Makefile b/drivers/iommu/generic_pt/fmt/Makefile index 0c35b9ae4dfb34..9a9173ce85e075 100644 --- a/drivers/iommu/generic_pt/fmt/Makefile +++ b/drivers/iommu/generic_pt/fmt/Makefile @@ -1,5 +1,9 @@ # SPDX-License-Identifier: GPL-2.0 +iommu_pt_fmt-$(CONFIG_IOMMU_PT_ARMV8_4K) += armv8_4k +iommu_pt_fmt-$(CONFIG_IOMMU_PT_ARMV8_16K) += armv8_16k +iommu_pt_fmt-$(CONFIG_IOMMU_PT_ARMV8_64K) += armv8_64k + IOMMU_PT_KUNIT_TEST := define create_format obj-$(2) += iommu_$(1).o diff --git a/drivers/iommu/generic_pt/fmt/armv8.h b/drivers/iommu/generic_pt/fmt/armv8.h new file mode 100644 index 00000000000000..73bccbfa72b19e --- /dev/null +++ b/drivers/iommu/generic_pt/fmt/armv8.h @@ -0,0 +1,621 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES + * + * The page table format described by the ARMv8 VMSAv8-64 chapter in the + * Architecture Reference Manual. With the right cfg this will also implement + * the VMSAv8-32 Long Descriptor format. + * + * This was called io-pgtable-arm.c and ARM_xx_LPAE_Sx. + * + * NOTE! The level numbering is consistent with the Generic Page Table API, but + * is backwards from what the ARM documents use. What ARM calls level 3 this + * calls level 0. 
+ * + * Present in io-pgtable-arm.c but not here: + * ARM_MALI_LPAE + * IO_PGTABLE_QUIRK_ARM_OUTER_WBWA + */ +#ifndef __GENERIC_PT_FMT_ARMV8_H +#define __GENERIC_PT_FMT_ARMV8_H + +#include "defs_armv8.h" +#include "../pt_defs.h" + +#include <asm/page.h> +#include <linux/bitfield.h> +#include <linux/bits.h> +#include <linux/container_of.h> +#include <linux/errno.h> +#include <linux/limits.h> +#include <linux/sizes.h> + +#if ARMV8_GRANUAL_SIZE == 4096 +enum { + PT_MAX_TOP_LEVEL = 3, + PT_GRANUAL_LG2SZ = 12, +}; +#elif ARMV8_GRANUAL_SIZE == 16384 +enum { + PT_MAX_TOP_LEVEL = 3, + PT_GRANUAL_LG2SZ = 14, +}; +#elif ARMV8_GRANUAL_SIZE == 65536 +enum { + PT_MAX_TOP_LEVEL = 2, + PT_GRANUAL_LG2SZ = 16, +}; +#else +#error "Invalid ARMV8_GRANUAL_SIZE" +#endif + +enum { + PT_MAX_OUTPUT_ADDRESS_LG2 = 48, + /* + * Currently only support up to 48 bits of usable address, the 64k 52 + * bit mode is not supported. + */ + PT_MAX_VA_ADDRESS_LG2 = 48, + PT_TABLEMEM_LG2SZ = PT_GRANUAL_LG2SZ, + PT_ENTRY_WORD_SIZE = sizeof(u64), +}; + +/* Common PTE bits */ +enum { + ARMV8PT_FMT_VALID = BIT(0), + ARMV8PT_FMT_PAGE = BIT(1), + ARMV8PT_FMT_TABLE = BIT(1), + ARMV8PT_FMT_NS = BIT(5), + ARMV8PT_FMT_SH = GENMASK(9, 8), + ARMV8PT_FMT_AF = BIT(10), + + ARMV8PT_FMT_OA52 = GENMASK_ULL(15, 12), + ARMV8PT_FMT_OA48 = GENMASK_ULL(47, PT_GRANUAL_LG2SZ), + + ARMV8PT_FMT_DBM = BIT_ULL(51), + ARMV8PT_FMT_CONTIG = BIT_ULL(52), + ARMV8PT_FMT_UXN = BIT_ULL(53), + ARMV8PT_FMT_PXN = BIT_ULL(54), + ARMV8PT_FMT_NSTABLE = BIT_ULL(63), +}; + +/* S1 PTE bits */ +enum { + ARMV8PT_FMT_ATTRINDX = GENMASK(4, 2), + ARMV8PT_FMT_AP = GENMASK(7, 6), + ARMV8PT_FMT_nG = BIT(11), +}; + +enum { + ARMV8PT_MAIR_ATTR_IDX_CACHE = 1, + ARMV8PT_MAIR_ATTR_IDX_DEV = 2, + + ARMV8PT_SH_IS = 3, + ARMV8PT_SH_OS = 2, + + ARMV8PT_AP_UNPRIV = 1, + ARMV8PT_AP_RDONLY = 2, +}; + +/* S2 PTE bits */ +enum { + ARMV8PT_FMT_S2MEMATTR = GENMASK(5, 2), + ARMV8PT_FMT_S2AP = GENMASK(7, 6), +}; + +enum { + /* + * For !S2FWB these code to: + * 1111 = Normal 
outer write back cachable / Inner Write Back Cachable + * Permit S1 to override + * 0101 = Normal Non-cachable / Inner Non-cachable + * 0001 = Device / Device-nGnRE + * For S2FWB these code to: + * 0110 Force Normal Write Back + * 0101 Normal* is forced Normal-NC, Device unchanged + * 0001 Force Device-nGnRE + */ + ARMV8PT_MEMATTR_FWB_WB = 6, + ARMV8PT_MEMATTR_OIWB = 0xf, + ARMV8PT_MEMATTR_NC = 5, + ARMV8PT_MEMATTR_DEV = 1, + + ARMV8PT_S2AP_READ = 1, + ARMV8PT_S2AP_WRITE = 2, +}; + +#define common_to_armv8pt(common_ptr) \ + container_of_const(common_ptr, struct pt_armv8, common) +#define to_armv8pt(pts) common_to_armv8pt((pts)->range->common) + +static inline pt_oaddr_t armv8pt_oa(const struct pt_state *pts) +{ + u64 entry = pts->entry; + pt_oaddr_t oa; + + oa = log2_mul(FIELD_GET(ARMV8PT_FMT_OA48, entry), PT_GRANUAL_LG2SZ); + + /* LPA on 64K page size: the OA52 field holds OA[51:48], mirroring the oa >> 48 used on install */ + if (PT_GRANUAL_SIZE == SZ_64K) + oa |= ((pt_oaddr_t)FIELD_GET(ARMV8PT_FMT_OA52, entry)) << 48; + return oa; +} + +static inline pt_oaddr_t armv8pt_table_pa(const struct pt_state *pts) +{ + return armv8pt_oa(pts); +} +#define pt_table_pa armv8pt_table_pa + +/* + * Return a block or page entry pointing at a physical address Returns the + * address adjusted for the item in a contiguous case. 
+ */ +static inline pt_oaddr_t armv8pt_item_oa(const struct pt_state *pts) +{ + return armv8pt_oa(pts); +} +#define pt_item_oa armv8pt_item_oa + +static inline bool armv8pt_can_have_leaf(const struct pt_state *pts) +{ + /* + * See D5-18 Translation granule sizes, with block and page sizes, and + * output address ranges + */ + if ((PT_GRANUAL_SIZE == SZ_4K && pts->level > 2) || + (PT_GRANUAL_SIZE == SZ_16K && pts->level > 1) || + (PT_GRANUAL_SIZE == SZ_64K && pts_feature(pts, PT_FEAT_ARMV8_LPA) && pts->level > 2) || + (PT_GRANUAL_SIZE == SZ_64K && !pts_feature(pts, PT_FEAT_ARMV8_LPA) && pts->level > 1)) + return false; + return true; +} +#define pt_can_have_leaf armv8pt_can_have_leaf + +static inline unsigned int armv8pt_table_item_lg2sz(const struct pt_state *pts) +{ + return PT_GRANUAL_LG2SZ + + (PT_TABLEMEM_LG2SZ - ilog2(sizeof(u64))) * pts->level; +} +#define pt_table_item_lg2sz armv8pt_table_item_lg2sz + +/* Number of contiguous entries that ARMV8PT_FMT_CONTIG will join at this level */ +static inline unsigned short +armv8pt_contig_count_lg2(const struct pt_state *pts) +{ + if (PT_GRANUAL_SIZE == SZ_4K) + return ilog2(16); /* 64KB, 32MB, 16GB */ + else if (PT_GRANUAL_SIZE == SZ_16K && pts->level == 1) + return ilog2(32); /* 1GB */ + else if (PT_GRANUAL_SIZE == SZ_16K && pts->level == 0) + return ilog2(128); /* 2M */ + else if (PT_GRANUAL_SIZE == SZ_64K) + return ilog2(32); /* 2M, 16G */ + return ilog2(1); +} +#define pt_contig_count_lg2 armv8pt_contig_count_lg2 + +static inline unsigned int +armv8pt_entry_num_contig_lg2(const struct pt_state *pts) +{ + if (pts->entry & ARMV8PT_FMT_CONTIG) + return armv8pt_contig_count_lg2(pts); + return ilog2(1); +} +#define pt_entry_num_contig_lg2 armv8pt_entry_num_contig_lg2 + +static inline pt_vaddr_t armv8pt_full_va_prefix(const struct pt_common *common) +{ + if (pt_feature(common, PT_FEAT_ARMV8_TTBR1)) + return PT_VADDR_MAX; + return 0; +} +#define pt_full_va_prefix armv8pt_full_va_prefix + +static inline unsigned int 
armv8pt_num_items_lg2(const struct pt_state *pts) +{ + /* FIXME S2 concatenated tables */ + return PT_GRANUAL_LG2SZ - ilog2(sizeof(u64)); +} +#define pt_num_items_lg2 armv8pt_num_items_lg2 + +static inline enum pt_entry_type armv8pt_load_entry_raw(struct pt_state *pts) +{ + const u64 *tablep = pt_cur_table(pts, u64); + u64 entry; + + pts->entry = entry = READ_ONCE(tablep[pts->index]); + if (!(entry & ARMV8PT_FMT_VALID)) + return PT_ENTRY_EMPTY; + if (pts->level != 0 && (entry & ARMV8PT_FMT_TABLE)) + return PT_ENTRY_TABLE; + + /* + * Suppress returning VALID for levels that cannot have a page to remove + * code. + */ + if (!armv8pt_can_have_leaf(pts)) + return PT_ENTRY_EMPTY; + + /* Must be a block or page, don't check the page bit on level 0 */ + return PT_ENTRY_OA; +} +#define pt_load_entry_raw armv8pt_load_entry_raw + +static inline void +armv8pt_install_leaf_entry(struct pt_state *pts, pt_oaddr_t oa, + unsigned int oasz_lg2, + const struct pt_write_attrs *attrs) +{ + unsigned int isz_lg2 = pt_table_item_lg2sz(pts); + u64 *tablep = pt_cur_table(pts, u64); + u64 entry; + + PT_WARN_ON(log2_mod(oa, oasz_lg2)); + + entry = ARMV8PT_FMT_VALID | + FIELD_PREP(ARMV8PT_FMT_OA48, log2_div(oa, PT_GRANUAL_LG2SZ)) | + FIELD_PREP(ARMV8PT_FMT_OA52, oa >> 48) | attrs->descriptor_bits; + + /* + * On the last level the leaf is called a page and has the page/table bit set, + * on other levels it is called a block and has it clear. 
+ */ + if (pts->level == 0) + entry |= ARMV8PT_FMT_PAGE; + + if (oasz_lg2 != isz_lg2) { + u64 *end; + + PT_WARN_ON(oasz_lg2 != isz_lg2 + armv8pt_contig_count_lg2(pts)); + PT_WARN_ON(log2_mod(pts->index, armv8pt_contig_count_lg2(pts))); + + entry |= ARMV8PT_FMT_CONTIG; + tablep += pts->index; + end = tablep + log2_to_int(armv8pt_contig_count_lg2(pts)); + for (; tablep != end; tablep++) { + WRITE_ONCE(*tablep, entry); + entry += FIELD_PREP( + ARMV8PT_FMT_OA48, + log2_to_int(isz_lg2 - PT_GRANUAL_LG2SZ)); + } + } else { + WRITE_ONCE(tablep[pts->index], entry); + } + pts->entry = entry; +} +#define pt_install_leaf_entry armv8pt_install_leaf_entry + +static inline bool armv8pt_install_table(struct pt_state *pts, + pt_oaddr_t table_pa, + const struct pt_write_attrs *attrs) +{ + u64 *tablep = pt_cur_table(pts, u64); + u64 entry; + + entry = ARMV8PT_FMT_VALID | ARMV8PT_FMT_TABLE | + FIELD_PREP(ARMV8PT_FMT_OA48, + log2_div(table_pa, PT_GRANUAL_LG2SZ)) | + FIELD_PREP(ARMV8PT_FMT_OA52, table_pa >> 48); + + if (pts_feature(pts, PT_FEAT_ARMV8_NS)) + entry |= ARMV8PT_FMT_NSTABLE; + + return pt_table_install64(&tablep[pts->index], entry, pts->entry); +} +#define pt_install_table armv8pt_install_table + +static inline void armv8pt_attr_from_entry(const struct pt_state *pts, + struct pt_write_attrs *attrs) +{ + attrs->descriptor_bits = + pts->entry & + (ARMV8PT_FMT_SH | ARMV8PT_FMT_AF | ARMV8PT_FMT_UXN | + ARMV8PT_FMT_PXN | ARMV8PT_FMT_ATTRINDX | ARMV8PT_FMT_AP | + ARMV8PT_FMT_nG | ARMV8PT_FMT_S2MEMATTR | ARMV8PT_FMT_S2AP); +} +#define pt_attr_from_entry armv8pt_attr_from_entry + +static inline void armv8pt_clear_entry(struct pt_state *pts, + unsigned int num_contig_lg2) +{ + u64 *tablep = pt_cur_table(pts, u64); + u64 *end; + + PT_WARN_ON(log2_mod(pts->index, num_contig_lg2)); + + tablep += pts->index; + end = tablep + log2_to_int(num_contig_lg2); + for (; tablep != end; tablep++) + WRITE_ONCE(*tablep, 0); +} +#define pt_clear_entry armv8pt_clear_entry + +/* + * Call fn over all 
the items in an entry. If the entry is contiguous this + * iterates over the entire contiguous entry, including items preceding + * pts->va. always_inline avoids an indirect function call. + */ +static __always_inline bool armv8pt_reduce_contig(const struct pt_state *pts, + bool (*fn)(u64 *tablep, + u64 entry)) +{ + u64 *tablep = pt_cur_table(pts, u64); + + if (pts->entry & ARMV8PT_FMT_CONTIG) { + unsigned int num_contig_lg2 = armv8pt_contig_count_lg2(pts); + u64 *end; + + tablep += log2_set_mod(pts->index, 0, num_contig_lg2); + end = tablep + log2_to_int(num_contig_lg2); + for (; tablep != end; tablep++) + if (fn(tablep, READ_ONCE(*tablep))) + return true; + return false; + } + return fn(tablep + pts->index, pts->entry); +} + +static inline bool armv8pt_check_is_dirty_s1(u64 *tablep, u64 entry) +{ + return (entry & (ARMV8PT_FMT_DBM | + FIELD_PREP(ARMV8PT_FMT_AP, ARMV8PT_AP_RDONLY))) == + ARMV8PT_FMT_DBM; +} + +static inline bool armv8pt_clear_dirty_s1(u64 *tablep, u64 entry) +{ + WRITE_ONCE(*tablep, + entry | FIELD_PREP(ARMV8PT_FMT_AP, ARMV8PT_AP_RDONLY)); + return false; +} + +static inline bool armv8pt_check_is_dirty_s2(u64 *tablep, u64 entry) +{ + const u64 DIRTY = ARMV8PT_FMT_DBM | + FIELD_PREP(ARMV8PT_FMT_S2AP, ARMV8PT_S2AP_WRITE); + + return (entry & DIRTY) == DIRTY; +} + +static inline bool armv8pt_clear_dirty_s2(u64 *tablep, u64 entry) +{ + WRITE_ONCE(*tablep, entry & ~(u64)FIELD_PREP(ARMV8PT_FMT_S2AP, + ARMV8PT_S2AP_WRITE)); + return false; +} + +static inline bool armv8pt_entry_write_is_dirty(const struct pt_state *pts) +{ + if (!pts_feature(pts, PT_FEAT_ARMV8_S2)) + return armv8pt_reduce_contig(pts, armv8pt_check_is_dirty_s1); + else + return armv8pt_reduce_contig(pts, armv8pt_check_is_dirty_s2); +} +#define pt_entry_write_is_dirty armv8pt_entry_write_is_dirty + +static inline void armv8pt_entry_set_write_clean(struct pt_state *pts) +{ + if (!pts_feature(pts, PT_FEAT_ARMV8_S2)) + armv8pt_reduce_contig(pts, armv8pt_clear_dirty_s1); + else + armv8pt_reduce_contig(pts, 
armv8pt_clear_dirty_s2); +} +#define pt_entry_set_write_clean armv8pt_entry_set_write_clean + +/* --- iommu */ +#include <linux/generic_pt/iommu.h> +#include <linux/iommu.h> + +#define pt_iommu_table pt_iommu_armv8 + +/* The common struct is in the per-format common struct */ +static inline struct pt_common *common_from_iommu(struct pt_iommu *iommu_table) +{ + return &container_of(iommu_table, struct pt_iommu_table, iommu) + ->armpt.common; +} + +static inline struct pt_iommu *iommu_from_common(struct pt_common *common) +{ + return &container_of(common, struct pt_iommu_table, armpt.common)->iommu; +} + +static inline int armv8pt_iommu_set_prot(struct pt_common *common, + struct pt_write_attrs *attrs, + unsigned int iommu_prot) +{ + bool is_s1 = !pt_feature(common, PT_FEAT_ARMV8_S2); + u64 pte = 0; + + if (is_s1) { + u64 ap = 0; + + if (!(iommu_prot & IOMMU_WRITE) && (iommu_prot & IOMMU_READ)) + ap |= ARMV8PT_AP_RDONLY; + if (!(iommu_prot & IOMMU_PRIV)) + ap |= ARMV8PT_AP_UNPRIV; + pte = ARMV8PT_FMT_nG | FIELD_PREP(ARMV8PT_FMT_AP, ap); + + if (iommu_prot & IOMMU_MMIO) + pte |= FIELD_PREP(ARMV8PT_FMT_ATTRINDX, + ARMV8PT_MAIR_ATTR_IDX_DEV); + else if (iommu_prot & IOMMU_CACHE) + pte |= FIELD_PREP(ARMV8PT_FMT_ATTRINDX, + ARMV8PT_MAIR_ATTR_IDX_CACHE); + } else { + u64 s2ap = 0; + + if (iommu_prot & IOMMU_READ) + s2ap |= ARMV8PT_S2AP_READ; + if (iommu_prot & IOMMU_WRITE) + s2ap |= ARMV8PT_S2AP_WRITE; + pte = FIELD_PREP(ARMV8PT_FMT_S2AP, s2ap); + + if (iommu_prot & IOMMU_MMIO) + pte |= FIELD_PREP(ARMV8PT_FMT_S2MEMATTR, + ARMV8PT_MEMATTR_DEV); + else if ((iommu_prot & IOMMU_CACHE) && + pt_feature(common, PT_FEAT_ARMV8_S2FWB)) + pte |= FIELD_PREP(ARMV8PT_FMT_S2MEMATTR, + ARMV8PT_MEMATTR_FWB_WB); + else if (iommu_prot & IOMMU_CACHE) + pte |= FIELD_PREP(ARMV8PT_FMT_S2MEMATTR, + ARMV8PT_MEMATTR_OIWB); + else + pte |= FIELD_PREP(ARMV8PT_FMT_S2MEMATTR, + ARMV8PT_MEMATTR_NC); + } + + /* + * For DBM the writable entry starts out dirty to avoid the HW doing + * memory accesses to 
dirty it. We can just leave the DBM bit + * permanently set with no cost. + */ + if (pt_feature(common, PT_FEAT_ARMV8_DBM) && (iommu_prot & IOMMU_WRITE)) + pte |= ARMV8PT_FMT_DBM; + + if (iommu_prot & IOMMU_CACHE) + pte |= FIELD_PREP(ARMV8PT_FMT_SH, ARMV8PT_SH_IS); + else + pte |= FIELD_PREP(ARMV8PT_FMT_SH, ARMV8PT_SH_OS); + + /* FIXME for mali: + pte |= ARM_LPAE_PTE_SH_OS; + */ + + if (iommu_prot & IOMMU_NOEXEC) + pte |= ARMV8PT_FMT_UXN | ARMV8PT_FMT_PXN; + + if (pt_feature(common, PT_FEAT_ARMV8_NS)) + pte |= ARMV8PT_FMT_NS; + + // FIXME not on mali: + pte |= ARMV8PT_FMT_AF; + + attrs->descriptor_bits = pte; + return 0; +} +#define pt_iommu_set_prot armv8pt_iommu_set_prot + +static inline int armv8pt_iommu_fmt_init(struct pt_iommu_armv8 *iommu_table, + struct pt_iommu_armv8_cfg *cfg) +{ + struct pt_armv8 *armv8pt = &iommu_table->armpt; + unsigned int levels; + + /* Atomicity of dirty bits conflicts with an incoherent cache */ + if ((cfg->features & BIT(PT_FEAT_ARMV8_DBM)) && + (cfg->features & BIT(PT_FEAT_DMA_INCOHERENT))) + return -EOPNOTSUPP; + + /* FIXME are these inputs supposed to be an exact request, or a HW capability? */ + + if (cfg->ias_lg2 <= PT_GRANUAL_LG2SZ) + return -EINVAL; + + if ((PT_GRANUAL_SIZE == SZ_64K && cfg->oas_lg2 > 52) || + (PT_GRANUAL_SIZE != SZ_64K && cfg->oas_lg2 > 48)) + return -EINVAL; + + /*if (cfg->ias > 48) + table->feat_lva = true; */ + + cfg->ias_lg2 = min(cfg->ias_lg2, PT_MAX_VA_ADDRESS_LG2); + + levels = DIV_ROUND_UP(cfg->ias_lg2 - PT_GRANUAL_LG2SZ, + PT_GRANUAL_LG2SZ - ilog2(sizeof(u64))); + if (levels > PT_MAX_TOP_LEVEL + 1) + return -EINVAL; + + /* + * Table D5-6 PA size implications for the VTCR_EL2.{T0SZ, SL0} + * Single level is not supported without FEAT_TTST, which we are not + * implementing. 
+ */ + if (pt_feature(&armv8pt->common, PT_FEAT_ARMV8_S2) && + PT_GRANUAL_SIZE == SZ_4K && levels == 1) + return -EINVAL; + + /* FIXME - test me S2 concatenated translation tables + if (levels > 1 && cfg->is_s2 && + cfg->ias_lg2 - (ARMV8PT_LVL0_ITEM_LG2SZ * (levels - 1)) <= 4) + levels--; + */ + pt_top_set_level(&armv8pt->common, levels - 1); + armv8pt->common.max_vasz_lg2 = cfg->ias_lg2; + armv8pt->common.max_oasz_lg2 = cfg->oas_lg2; + return 0; +} +#define pt_iommu_fmt_init armv8pt_iommu_fmt_init + +#if defined(GENERIC_PT_KUNIT) +static inline void armv8pt_kunit_setup_cfg(struct pt_iommu_armv8_cfg *cfg) +{ + cfg->ias_lg2 = 48; + cfg->oas_lg2 = 48; + + cfg->features &= ~(BIT(PT_FEAT_ARMV8_TTBR1) | BIT(PT_FEAT_ARMV8_S2) | + BIT(PT_FEAT_ARMV8_DBM) | BIT(PT_FEAT_ARMV8_S2FWB) | + BIT(PT_FEAT_ARMV8_NS)); +} +#define pt_kunit_setup_cfg armv8pt_kunit_setup_cfg +#endif + +#if defined(GENERIC_PT_KUNIT) && IS_ENABLED(CONFIG_IOMMU_IO_PGTABLE_LPAE) +#include <linux/io-pgtable.h> + +static struct io_pgtable_ops * +armv8pt_iommu_alloc_io_pgtable(struct pt_iommu_armv8_cfg *cfg, + struct device *iommu_dev, + struct io_pgtable_cfg **unused_pgtbl_cfg) +{ + struct io_pgtable_cfg pgtbl_cfg = {}; + enum io_pgtable_fmt fmt; + + pgtbl_cfg.ias = cfg->ias_lg2; + pgtbl_cfg.oas = cfg->oas_lg2; + if (PT_GRANUAL_SIZE == SZ_64K) + pgtbl_cfg.pgsize_bitmap |= SZ_64K | SZ_512M; + if (PT_GRANUAL_SIZE == SZ_16K) + pgtbl_cfg.pgsize_bitmap |= SZ_16K | SZ_32M; + if (PT_GRANUAL_SIZE == SZ_4K) + pgtbl_cfg.pgsize_bitmap |= SZ_4K | SZ_2M | SZ_1G; + pgtbl_cfg.coherent_walk = true; + + if (cfg->features & BIT(PT_FEAT_ARMV8_S2)) + fmt = ARM_64_LPAE_S2; + else + fmt = ARM_64_LPAE_S1; + + return alloc_io_pgtable_ops(fmt, &pgtbl_cfg, NULL); +} +#define pt_iommu_alloc_io_pgtable armv8pt_iommu_alloc_io_pgtable + +static void armv8pt_iommu_setup_ref_table(struct pt_iommu_armv8 *iommu_table, + struct io_pgtable_ops *pgtbl_ops) +{ + struct io_pgtable_cfg *pgtbl_cfg = + &io_pgtable_ops_to_pgtable(pgtbl_ops)->cfg; + 
struct pt_common *common = &iommu_table->armpt.common; + + /* FIXME should determine the level from the pgtbl_cfg */ + if (pt_feature(common, PT_FEAT_ARMV8_S2)) + pt_top_set(common, __va(pgtbl_cfg->arm_lpae_s2_cfg.vttbr), + pt_top_get_level(common)); + else + pt_top_set(common, __va(pgtbl_cfg->arm_lpae_s1_cfg.ttbr), + pt_top_get_level(common)); +} +#define pt_iommu_setup_ref_table armv8pt_iommu_setup_ref_table + +static u64 armv8pt_kunit_cmp_mask_entry(struct pt_state *pts) +{ + if (pts->type == PT_ENTRY_TABLE) + return pts->entry & (~(u64)(ARMV8PT_FMT_OA48)); + return pts->entry & (~(u64)ARMV8PT_FMT_CONTIG); +} +#define pt_kunit_cmp_mask_entry armv8pt_kunit_cmp_mask_entry +#endif + +#endif diff --git a/drivers/iommu/generic_pt/fmt/defs_armv8.h b/drivers/iommu/generic_pt/fmt/defs_armv8.h new file mode 100644 index 00000000000000..751372a6024e4a --- /dev/null +++ b/drivers/iommu/generic_pt/fmt/defs_armv8.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES + * + * VMSAv8-64 translation table in AArch64 mode + * + */ +#ifndef __GENERIC_PT_FMT_DEFS_ARMV8_H +#define __GENERIC_PT_FMT_DEFS_ARMV8_H + +#include <linux/generic_pt/common.h> +#include <linux/types.h> + +/* Header self-compile default defines */ +#ifndef ARMV8_GRANUAL_SIZE +#define ARMV8_GRANUAL_SIZE 4096 +#endif + +typedef u64 pt_vaddr_t; +typedef u64 pt_oaddr_t; + +struct armv8pt_write_attrs { + u64 descriptor_bits; + gfp_t gfp; +}; +#define pt_write_attrs armv8pt_write_attrs + +#endif diff --git a/drivers/iommu/generic_pt/fmt/iommu_armv8_16k.c b/drivers/iommu/generic_pt/fmt/iommu_armv8_16k.c new file mode 100644 index 00000000000000..46a5aead0007fc --- /dev/null +++ b/drivers/iommu/generic_pt/fmt/iommu_armv8_16k.c @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES + */ +#define PT_FMT armv8 +#define PT_FMT_VARIANT 16k +#define PT_SUPPORTED_FEATURES \ + 
(BIT(PT_FEAT_DMA_INCOHERENT) | BIT(PT_FEAT_ARMV8_LPA) | \ + BIT(PT_FEAT_ARMV8_S2) | BIT(PT_FEAT_ARMV8_DBM) | \ + BIT(PT_FEAT_ARMV8_S2FWB)) +#define ARMV8_GRANUAL_SIZE 16384 + +#include "iommu_template.h" diff --git a/drivers/iommu/generic_pt/fmt/iommu_armv8_4k.c b/drivers/iommu/generic_pt/fmt/iommu_armv8_4k.c new file mode 100644 index 00000000000000..2143104dfe0d4d --- /dev/null +++ b/drivers/iommu/generic_pt/fmt/iommu_armv8_4k.c @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES + */ +#define PT_FMT armv8 +#define PT_FMT_VARIANT 4k +#define PT_SUPPORTED_FEATURES \ + (BIT(PT_FEAT_DMA_INCOHERENT) | BIT(PT_FEAT_ARMV8_LPA) | \ + BIT(PT_FEAT_ARMV8_S2) | BIT(PT_FEAT_ARMV8_DBM) | \ + BIT(PT_FEAT_ARMV8_S2FWB)) +#define ARMV8_GRANUAL_SIZE 4096 + +#include "iommu_template.h" diff --git a/drivers/iommu/generic_pt/fmt/iommu_armv8_64k.c b/drivers/iommu/generic_pt/fmt/iommu_armv8_64k.c new file mode 100644 index 00000000000000..df008e716b6017 --- /dev/null +++ b/drivers/iommu/generic_pt/fmt/iommu_armv8_64k.c @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES + */ +#define PT_FMT armv8 +#define PT_FMT_VARIANT 64k +#define PT_SUPPORTED_FEATURES \ + (BIT(PT_FEAT_DMA_INCOHERENT) | BIT(PT_FEAT_ARMV8_LPA) | \ + BIT(PT_FEAT_ARMV8_S2) | BIT(PT_FEAT_ARMV8_DBM) | \ + BIT(PT_FEAT_ARMV8_S2FWB)) +#define ARMV8_GRANUAL_SIZE 65536 + +#include "iommu_template.h" diff --git a/include/linux/generic_pt/common.h b/include/linux/generic_pt/common.h index 6a865dbf075192..6c8296b1dd1a65 100644 --- a/include/linux/generic_pt/common.h +++ b/include/linux/generic_pt/common.h @@ -100,4 +100,26 @@ enum { PT_FEAT_FMT_START, }; +struct pt_armv8 { + struct pt_common common; +}; + +enum { + /* Use the upper address space instead of lower */ + PT_FEAT_ARMV8_TTBR1 = PT_FEAT_FMT_START, + /* + * Large Physical Address extension allows larger page sizes on 64k. 
+ * Larger physical addresses are always supported + */ + PT_FEAT_ARMV8_LPA, + /* Use the Stage 2 format instead of Stage 1 */ + PT_FEAT_ARMV8_S2, + /* Use Dirty Bit Modifier, necessary for IOMMU dirty tracking */ + PT_FEAT_ARMV8_DBM, + /* For S2 uses the Force Write Back coding of the S2MEMATTR */ + PT_FEAT_ARMV8_S2FWB, + /* Set the NS and NSTable bits in all entries */ + PT_FEAT_ARMV8_NS, +}; + #endif diff --git a/include/linux/generic_pt/iommu.h b/include/linux/generic_pt/iommu.h index f77f6aef3f5958..64af0043d127bc 100644 --- a/include/linux/generic_pt/iommu.h +++ b/include/linux/generic_pt/iommu.h @@ -204,4 +204,77 @@ static inline void pt_iommu_deinit(struct pt_iommu *iommu_table) iommu_table->ops->deinit(iommu_table); } +struct pt_iommu_armv8 { + struct pt_iommu iommu; + struct pt_armv8 armpt; +}; + +struct pt_iommu_armv8_cfg { + struct device *iommu_device; + unsigned int features; + /* Input Address Size lg2 */ + u8 ias_lg2; + /* Output Address Size lg2 */ + u8 oas_lg2; +}; + +int pt_iommu_armv8_4k_init(struct pt_iommu_armv8 *table, + struct pt_iommu_armv8_cfg *cfg, gfp_t gfp); +int pt_iommu_armv8_16k_init(struct pt_iommu_armv8 *table, + struct pt_iommu_armv8_cfg *cfg, gfp_t gfp); +int pt_iommu_armv8_64k_init(struct pt_iommu_armv8 *table, + struct pt_iommu_armv8_cfg *cfg, gfp_t gfp); + +static inline size_t __pt_iommu_armv8_granuals_to_lg2(size_t granual_sizes) +{ + size_t supported_granuals = 0; + + if (IS_ENABLED(CONFIG_IOMMU_PT_ARMV8_4K)) + supported_granuals |= BIT(12); + if (IS_ENABLED(CONFIG_IOMMU_PT_ARMV8_16K)) + supported_granuals |= BIT(14); + if (IS_ENABLED(CONFIG_IOMMU_PT_ARMV8_64K)) + supported_granuals |= BIT(16); + + granual_sizes &= supported_granuals; + if (!granual_sizes) + return 0; + + /* Prefer the CPU page size if possible */ + if (granual_sizes & PAGE_SIZE) + return PAGE_SHIFT; + + /* + * Otherwise prefer the largest page size smaller than the CPU page + * size + */ + if (granual_sizes % PAGE_SIZE) + return 
ilog2(rounddown_pow_of_two(granual_sizes % PAGE_SIZE)); + + /* Otherwise use the smallest page size available */ + return __ffs(granual_sizes); +} + +static inline int pt_iommu_armv8_init(struct pt_iommu_armv8 *table, + struct pt_iommu_armv8_cfg *cfg, + size_t granual_sizes, gfp_t gfp) +{ + switch (__pt_iommu_armv8_granuals_to_lg2(granual_sizes)) { + case 12: + if (!IS_ENABLED(CONFIG_IOMMU_PT_ARMV8_4K)) + return -EOPNOTSUPP; + return pt_iommu_armv8_4k_init(table, cfg, gfp); + case 14: + if (!IS_ENABLED(CONFIG_IOMMU_PT_ARMV8_16K)) + return -EOPNOTSUPP; + return pt_iommu_armv8_16k_init(table, cfg, gfp); + case 16: + if (!IS_ENABLED(CONFIG_IOMMU_PT_ARMV8_64K)) + return -EOPNOTSUPP; + return pt_iommu_armv8_64k_init(table, cfg, gfp); + default: + return -EOPNOTSUPP; + } +} + #endif -- 2.46.0