RE: [PATCH 16/16] iommupt: Add the Intel VT-D second stage page table format

"Zhang, Tina" <tina.zhang@xxxxxxxxx> · Mon, 19 Aug 2024 02:51:11 +0000

> -----Original Message-----
> From: Jason Gunthorpe <jgg@xxxxxxxxxx>
> Sent: Thursday, August 15, 2024 11:12 PM
> Cc: Alejandro Jimenez <alejandro.j.jimenez@xxxxxxxxxx>; Lu Baolu
> <baolu.lu@xxxxxxxxxxxxxxx>; David Hildenbrand <david@xxxxxxxxxx>;
> Christoph Hellwig <hch@xxxxxx>; iommu@xxxxxxxxxxxxxxx; Joao Martins
> <joao.m.martins@xxxxxxxxxx>; Tian, Kevin <kevin.tian@xxxxxxxxx>;
> kvm@xxxxxxxxxxxxxxx; linux-mm@xxxxxxxxx; Pasha Tatashin
> <pasha.tatashin@xxxxxxxxxx>; Peter Xu <peterx@xxxxxxxxxx>; Ryan
> Roberts <ryan.roberts@xxxxxxx>; Sean Christopherson
> <seanjc@xxxxxxxxxx>; Zhang, Tina <tina.zhang@xxxxxxxxx>
> Subject: [PATCH 16/16] iommupt: Add the Intel VT-D second stage page table
> format
> 
> The VT-D second stage format is almost the same as the x86 PAE format,
> except the bit encodings in the PTE are different and a few new PTE features,
> like force coherency are present.
> 
> Among all the formats it is unique in not having a designated present bit.
> 
> Cc: Tina Zhang <tina.zhang@xxxxxxxxx>
> Cc: Kevin Tian <kevin.tian@xxxxxxxxx>
> Cc: Lu Baolu <baolu.lu@xxxxxxxxxxxxxxx>
> Signed-off-by: Jason Gunthorpe <jgg@xxxxxxxxxx>
> ---
>  drivers/iommu/generic_pt/Kconfig           |   6 +
>  drivers/iommu/generic_pt/fmt/Makefile      |   2 +
>  drivers/iommu/generic_pt/fmt/defs_vtdss.h  |  21 ++
>  drivers/iommu/generic_pt/fmt/iommu_vtdss.c |   8 +
>  drivers/iommu/generic_pt/fmt/vtdss.h       | 276 +++++++++++++++++++++
>  include/linux/generic_pt/common.h          |   4 +
>  include/linux/generic_pt/iommu.h           |  12 +
>  7 files changed, 329 insertions(+)
>  create mode 100644 drivers/iommu/generic_pt/fmt/defs_vtdss.h
>  create mode 100644 drivers/iommu/generic_pt/fmt/iommu_vtdss.c
>  create mode 100644 drivers/iommu/generic_pt/fmt/vtdss.h
> 
> diff --git a/drivers/iommu/generic_pt/Kconfig
> b/drivers/iommu/generic_pt/Kconfig
> index 2d08b58e953e4d..c17e09e2d03025 100644
> --- a/drivers/iommu/generic_pt/Kconfig
> +++ b/drivers/iommu/generic_pt/Kconfig
> @@ -90,6 +90,11 @@ config IOMMU_PT_DART
> 
>  	  If unsure, say N here.
> 
> +config IOMMU_PT_VTDSS
> +       tristate "IOMMU page table for Intel VT-D IOMMU Second Stage"
> +	depends on !GENERIC_ATOMIC64 # for cmpxchg64
> +	default n
> +
>  config IOMMU_PT_X86PAE
>         tristate "IOMMU page table for x86 PAE"
>  	depends on !GENERIC_ATOMIC64 # for cmpxchg64 @@ -105,6
> +110,7 @@ config IOMMUT_PT_KUNIT_TEST
>  	depends on IOMMU_PT_ARMV8_16K || !IOMMU_PT_ARMV8_16K
>  	depends on IOMMU_PT_ARMV8_64K || !IOMMU_PT_ARMV8_64K
>  	depends on IOMMU_PT_DART || !IOMMU_PT_DART
> +	depends on IOMMU_PT_VTDSS || !IOMMU_PT_VTDSS
>  	depends on IOMMU_PT_X86PAE || !IOMMU_PT_X86PAE
>  	default KUNIT_ALL_TESTS
>  endif
> diff --git a/drivers/iommu/generic_pt/fmt/Makefile
> b/drivers/iommu/generic_pt/fmt/Makefile
> index 1e10be24758fef..5a77c64d432534 100644
> --- a/drivers/iommu/generic_pt/fmt/Makefile
> +++ b/drivers/iommu/generic_pt/fmt/Makefile
> @@ -10,6 +10,8 @@ iommu_pt_fmt-$(CONFIG_IOMMU_PT_ARMV8_64K)
> += armv8_64k
> 
>  iommu_pt_fmt-$(CONFIG_IOMMU_PT_DART) += dart
> 
> +iommu_pt_fmt-$(CONFIG_IOMMU_PT_VTDSS) += vtdss
> +
>  iommu_pt_fmt-$(CONFIG_IOMMU_PT_X86PAE) += x86pae
> 
>  IOMMU_PT_KUNIT_TEST :=
> diff --git a/drivers/iommu/generic_pt/fmt/defs_vtdss.h
> b/drivers/iommu/generic_pt/fmt/defs_vtdss.h
> new file mode 100644
> index 00000000000000..4a239bcaae2a90
> --- /dev/null
> +++ b/drivers/iommu/generic_pt/fmt/defs_vtdss.h
> @@ -0,0 +1,21 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +/*
> + * Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES
> + *
> + */
> +#ifndef __GENERIC_PT_FMT_DEFS_VTDSS_H
> +#define __GENERIC_PT_FMT_DEFS_VTDSS_H
> +
> +#include <linux/generic_pt/common.h>
> +#include <linux/types.h>
> +
> +typedef u64 pt_vaddr_t;
> +typedef u64 pt_oaddr_t;
> +
> +struct vtdss_pt_write_attrs {
> +	u64 descriptor_bits;
> +	gfp_t gfp;
> +};
> +#define pt_write_attrs vtdss_pt_write_attrs
> +
> +#endif
> diff --git a/drivers/iommu/generic_pt/fmt/iommu_vtdss.c
> b/drivers/iommu/generic_pt/fmt/iommu_vtdss.c
> new file mode 100644
> index 00000000000000..12e7829815047b
> --- /dev/null
> +++ b/drivers/iommu/generic_pt/fmt/iommu_vtdss.c
> @@ -0,0 +1,8 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES  */ #define
> +PT_FMT vtdss #define PT_SUPPORTED_FEATURES 0
> +
> +#include "iommu_template.h"
> diff --git a/drivers/iommu/generic_pt/fmt/vtdss.h
> b/drivers/iommu/generic_pt/fmt/vtdss.h
> new file mode 100644
> index 00000000000000..233731365ac62d
> --- /dev/null
> +++ b/drivers/iommu/generic_pt/fmt/vtdss.h
> @@ -0,0 +1,276 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +/*
> + * Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES
> + *
> + * Intel VT-D Second Stange 5/4 level page table
> + *
> + * This is described in
> + *   Section "3.7 Second-Stage Translation"
> + *   Section "9.8 Second-Stage Paging Entries"
> + *
> + * Of the "Intel Virtualization Technology for Directed I/O
> +Architecture
> + * Specification".
> + *
> + * The named levels in the spec map to the pts->level as:
> + *   Table/SS-PTE - 0
> + *   Directory/SS-PDE - 1
> + *   Directory Ptr/SS-PDPTE - 2
> + *   PML4/SS-PML4E - 3
> + *   PML5/SS-PML5E - 4
> + * FIXME:
> + *  force_snooping
> + *  1g optional
> + *  forbid read-only
> + *  Use of direct clflush instead of DMA API  */ #ifndef
> +__GENERIC_PT_FMT_VTDSS_H #define __GENERIC_PT_FMT_VTDSS_H
> +
> +#include "defs_vtdss.h"
> +#include "../pt_defs.h"
> +
> +#include <linux/bitfield.h>
> +#include <linux/container_of.h>
> +#include <linux/log2.h>
> +
> +enum {
> +	PT_MAX_OUTPUT_ADDRESS_LG2 = 52,
> +	PT_MAX_VA_ADDRESS_LG2 = 57,
> +	PT_ENTRY_WORD_SIZE = sizeof(u64),
> +	PT_MAX_TOP_LEVEL = 4,
> +	PT_GRANUAL_LG2SZ = 12,
> +	PT_TABLEMEM_LG2SZ = 12,
> +};
> +
> +/* Shared descriptor bits */
> +enum {
> +	VTDSS_FMT_R = BIT(0),
> +	VTDSS_FMT_W = BIT(1),
> +	VTDSS_FMT_X = BIT(2),

The VT-d spec doesn't define this BIT(2).

> +/*
> + * Requires Tina's series:
> + *
> +https://patch.msgid.link/r/20231106071226.9656-3-tina.zhang@xxxxxxxxx
> + * See my github for an integrated version  */ #if
> +defined(GENERIC_PT_KUNIT) &&
> +IS_ENABLED(CONFIG_CONFIG_IOMMU_IO_PGTABLE_VTD)
> +#include <linux/io-pgtable.h>
> +
> +static struct io_pgtable_ops *
> +vtdss_pt_iommu_alloc_io_pgtable(struct pt_iommu_vtdss_cfg *cfg,
> +				struct device *iommu_dev,
> +				struct io_pgtable_cfg **unused_pgtbl_cfg) {
> +	struct io_pgtable_cfg pgtbl_cfg = {};
> +
> +	pgtbl_cfg.ias = 48;
> +	pgtbl_cfg.oas = 52;

Since alloc_io_pgtable_ops() is used for PT allocation, pgtbl_cfg.ias and pgtbl_cfg.oas can be set to the theoretical maximum address sizes, or simply left unassigned here. Otherwise it may be confusing, because determining the proper values would require consulting the VT-d capability registers.

The VT-d driver will assign valid values to those fields anyway when alloc_io_pgtable_ops() is being invoked.

Regards,
-Tina